docling-2.52.0-py3-none-any.whl → docling-2.54.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling might be problematic. Click here for more details.

@@ -1,7 +1,6 @@
1
- import math
2
1
  from collections import defaultdict
3
2
  from enum import Enum
4
- from typing import TYPE_CHECKING, Dict, List, Optional, Type, Union
3
+ from typing import TYPE_CHECKING, Optional, Type, Union
5
4
 
6
5
  import numpy as np
7
6
  from docling_core.types.doc import (
@@ -14,9 +13,7 @@ from docling_core.types.doc import (
14
13
  )
15
14
  from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
16
15
  from docling_core.types.doc.page import SegmentedPdfPage, TextCell
17
- from docling_core.types.io import (
18
- DocumentStream,
19
- )
16
+ from docling_core.types.io import DocumentStream
20
17
 
21
18
  # DO NOT REMOVE; explicitly exposed from this location
22
19
  from PIL.Image import Image
@@ -71,6 +68,7 @@ class InputFormat(str, Enum):
71
68
  METS_GBS = "mets_gbs"
72
69
  JSON_DOCLING = "json_docling"
73
70
  AUDIO = "audio"
71
+ VTT = "vtt"
74
72
 
75
73
 
76
74
  class OutputFormat(str, Enum):
@@ -82,7 +80,7 @@ class OutputFormat(str, Enum):
82
80
  DOCTAGS = "doctags"
83
81
 
84
82
 
85
- FormatToExtensions: Dict[InputFormat, List[str]] = {
83
+ FormatToExtensions: dict[InputFormat, list[str]] = {
86
84
  InputFormat.DOCX: ["docx", "dotx", "docm", "dotm"],
87
85
  InputFormat.PPTX: ["pptx", "potx", "ppsx", "pptm", "potm", "ppsm"],
88
86
  InputFormat.PDF: ["pdf"],
@@ -97,9 +95,10 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
97
95
  InputFormat.METS_GBS: ["tar.gz"],
98
96
  InputFormat.JSON_DOCLING: ["json"],
99
97
  InputFormat.AUDIO: ["wav", "mp3"],
98
+ InputFormat.VTT: ["vtt"],
100
99
  }
101
100
 
102
- FormatToMimeType: Dict[InputFormat, List[str]] = {
101
+ FormatToMimeType: dict[InputFormat, list[str]] = {
103
102
  InputFormat.DOCX: [
104
103
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
105
104
  "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
@@ -130,6 +129,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
130
129
  InputFormat.METS_GBS: ["application/mets+xml"],
131
130
  InputFormat.JSON_DOCLING: ["application/json"],
132
131
  InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
132
+ InputFormat.VTT: ["text/vtt"],
133
133
  }
134
134
 
135
135
  MimeTypeToFormat: dict[str, list[InputFormat]] = {
@@ -162,8 +162,8 @@ class Cluster(BaseModel):
162
162
  label: DocItemLabel
163
163
  bbox: BoundingBox
164
164
  confidence: float = 1.0
165
- cells: List[TextCell] = []
166
- children: List["Cluster"] = [] # Add child cluster support
165
+ cells: list[TextCell] = []
166
+ children: list["Cluster"] = [] # Add child cluster support
167
167
 
168
168
  @field_serializer("confidence")
169
169
  def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
@@ -179,7 +179,7 @@ class BasePageElement(BaseModel):
179
179
 
180
180
 
181
181
  class LayoutPrediction(BaseModel):
182
- clusters: List[Cluster] = []
182
+ clusters: list[Cluster] = []
183
183
 
184
184
 
185
185
  class VlmPredictionToken(BaseModel):
@@ -201,14 +201,14 @@ class ContainerElement(
201
201
 
202
202
 
203
203
  class Table(BasePageElement):
204
- otsl_seq: List[str]
204
+ otsl_seq: list[str]
205
205
  num_rows: int = 0
206
206
  num_cols: int = 0
207
- table_cells: List[TableCell]
207
+ table_cells: list[TableCell]
208
208
 
209
209
 
210
210
  class TableStructurePrediction(BaseModel):
211
- table_map: Dict[int, Table] = {}
211
+ table_map: dict[int, Table] = {}
212
212
 
213
213
 
214
214
  class TextElement(BasePageElement):
@@ -216,7 +216,7 @@ class TextElement(BasePageElement):
216
216
 
217
217
 
218
218
  class FigureElement(BasePageElement):
219
- annotations: List[PictureDataType] = []
219
+ annotations: list[PictureDataType] = []
220
220
  provenance: Optional[str] = None
221
221
  predicted_class: Optional[str] = None
222
222
  confidence: Optional[float] = None
@@ -234,12 +234,12 @@ class FigureElement(BasePageElement):
234
234
 
235
235
  class FigureClassificationPrediction(BaseModel):
236
236
  figure_count: int = 0
237
- figure_map: Dict[int, FigureElement] = {}
237
+ figure_map: dict[int, FigureElement] = {}
238
238
 
239
239
 
240
240
  class EquationPrediction(BaseModel):
241
241
  equation_count: int = 0
242
- equation_map: Dict[int, TextElement] = {}
242
+ equation_map: dict[int, TextElement] = {}
243
243
 
244
244
 
245
245
  class PagePredictions(BaseModel):
@@ -254,9 +254,9 @@ PageElement = Union[TextElement, Table, FigureElement, ContainerElement]
254
254
 
255
255
 
256
256
  class AssembledUnit(BaseModel):
257
- elements: List[PageElement] = []
258
- body: List[PageElement] = []
259
- headers: List[PageElement] = []
257
+ elements: list[PageElement] = []
258
+ body: list[PageElement] = []
259
+ headers: list[PageElement] = []
260
260
 
261
261
 
262
262
  class ItemAndImageEnrichmentElement(BaseModel):
@@ -280,12 +280,12 @@ class Page(BaseModel):
280
280
  None # Internal PDF backend. By default it is cleared during assembling.
281
281
  )
282
282
  _default_image_scale: float = 1.0 # Default image scale for external usage.
283
- _image_cache: Dict[
283
+ _image_cache: dict[
284
284
  float, Image
285
285
  ] = {} # Cache of images in different scales. By default it is cleared during assembling.
286
286
 
287
287
  @property
288
- def cells(self) -> List[TextCell]:
288
+ def cells(self) -> list[TextCell]:
289
289
  """Return text cells as a read-only view of parsed_page.textline_cells."""
290
290
  if self.parsed_page is not None:
291
291
  return self.parsed_page.textline_cells
@@ -354,7 +354,7 @@ class OpenAiApiResponse(BaseModel):
354
354
 
355
355
  id: str
356
356
  model: Optional[str] = None # returned by openai
357
- choices: List[OpenAiResponseChoice]
357
+ choices: list[OpenAiResponseChoice]
358
358
  created: int
359
359
  usage: OpenAiResponseUsage
360
360
 
@@ -430,7 +430,7 @@ class PageConfidenceScores(BaseModel):
430
430
 
431
431
 
432
432
  class ConfidenceReport(PageConfidenceScores):
433
- pages: Dict[int, PageConfidenceScores] = Field(
433
+ pages: dict[int, PageConfidenceScores] = Field(
434
434
  default_factory=lambda: defaultdict(PageConfidenceScores)
435
435
  )
436
436
 
@@ -394,6 +394,8 @@ class _DocumentConversionInput(BaseModel):
394
394
  mime = FormatToMimeType[InputFormat.PPTX][0]
395
395
  elif ext in FormatToExtensions[InputFormat.XLSX]:
396
396
  mime = FormatToMimeType[InputFormat.XLSX][0]
397
+ elif ext in FormatToExtensions[InputFormat.VTT]:
398
+ mime = FormatToMimeType[InputFormat.VTT][0]
397
399
 
398
400
  return mime
399
401
 
@@ -12,7 +12,7 @@ from pydantic import (
12
12
  )
13
13
  from typing_extensions import deprecated
14
14
 
15
- from docling.datamodel import asr_model_specs
15
+ from docling.datamodel import asr_model_specs, vlm_model_specs
16
16
 
17
17
  # Import the following for backwards compatibility
18
18
  from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
@@ -114,7 +114,11 @@ class RapidOcrOptions(OcrOptions):
114
114
  cls_model_path: Optional[str] = None # same default as rapidocr
115
115
  rec_model_path: Optional[str] = None # same default as rapidocr
116
116
  rec_keys_path: Optional[str] = None # same default as rapidocr
117
- rec_font_path: Optional[str] = None # same default as rapidocr
117
+ rec_font_path: Optional[str] = None # Deprecated, please use font_path instead
118
+ font_path: Optional[str] = None # same default as rapidocr
119
+
120
+ # Dictionary to overwrite or pass-through additional parameters
121
+ rapidocr_params: Dict[str, Any] = Field(default_factory=dict)
118
122
 
119
123
  model_config = ConfigDict(
120
124
  extra="forbid",
@@ -286,7 +290,7 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
286
290
  )
287
291
  # If True, text from backend will be used instead of generated text
288
292
  vlm_options: Union[InlineVlmOptions, ApiVlmOptions] = (
289
- smoldocling_vlm_conversion_options
293
+ vlm_model_specs.GRANITEDOCLING_TRANSFORMERS
290
294
  )
291
295
 
292
296
 
@@ -18,6 +18,35 @@ from docling.datamodel.pipeline_options_vlm_model import (
18
18
  _log = logging.getLogger(__name__)
19
19
 
20
20
 
21
+ # Granite-Docling
22
+ GRANITEDOCLING_TRANSFORMERS = InlineVlmOptions(
23
+ repo_id="ibm-granite/granite-docling-258M",
24
+ prompt="Convert this page to docling.",
25
+ response_format=ResponseFormat.DOCTAGS,
26
+ inference_framework=InferenceFramework.TRANSFORMERS,
27
+ transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
28
+ supported_devices=[
29
+ AcceleratorDevice.CPU,
30
+ AcceleratorDevice.CUDA,
31
+ ],
32
+ scale=2.0,
33
+ temperature=0.0,
34
+ max_new_tokens=8192,
35
+ stop_strings=["</doctag>", "<|end_of_text|>"],
36
+ )
37
+
38
+ GRANITEDOCLING_MLX = InlineVlmOptions(
39
+ repo_id="ibm-granite/granite-docling-258M-mlx",
40
+ prompt="Convert this page to docling.",
41
+ response_format=ResponseFormat.DOCTAGS,
42
+ inference_framework=InferenceFramework.MLX,
43
+ supported_devices=[AcceleratorDevice.MPS],
44
+ scale=2.0,
45
+ temperature=0.0,
46
+ max_new_tokens=8192,
47
+ stop_strings=["</doctag>", "<|end_of_text|>"],
48
+ )
49
+
21
50
  # SmolDocling
22
51
  SMOLDOCLING_MLX = InlineVlmOptions(
23
52
  repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
@@ -272,3 +301,4 @@ class VlmModelType(str, Enum):
272
301
  GRANITE_VISION_VLLM = "granite_vision_vllm"
273
302
  GRANITE_VISION_OLLAMA = "granite_vision_ollama"
274
303
  GOT_OCR_2 = "got_ocr_2"
304
+ GRANITEDOCLING = "granite_docling"
@@ -25,6 +25,7 @@ from docling.backend.msexcel_backend import MsExcelDocumentBackend
25
25
  from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
26
26
  from docling.backend.msword_backend import MsWordDocumentBackend
27
27
  from docling.backend.noop_backend import NoOpBackend
28
+ from docling.backend.webvtt_backend import WebVTTDocumentBackend
28
29
  from docling.backend.xml.jats_backend import JatsDocumentBackend
29
30
  from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
30
31
  from docling.datamodel.base_models import (
@@ -170,6 +171,9 @@ def _get_default_option(format: InputFormat) -> FormatOption:
170
171
  pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
171
172
  ),
172
173
  InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
174
+ InputFormat.VTT: FormatOption(
175
+ pipeline_cls=SimplePipeline, backend=WebVTTDocumentBackend
176
+ ),
173
177
  }
174
178
  if (options := format_to_default_options.get(format)) is not None:
175
179
  return options
@@ -62,32 +62,44 @@ class RapidOcrModel(BaseOcrModel):
62
62
  }
63
63
  backend_enum = _ALIASES.get(self.options.backend, EngineType.ONNXRUNTIME)
64
64
 
65
+ params = {
66
+ # Global settings (these are still correct)
67
+ "Global.text_score": self.options.text_score,
68
+ "Global.font_path": self.options.font_path,
69
+ # "Global.verbose": self.options.print_verbose,
70
+ # Detection model settings
71
+ "Det.model_path": self.options.det_model_path,
72
+ "Det.use_cuda": use_cuda,
73
+ "Det.use_dml": use_dml,
74
+ "Det.intra_op_num_threads": intra_op_num_threads,
75
+ # Classification model settings
76
+ "Cls.model_path": self.options.cls_model_path,
77
+ "Cls.use_cuda": use_cuda,
78
+ "Cls.use_dml": use_dml,
79
+ "Cls.intra_op_num_threads": intra_op_num_threads,
80
+ # Recognition model settings
81
+ "Rec.model_path": self.options.rec_model_path,
82
+ "Rec.font_path": self.options.rec_font_path,
83
+ "Rec.keys_path": self.options.rec_keys_path,
84
+ "Rec.use_cuda": use_cuda,
85
+ "Rec.use_dml": use_dml,
86
+ "Rec.intra_op_num_threads": intra_op_num_threads,
87
+ "Det.engine_type": backend_enum,
88
+ "Cls.engine_type": backend_enum,
89
+ "Rec.engine_type": backend_enum,
90
+ }
91
+
92
+ if self.options.rec_font_path is not None:
93
+ _log.warning(
94
+ "The 'rec_font_path' option for RapidOCR is deprecated. Please use 'font_path' instead."
95
+ )
96
+ user_params = self.options.rapidocr_params
97
+ if user_params:
98
+ _log.debug("Overwriting RapidOCR params with user-provided values.")
99
+ params.update(user_params)
100
+
65
101
  self.reader = RapidOCR(
66
- params={
67
- # Global settings (these are still correct)
68
- "Global.text_score": self.options.text_score,
69
- # "Global.verbose": self.options.print_verbose,
70
- # Detection model settings
71
- "Det.model_path": self.options.det_model_path,
72
- "Det.use_cuda": use_cuda,
73
- "Det.use_dml": use_dml,
74
- "Det.intra_op_num_threads": intra_op_num_threads,
75
- # Classification model settings
76
- "Cls.model_path": self.options.cls_model_path,
77
- "Cls.use_cuda": use_cuda,
78
- "Cls.use_dml": use_dml,
79
- "Cls.intra_op_num_threads": intra_op_num_threads,
80
- # Recognition model settings
81
- "Rec.model_path": self.options.rec_model_path,
82
- "Rec.font_path": self.options.rec_font_path,
83
- "Rec.keys_path": self.options.rec_keys_path,
84
- "Rec.use_cuda": use_cuda,
85
- "Rec.use_dml": use_dml,
86
- "Rec.intra_op_num_threads": intra_op_num_threads,
87
- "Det.engine_type": backend_enum,
88
- "Cls.engine_type": backend_enum,
89
- "Rec.engine_type": backend_enum,
90
- }
102
+ params=params,
91
103
  )
92
104
 
93
105
  def __call__(
@@ -120,6 +132,9 @@ class RapidOcrModel(BaseOcrModel):
120
132
  use_cls=self.options.use_cls,
121
133
  use_rec=self.options.use_rec,
122
134
  )
135
+ if result is None or result.boxes is None:
136
+ _log.warning("RapidOCR returned empty result!")
137
+ continue
123
138
  result = list(
124
139
  zip(result.boxes.tolist(), result.txts, result.scores)
125
140
  )
@@ -121,7 +121,7 @@ class TableStructureModel(BasePageModel):
121
121
 
122
122
  for table_element in tbl_list:
123
123
  x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
124
- y0 *= scale_x
124
+ y0 *= scale_y
125
125
  y1 *= scale_y
126
126
  x0 *= scale_x
127
127
  x1 *= scale_x
@@ -132,7 +132,7 @@ class TableStructureModel(BasePageModel):
132
132
  x0, y0, x1, y1 = cell.rect.to_bounding_box().as_tuple()
133
133
  x0 *= scale_x
134
134
  x1 *= scale_x
135
- y0 *= scale_x
135
+ y0 *= scale_y
136
136
  y1 *= scale_y
137
137
 
138
138
  draw.rectangle([(x0, y0), (x1, y1)], outline="green")
@@ -142,7 +142,7 @@ class TableStructureModel(BasePageModel):
142
142
  x0, y0, x1, y1 = tc.bbox.as_tuple()
143
143
  x0 *= scale_x
144
144
  x1 *= scale_x
145
- y0 *= scale_x
145
+ y0 *= scale_y
146
146
  y1 *= scale_y
147
147
 
148
148
  if tc.column_header:
@@ -10,6 +10,8 @@ from docling.datamodel.pipeline_options import (
10
10
  )
11
11
  from docling.datamodel.settings import settings
12
12
  from docling.datamodel.vlm_model_specs import (
13
+ GRANITEDOCLING_MLX,
14
+ GRANITEDOCLING_TRANSFORMERS,
13
15
  SMOLDOCLING_MLX,
14
16
  SMOLDOCLING_TRANSFORMERS,
15
17
  )
@@ -34,6 +36,8 @@ def download_models(
34
36
  with_code_formula: bool = True,
35
37
  with_picture_classifier: bool = True,
36
38
  with_smolvlm: bool = False,
39
+ with_granitedocling: bool = False,
40
+ with_granitedocling_mlx: bool = False,
37
41
  with_smoldocling: bool = False,
38
42
  with_smoldocling_mlx: bool = False,
39
43
  with_granite_vision: bool = False,
@@ -86,6 +90,24 @@ def download_models(
86
90
  progress=progress,
87
91
  )
88
92
 
93
+ if with_granitedocling:
94
+ _log.info("Downloading GraniteDocling model...")
95
+ download_hf_model(
96
+ repo_id=GRANITEDOCLING_TRANSFORMERS.repo_id,
97
+ local_dir=output_dir / GRANITEDOCLING_TRANSFORMERS.repo_cache_folder,
98
+ force=force,
99
+ progress=progress,
100
+ )
101
+
102
+ if with_granitedocling_mlx:
103
+ _log.info("Downloading GraniteDocling MLX model...")
104
+ download_hf_model(
105
+ repo_id=GRANITEDOCLING_MLX.repo_id,
106
+ local_dir=output_dir / GRANITEDOCLING_MLX.repo_cache_folder,
107
+ force=force,
108
+ progress=progress,
109
+ )
110
+
89
111
  if with_smoldocling:
90
112
  _log.info("Downloading SmolDocling model...")
91
113
  download_hf_model(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.52.0
3
+ Version: 2.54.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -26,7 +26,7 @@ Requires-Python: <4.0,>=3.9
26
26
  Description-Content-Type: text/markdown
27
27
  License-File: LICENSE
28
28
  Requires-Dist: pydantic<3.0.0,>=2.0.0
29
- Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.0
29
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.2
30
30
  Requires-Dist: docling-parse<5.0.0,>=4.4.0
31
31
  Requires-Dist: docling-ibm-models<4,>=3.9.1
32
32
  Requires-Dist: filetype<2.0.0,>=1.2.0
@@ -101,14 +101,14 @@ Docling simplifies document processing, parsing diverse formats — including ad
101
101
 
102
102
  ## Features
103
103
 
104
- * 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
104
+ * 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, VTT, images (PNG, TIFF, JPEG, ...), and more
105
105
  * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
106
106
  * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
107
107
  * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
108
108
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
109
109
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
110
110
  * 🔍 Extensive OCR support for scanned PDFs and images
111
- * 👓 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
111
+ * 👓 Support of several Visual Language Models ([GraniteDocling](https://huggingface.co/ibm-granite/granite-docling-258M))
112
112
  * 🎙️ Audio support with Automatic Speech Recognition (ASR) models
113
113
  * 🔌 Connect to any agent using the [MCP server](https://docling-project.github.io/docling/usage/mcp/)
114
114
  * 💻 Simple and convenient CLI
@@ -117,13 +117,13 @@ Docling simplifies document processing, parsing diverse formats — including ad
117
117
  * 📤 Structured [information extraction][extraction] \[🧪 beta\]
118
118
  * 📑 New layout model (**Heron**) by default, for faster PDF parsing
119
119
  * 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
120
+ * 💬 Parsing of Web Video Text Tracks (WebVTT) files
120
121
 
121
122
  ### Coming soon
122
123
 
123
124
  * 📝 Metadata extraction, including title, authors, references & language
124
125
  * 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
125
126
  * 📝 Complex chemistry understanding (Molecular structures)
126
- * 📝 Parsing of Web Video Text Tracks (WebVTT) files
127
127
 
128
128
  ## Installation
129
129
 
@@ -160,9 +160,9 @@ Docling has a built-in CLI to run conversions.
160
160
  docling https://arxiv.org/pdf/2206.01062
161
161
  ```
162
162
 
163
- You can also use 🥚[SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview) and other VLMs via Docling CLI:
163
+ You can also use 🥚[GraniteDocling](https://huggingface.co/ibm-granite/granite-docling-258M) and other VLMs via Docling CLI:
164
164
  ```bash
165
- docling --pipeline vlm --vlm-model smoldocling https://arxiv.org/pdf/2206.01062
165
+ docling --pipeline vlm --vlm-model granite_docling https://arxiv.org/pdf/2206.01062
166
166
  ```
167
167
  This will use MLX acceleration on supported Apple Silicon hardware.
168
168
 
@@ -1,5 +1,5 @@
1
1
  docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- docling/document_converter.py,sha256=CKMlobhTt8Y5yZ_tQOnPAP7_otBiddQ_klRGT5Bgwyo,15827
2
+ docling/document_converter.py,sha256=gPyBrNegMgeBGxN7iebrjqEDm7zQQOmFNm8hVi-pFEQ,16013
3
3
  docling/document_extractor.py,sha256=-RbQRvLWLXF15HYqBbV_lJhh08Zl487UEQKhP-_FR8k,11969
4
4
  docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
5
5
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
@@ -15,10 +15,11 @@ docling/backend/md_backend.py,sha256=qCI7SD9hnWWGrkG_drpzQv2Z7DVBG4Tsq3hhTsYV790
15
15
  docling/backend/mets_gbs_backend.py,sha256=EA8sY6tbmGiysKGYPPZiNlK-i7Adn8bLTo-7Ym15hTU,12774
16
16
  docling/backend/msexcel_backend.py,sha256=5JRbPwOjR1r45AMeIts1rj6InbOgLBf_CtAhvNPVmsQ,19157
17
17
  docling/backend/mspowerpoint_backend.py,sha256=wJgB2JStEPfD7MPpWQlpPN7bffPxaHFUnKD4wj8SLxU,15114
18
- docling/backend/msword_backend.py,sha256=fKeAMGGR5ABimedo_ofCQAybzdqmqWA3A3mpLl7X6qY,49129
18
+ docling/backend/msword_backend.py,sha256=kQI9hrx_lvHn__KdxW8MbvB78snoVzA_m4jXx6f_LJ8,54419
19
19
  docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
20
20
  docling/backend/pdf_backend.py,sha256=Wcd1NSrAMjXK8VicTki5p-j-JLofklt07eF0kIG17_0,3361
21
21
  docling/backend/pypdfium2_backend.py,sha256=AYhWs9S8W_TkAK0-OkRmUNf4HUZl26FP7-XYjwU5zDk,14209
22
+ docling/backend/webvtt_backend.py,sha256=9xPcfWVLuqhEAFrkv8aU36qHnSgjeINZAXT_C9C6XJA,19165
22
23
  docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
24
  docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
25
  docling/backend/docx/latex/latex_dict.py,sha256=tFJp4ScT_AkY2ON7nLEa560p601Jq2glcZvMKxxjn7w,6593
@@ -30,21 +31,21 @@ docling/backend/xml/jats_backend.py,sha256=LPj33EFdi2MRCakkLWrRLlUAc-B-949f8zp5g
30
31
  docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
31
32
  docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
32
33
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- docling/cli/main.py,sha256=K4m7dtnLUM2gqU8n_Mntpc_ODrwWtrjBPTUZakQ8erg,32111
34
- docling/cli/models.py,sha256=5C3CZz3HZXoCrBl92Is62KMCtUqsZK-oygj1hqzJ8vo,6008
34
+ docling/cli/main.py,sha256=J_hXHclzT-uNu-cuKNdlc3vwCnyDRxXrJ5L2LJofzeo,32729
35
+ docling/cli/models.py,sha256=rw_2JfeJ-k_iOLpz3JfgL1QbJY__W9nE23nHdov6VfU,6252
35
36
  docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
36
37
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
38
  docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
38
39
  docling/datamodel/asr_model_specs.py,sha256=Wg7z3zm_wXIWu122iPVy0RMECsA_JCFHrlFF-xxHoVQ,2187
39
- docling/datamodel/base_models.py,sha256=vOt895z0GsFirHkkI3hM23e9oyUuz9RXfcGFtoINLtw,12334
40
- docling/datamodel/document.py,sha256=ElY7G6FYJ6Bayyw433_tbnxyE47fnQRoBG_mygvOBrA,17370
40
+ docling/datamodel/base_models.py,sha256=CQ6eThPzVeVD2Gq7BNz9Q5RDLwhe4NgMzk7tdLtk1c8,12382
41
+ docling/datamodel/document.py,sha256=HyO3kdJcXIJ3wL95sPoL3zvsO4Rww3-qHH6IkL4I0q4,17483
41
42
  docling/datamodel/extraction.py,sha256=7dgvtK5SuvgfB8LHAwS1FwrW1kcMQJuJG0ol8uAQgoQ,1323
42
43
  docling/datamodel/layout_model_specs.py,sha256=GSkJ-Z_0PVgwWGi7C7TsxbzRjlrWS9ZrHJjHumv-Z5U,2339
43
- docling/datamodel/pipeline_options.py,sha256=N9g-3FA4hFU8A0uGvPmcy1emBBT4JH6u7CUzl3D-Ta0,11049
44
+ docling/datamodel/pipeline_options.py,sha256=28opZ3woXA8IKaG2-BHM-lmmi-gyuScCMHGxhlxGOsk,11290
44
45
  docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
45
46
  docling/datamodel/pipeline_options_vlm_model.py,sha256=AcqqThSW74hwQ6x7pazzm57LnJiUqB7gQi5wFayGlbk,2628
46
47
  docling/datamodel/settings.py,sha256=c0MTw6pO5be_BKxHKYl4SaBJAw_qL-aapxp-g5HHj1A,2084
47
- docling/datamodel/vlm_model_specs.py,sha256=8D-bF95EoaD-Wd29lVX094HPJT1gYN393aFmzv7RipQ,8713
48
+ docling/datamodel/vlm_model_specs.py,sha256=UMXiTzWCXcx2BtF5slYfWhjRXAx0s1oiAvE-vCzrATo,9686
48
49
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
50
  docling/models/api_vlm_model.py,sha256=-zisU32pgDRbychyG6-neB0qweNbPaYnLXwiGT7SEdI,2859
50
51
  docling/models/base_model.py,sha256=beMGyrpl-yYX3YnLzQkLfxMLxwmDWnbcFhkjbUlWJSU,7146
@@ -59,9 +60,9 @@ docling/models/page_preprocessing_model.py,sha256=EmusNexws5ZmR93js_saVU0BedqZ_H
59
60
  docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCrS_btclO_ZCLAUqrfl0,2377
60
61
  docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
61
62
  docling/models/picture_description_vlm_model.py,sha256=Uja_BQSk7F-U1J2hm4yeLguirUzKYv1K8zRyw1IYomY,4150
62
- docling/models/rapid_ocr_model.py,sha256=7yZC7I1qoC9xC8xJIjTk2c8VFm89RfB6Vr7IDOnr5gs,7102
63
+ docling/models/rapid_ocr_model.py,sha256=anUVUwaj9Wubgu4FnHdYMuOVkQP_hJiLY1qRToelBoc,7700
63
64
  docling/models/readingorder_model.py,sha256=bZoXHaSwUsa8niSmJrbCuy784ixCeBXT-RQBUfgHJ4A,14925
64
- docling/models/table_structure_model.py,sha256=7vO8LisdoqCTsY8X8lsk9d-oD2hVjUtdaWlkMTQxEg0,12518
65
+ docling/models/table_structure_model.py,sha256=7g_mFf1YzfF8PXQfefNu6XYZu7TzJAn86zKb6IEUdCg,12518
65
66
  docling/models/tesseract_ocr_cli_model.py,sha256=I3Gn28Y-LD8OfvyCElN9fLiNgpo2sT0uMkVt258253s,12881
66
67
  docling/models/tesseract_ocr_model.py,sha256=GdI5Cjfi87qcehVbM3wdKRvKkl_F9A4bwTUbjXZCJYA,10745
67
68
  docling/models/factories/__init__.py,sha256=x_EM5dDg_A3HBcBYzOoqwmA2AFLtJ1IzYDPX-R1A-Sg,868
@@ -93,15 +94,15 @@ docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
93
94
  docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
94
95
  docling/utils/layout_postprocessor.py,sha256=sE9UR3Nv4iOk26uoIsN3bFioE7ScfAjj0orDBDneLXg,25166
95
96
  docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
96
- docling/utils/model_downloader.py,sha256=lAIyevIC6dyv1TS0ElRSAGNylB5n_V8pWs1PhxH8wAQ,4104
97
+ docling/utils/model_downloader.py,sha256=kFIxr5KUQbisQH0h8yP9GZMqsRJD3Xo1uOIiLiB1T78,4869
97
98
  docling/utils/ocr_utils.py,sha256=nmresYyfin0raanpQc_GGeU3WoLsfExf6SEXNIQ7Djg,2325
98
99
  docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,1842
99
100
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
100
101
  docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
101
102
  docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
102
- docling-2.52.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
103
- docling-2.52.0.dist-info/METADATA,sha256=EhUePtqwKQJTgkU9pCtvpWT7wtU-84KXkc48XExkRSQ,11233
104
- docling-2.52.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
105
- docling-2.52.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
106
- docling-2.52.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
107
- docling-2.52.0.dist-info/RECORD,,
103
+ docling-2.54.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
104
+ docling-2.54.0.dist-info/METADATA,sha256=_GsdUYyPCv8XKeLeSO9Y0euAH8Eanr5i_y5kLvDEb1g,11252
105
+ docling-2.54.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
106
+ docling-2.54.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
107
+ docling-2.54.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
108
+ docling-2.54.0.dist-info/RECORD,,