docling 2.53.0__py3-none-any.whl → 2.54.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling might be problematic. Click here for more details.

@@ -1,7 +1,6 @@
1
- import math
2
1
  from collections import defaultdict
3
2
  from enum import Enum
4
- from typing import TYPE_CHECKING, Dict, List, Optional, Type, Union
3
+ from typing import TYPE_CHECKING, Optional, Type, Union
5
4
 
6
5
  import numpy as np
7
6
  from docling_core.types.doc import (
@@ -14,9 +13,7 @@ from docling_core.types.doc import (
14
13
  )
15
14
  from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
16
15
  from docling_core.types.doc.page import SegmentedPdfPage, TextCell
17
- from docling_core.types.io import (
18
- DocumentStream,
19
- )
16
+ from docling_core.types.io import DocumentStream
20
17
 
21
18
  # DO NOT REMOVE; explicitly exposed from this location
22
19
  from PIL.Image import Image
@@ -71,6 +68,7 @@ class InputFormat(str, Enum):
71
68
  METS_GBS = "mets_gbs"
72
69
  JSON_DOCLING = "json_docling"
73
70
  AUDIO = "audio"
71
+ VTT = "vtt"
74
72
 
75
73
 
76
74
  class OutputFormat(str, Enum):
@@ -82,7 +80,7 @@ class OutputFormat(str, Enum):
82
80
  DOCTAGS = "doctags"
83
81
 
84
82
 
85
- FormatToExtensions: Dict[InputFormat, List[str]] = {
83
+ FormatToExtensions: dict[InputFormat, list[str]] = {
86
84
  InputFormat.DOCX: ["docx", "dotx", "docm", "dotm"],
87
85
  InputFormat.PPTX: ["pptx", "potx", "ppsx", "pptm", "potm", "ppsm"],
88
86
  InputFormat.PDF: ["pdf"],
@@ -97,9 +95,10 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
97
95
  InputFormat.METS_GBS: ["tar.gz"],
98
96
  InputFormat.JSON_DOCLING: ["json"],
99
97
  InputFormat.AUDIO: ["wav", "mp3"],
98
+ InputFormat.VTT: ["vtt"],
100
99
  }
101
100
 
102
- FormatToMimeType: Dict[InputFormat, List[str]] = {
101
+ FormatToMimeType: dict[InputFormat, list[str]] = {
103
102
  InputFormat.DOCX: [
104
103
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
105
104
  "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
@@ -130,6 +129,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
130
129
  InputFormat.METS_GBS: ["application/mets+xml"],
131
130
  InputFormat.JSON_DOCLING: ["application/json"],
132
131
  InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
132
+ InputFormat.VTT: ["text/vtt"],
133
133
  }
134
134
 
135
135
  MimeTypeToFormat: dict[str, list[InputFormat]] = {
@@ -162,8 +162,8 @@ class Cluster(BaseModel):
162
162
  label: DocItemLabel
163
163
  bbox: BoundingBox
164
164
  confidence: float = 1.0
165
- cells: List[TextCell] = []
166
- children: List["Cluster"] = [] # Add child cluster support
165
+ cells: list[TextCell] = []
166
+ children: list["Cluster"] = [] # Add child cluster support
167
167
 
168
168
  @field_serializer("confidence")
169
169
  def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
@@ -179,7 +179,7 @@ class BasePageElement(BaseModel):
179
179
 
180
180
 
181
181
  class LayoutPrediction(BaseModel):
182
- clusters: List[Cluster] = []
182
+ clusters: list[Cluster] = []
183
183
 
184
184
 
185
185
  class VlmPredictionToken(BaseModel):
@@ -201,14 +201,14 @@ class ContainerElement(
201
201
 
202
202
 
203
203
  class Table(BasePageElement):
204
- otsl_seq: List[str]
204
+ otsl_seq: list[str]
205
205
  num_rows: int = 0
206
206
  num_cols: int = 0
207
- table_cells: List[TableCell]
207
+ table_cells: list[TableCell]
208
208
 
209
209
 
210
210
  class TableStructurePrediction(BaseModel):
211
- table_map: Dict[int, Table] = {}
211
+ table_map: dict[int, Table] = {}
212
212
 
213
213
 
214
214
  class TextElement(BasePageElement):
@@ -216,7 +216,7 @@ class TextElement(BasePageElement):
216
216
 
217
217
 
218
218
  class FigureElement(BasePageElement):
219
- annotations: List[PictureDataType] = []
219
+ annotations: list[PictureDataType] = []
220
220
  provenance: Optional[str] = None
221
221
  predicted_class: Optional[str] = None
222
222
  confidence: Optional[float] = None
@@ -234,12 +234,12 @@ class FigureElement(BasePageElement):
234
234
 
235
235
  class FigureClassificationPrediction(BaseModel):
236
236
  figure_count: int = 0
237
- figure_map: Dict[int, FigureElement] = {}
237
+ figure_map: dict[int, FigureElement] = {}
238
238
 
239
239
 
240
240
  class EquationPrediction(BaseModel):
241
241
  equation_count: int = 0
242
- equation_map: Dict[int, TextElement] = {}
242
+ equation_map: dict[int, TextElement] = {}
243
243
 
244
244
 
245
245
  class PagePredictions(BaseModel):
@@ -254,9 +254,9 @@ PageElement = Union[TextElement, Table, FigureElement, ContainerElement]
254
254
 
255
255
 
256
256
  class AssembledUnit(BaseModel):
257
- elements: List[PageElement] = []
258
- body: List[PageElement] = []
259
- headers: List[PageElement] = []
257
+ elements: list[PageElement] = []
258
+ body: list[PageElement] = []
259
+ headers: list[PageElement] = []
260
260
 
261
261
 
262
262
  class ItemAndImageEnrichmentElement(BaseModel):
@@ -280,12 +280,12 @@ class Page(BaseModel):
280
280
  None # Internal PDF backend. By default it is cleared during assembling.
281
281
  )
282
282
  _default_image_scale: float = 1.0 # Default image scale for external usage.
283
- _image_cache: Dict[
283
+ _image_cache: dict[
284
284
  float, Image
285
285
  ] = {} # Cache of images in different scales. By default it is cleared during assembling.
286
286
 
287
287
  @property
288
- def cells(self) -> List[TextCell]:
288
+ def cells(self) -> list[TextCell]:
289
289
  """Return text cells as a read-only view of parsed_page.textline_cells."""
290
290
  if self.parsed_page is not None:
291
291
  return self.parsed_page.textline_cells
@@ -354,7 +354,7 @@ class OpenAiApiResponse(BaseModel):
354
354
 
355
355
  id: str
356
356
  model: Optional[str] = None # returned by openai
357
- choices: List[OpenAiResponseChoice]
357
+ choices: list[OpenAiResponseChoice]
358
358
  created: int
359
359
  usage: OpenAiResponseUsage
360
360
 
@@ -430,7 +430,7 @@ class PageConfidenceScores(BaseModel):
430
430
 
431
431
 
432
432
  class ConfidenceReport(PageConfidenceScores):
433
- pages: Dict[int, PageConfidenceScores] = Field(
433
+ pages: dict[int, PageConfidenceScores] = Field(
434
434
  default_factory=lambda: defaultdict(PageConfidenceScores)
435
435
  )
436
436
 
@@ -394,6 +394,8 @@ class _DocumentConversionInput(BaseModel):
394
394
  mime = FormatToMimeType[InputFormat.PPTX][0]
395
395
  elif ext in FormatToExtensions[InputFormat.XLSX]:
396
396
  mime = FormatToMimeType[InputFormat.XLSX][0]
397
+ elif ext in FormatToExtensions[InputFormat.VTT]:
398
+ mime = FormatToMimeType[InputFormat.VTT][0]
397
399
 
398
400
  return mime
399
401
 
@@ -25,6 +25,7 @@ from docling.backend.msexcel_backend import MsExcelDocumentBackend
25
25
  from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
26
26
  from docling.backend.msword_backend import MsWordDocumentBackend
27
27
  from docling.backend.noop_backend import NoOpBackend
28
+ from docling.backend.webvtt_backend import WebVTTDocumentBackend
28
29
  from docling.backend.xml.jats_backend import JatsDocumentBackend
29
30
  from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
30
31
  from docling.datamodel.base_models import (
@@ -170,6 +171,9 @@ def _get_default_option(format: InputFormat) -> FormatOption:
170
171
  pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
171
172
  ),
172
173
  InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
174
+ InputFormat.VTT: FormatOption(
175
+ pipeline_cls=SimplePipeline, backend=WebVTTDocumentBackend
176
+ ),
173
177
  }
174
178
  if (options := format_to_default_options.get(format)) is not None:
175
179
  return options
@@ -121,7 +121,7 @@ class TableStructureModel(BasePageModel):
121
121
 
122
122
  for table_element in tbl_list:
123
123
  x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
124
- y0 *= scale_x
124
+ y0 *= scale_y
125
125
  y1 *= scale_y
126
126
  x0 *= scale_x
127
127
  x1 *= scale_x
@@ -132,7 +132,7 @@ class TableStructureModel(BasePageModel):
132
132
  x0, y0, x1, y1 = cell.rect.to_bounding_box().as_tuple()
133
133
  x0 *= scale_x
134
134
  x1 *= scale_x
135
- y0 *= scale_x
135
+ y0 *= scale_y
136
136
  y1 *= scale_y
137
137
 
138
138
  draw.rectangle([(x0, y0), (x1, y1)], outline="green")
@@ -142,7 +142,7 @@ class TableStructureModel(BasePageModel):
142
142
  x0, y0, x1, y1 = tc.bbox.as_tuple()
143
143
  x0 *= scale_x
144
144
  x1 *= scale_x
145
- y0 *= scale_x
145
+ y0 *= scale_y
146
146
  y1 *= scale_y
147
147
 
148
148
  if tc.column_header:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.53.0
3
+ Version: 2.54.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -26,7 +26,7 @@ Requires-Python: <4.0,>=3.9
26
26
  Description-Content-Type: text/markdown
27
27
  License-File: LICENSE
28
28
  Requires-Dist: pydantic<3.0.0,>=2.0.0
29
- Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.0
29
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.2
30
30
  Requires-Dist: docling-parse<5.0.0,>=4.4.0
31
31
  Requires-Dist: docling-ibm-models<4,>=3.9.1
32
32
  Requires-Dist: filetype<2.0.0,>=1.2.0
@@ -101,7 +101,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
101
101
 
102
102
  ## Features
103
103
 
104
- * 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
104
+ * 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, VTT, images (PNG, TIFF, JPEG, ...), and more
105
105
  * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
106
106
  * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
107
107
  * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
@@ -117,13 +117,13 @@ Docling simplifies document processing, parsing diverse formats — including ad
117
117
  * 📤 Structured [information extraction][extraction] \[🧪 beta\]
118
118
  * 📑 New layout model (**Heron**) by default, for faster PDF parsing
119
119
  * 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
120
+ * 💬 Parsing of Web Video Text Tracks (WebVTT) files
120
121
 
121
122
  ### Coming soon
122
123
 
123
124
  * 📝 Metadata extraction, including title, authors, references & language
124
125
  * 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
125
126
  * 📝 Complex chemistry understanding (Molecular structures)
126
- * 📝 Parsing of Web Video Text Tracks (WebVTT) files
127
127
 
128
128
  ## Installation
129
129
 
@@ -1,5 +1,5 @@
1
1
  docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- docling/document_converter.py,sha256=CKMlobhTt8Y5yZ_tQOnPAP7_otBiddQ_klRGT5Bgwyo,15827
2
+ docling/document_converter.py,sha256=gPyBrNegMgeBGxN7iebrjqEDm7zQQOmFNm8hVi-pFEQ,16013
3
3
  docling/document_extractor.py,sha256=-RbQRvLWLXF15HYqBbV_lJhh08Zl487UEQKhP-_FR8k,11969
4
4
  docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
5
5
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
@@ -15,10 +15,11 @@ docling/backend/md_backend.py,sha256=qCI7SD9hnWWGrkG_drpzQv2Z7DVBG4Tsq3hhTsYV790
15
15
  docling/backend/mets_gbs_backend.py,sha256=EA8sY6tbmGiysKGYPPZiNlK-i7Adn8bLTo-7Ym15hTU,12774
16
16
  docling/backend/msexcel_backend.py,sha256=5JRbPwOjR1r45AMeIts1rj6InbOgLBf_CtAhvNPVmsQ,19157
17
17
  docling/backend/mspowerpoint_backend.py,sha256=wJgB2JStEPfD7MPpWQlpPN7bffPxaHFUnKD4wj8SLxU,15114
18
- docling/backend/msword_backend.py,sha256=fKeAMGGR5ABimedo_ofCQAybzdqmqWA3A3mpLl7X6qY,49129
18
+ docling/backend/msword_backend.py,sha256=kQI9hrx_lvHn__KdxW8MbvB78snoVzA_m4jXx6f_LJ8,54419
19
19
  docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
20
20
  docling/backend/pdf_backend.py,sha256=Wcd1NSrAMjXK8VicTki5p-j-JLofklt07eF0kIG17_0,3361
21
21
  docling/backend/pypdfium2_backend.py,sha256=AYhWs9S8W_TkAK0-OkRmUNf4HUZl26FP7-XYjwU5zDk,14209
22
+ docling/backend/webvtt_backend.py,sha256=9xPcfWVLuqhEAFrkv8aU36qHnSgjeINZAXT_C9C6XJA,19165
22
23
  docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
24
  docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
25
  docling/backend/docx/latex/latex_dict.py,sha256=tFJp4ScT_AkY2ON7nLEa560p601Jq2glcZvMKxxjn7w,6593
@@ -36,8 +37,8 @@ docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
36
37
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
38
  docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
38
39
  docling/datamodel/asr_model_specs.py,sha256=Wg7z3zm_wXIWu122iPVy0RMECsA_JCFHrlFF-xxHoVQ,2187
39
- docling/datamodel/base_models.py,sha256=vOt895z0GsFirHkkI3hM23e9oyUuz9RXfcGFtoINLtw,12334
40
- docling/datamodel/document.py,sha256=ElY7G6FYJ6Bayyw433_tbnxyE47fnQRoBG_mygvOBrA,17370
40
+ docling/datamodel/base_models.py,sha256=CQ6eThPzVeVD2Gq7BNz9Q5RDLwhe4NgMzk7tdLtk1c8,12382
41
+ docling/datamodel/document.py,sha256=HyO3kdJcXIJ3wL95sPoL3zvsO4Rww3-qHH6IkL4I0q4,17483
41
42
  docling/datamodel/extraction.py,sha256=7dgvtK5SuvgfB8LHAwS1FwrW1kcMQJuJG0ol8uAQgoQ,1323
42
43
  docling/datamodel/layout_model_specs.py,sha256=GSkJ-Z_0PVgwWGi7C7TsxbzRjlrWS9ZrHJjHumv-Z5U,2339
43
44
  docling/datamodel/pipeline_options.py,sha256=28opZ3woXA8IKaG2-BHM-lmmi-gyuScCMHGxhlxGOsk,11290
@@ -61,7 +62,7 @@ docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6
61
62
  docling/models/picture_description_vlm_model.py,sha256=Uja_BQSk7F-U1J2hm4yeLguirUzKYv1K8zRyw1IYomY,4150
62
63
  docling/models/rapid_ocr_model.py,sha256=anUVUwaj9Wubgu4FnHdYMuOVkQP_hJiLY1qRToelBoc,7700
63
64
  docling/models/readingorder_model.py,sha256=bZoXHaSwUsa8niSmJrbCuy784ixCeBXT-RQBUfgHJ4A,14925
64
- docling/models/table_structure_model.py,sha256=7vO8LisdoqCTsY8X8lsk9d-oD2hVjUtdaWlkMTQxEg0,12518
65
+ docling/models/table_structure_model.py,sha256=7g_mFf1YzfF8PXQfefNu6XYZu7TzJAn86zKb6IEUdCg,12518
65
66
  docling/models/tesseract_ocr_cli_model.py,sha256=I3Gn28Y-LD8OfvyCElN9fLiNgpo2sT0uMkVt258253s,12881
66
67
  docling/models/tesseract_ocr_model.py,sha256=GdI5Cjfi87qcehVbM3wdKRvKkl_F9A4bwTUbjXZCJYA,10745
67
68
  docling/models/factories/__init__.py,sha256=x_EM5dDg_A3HBcBYzOoqwmA2AFLtJ1IzYDPX-R1A-Sg,868
@@ -99,9 +100,9 @@ docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,
99
100
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
100
101
  docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
101
102
  docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
102
- docling-2.53.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
103
- docling-2.53.0.dist-info/METADATA,sha256=bpbaYrZCEppMQ3nPsq8wyn_Opp6IRK_P_rF5JQjCjr4,11247
104
- docling-2.53.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
105
- docling-2.53.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
106
- docling-2.53.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
107
- docling-2.53.0.dist-info/RECORD,,
103
+ docling-2.54.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
104
+ docling-2.54.0.dist-info/METADATA,sha256=_GsdUYyPCv8XKeLeSO9Y0euAH8Eanr5i_y5kLvDEb1g,11252
105
+ docling-2.54.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
106
+ docling-2.54.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
107
+ docling-2.54.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
108
+ docling-2.54.0.dist-info/RECORD,,