docling 2.30.0__py3-none-any.whl → 2.31.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. docling/backend/asciidoc_backend.py +7 -15
  2. docling/backend/csv_backend.py +1 -1
  3. docling/backend/docling_parse_backend.py +2 -2
  4. docling/backend/docling_parse_v2_backend.py +2 -2
  5. docling/backend/docling_parse_v4_backend.py +3 -4
  6. docling/backend/docx/latex/latex_dict.py +0 -5
  7. docling/backend/docx/latex/omml.py +4 -7
  8. docling/backend/html_backend.py +66 -25
  9. docling/backend/md_backend.py +6 -8
  10. docling/backend/msexcel_backend.py +1 -7
  11. docling/backend/mspowerpoint_backend.py +4 -7
  12. docling/backend/msword_backend.py +5 -5
  13. docling/backend/pdf_backend.py +2 -1
  14. docling/backend/pypdfium2_backend.py +3 -3
  15. docling/backend/xml/jats_backend.py +11 -14
  16. docling/backend/xml/uspto_backend.py +19 -23
  17. docling/cli/main.py +8 -8
  18. docling/cli/models.py +6 -3
  19. docling/datamodel/base_models.py +7 -5
  20. docling/datamodel/document.py +19 -10
  21. docling/datamodel/pipeline_options.py +0 -1
  22. docling/document_converter.py +8 -6
  23. docling/models/api_vlm_model.py +1 -2
  24. docling/models/base_model.py +2 -4
  25. docling/models/base_ocr_model.py +2 -2
  26. docling/models/code_formula_model.py +2 -1
  27. docling/models/document_picture_classifier.py +2 -1
  28. docling/models/easyocr_model.py +10 -11
  29. docling/models/factories/__init__.py +2 -2
  30. docling/models/factories/base_factory.py +1 -1
  31. docling/models/hf_mlx_model.py +4 -6
  32. docling/models/hf_vlm_model.py +7 -5
  33. docling/models/layout_model.py +2 -2
  34. docling/models/ocr_mac_model.py +3 -4
  35. docling/models/page_assemble_model.py +7 -12
  36. docling/models/page_preprocessing_model.py +2 -1
  37. docling/models/picture_description_api_model.py +2 -1
  38. docling/models/picture_description_base_model.py +2 -3
  39. docling/models/picture_description_vlm_model.py +6 -4
  40. docling/models/rapid_ocr_model.py +2 -3
  41. docling/models/readingorder_model.py +9 -24
  42. docling/models/table_structure_model.py +4 -8
  43. docling/models/tesseract_ocr_cli_model.py +17 -16
  44. docling/models/tesseract_ocr_model.py +9 -5
  45. docling/pipeline/base_pipeline.py +4 -8
  46. docling/pipeline/simple_pipeline.py +0 -1
  47. docling/pipeline/standard_pdf_pipeline.py +0 -1
  48. docling/pipeline/vlm_pipeline.py +0 -3
  49. docling/utils/export.py +2 -4
  50. docling/utils/glm_utils.py +2 -2
  51. docling/utils/layout_postprocessor.py +4 -2
  52. docling/utils/model_downloader.py +31 -7
  53. docling/utils/utils.py +3 -3
  54. {docling-2.30.0.dist-info → docling-2.31.1.dist-info}/METADATA +2 -1
  55. docling-2.31.1.dist-info/RECORD +86 -0
  56. docling-2.30.0.dist-info/RECORD +0 -86
  57. {docling-2.30.0.dist-info → docling-2.31.1.dist-info}/LICENSE +0 -0
  58. {docling-2.30.0.dist-info → docling-2.31.1.dist-info}/WHEEL +0 -0
  59. {docling-2.30.0.dist-info → docling-2.31.1.dist-info}/entry_points.txt +0 -0
@@ -3,9 +3,10 @@ import io
3
3
  import logging
4
4
  import os
5
5
  import tempfile
6
+ from collections.abc import Iterable
6
7
  from pathlib import Path
7
8
  from subprocess import DEVNULL, PIPE, Popen
8
- from typing import Iterable, List, Optional, Tuple, Type
9
+ from typing import List, Optional, Tuple, Type
9
10
 
10
11
  import pandas as pd
11
12
  from docling_core.types.doc import BoundingBox, CoordOrigin
@@ -63,8 +64,7 @@ class TesseractOcrCliModel(BaseOcrModel):
63
64
  )
64
65
 
65
66
  def _get_name_and_version(self) -> Tuple[str, str]:
66
-
67
- if self._name != None and self._version != None:
67
+ if self._name is not None and self._version is not None:
68
68
  return self._name, self._version # type: ignore
69
69
 
70
70
  cmd = [self.options.tesseract_cmd, "--version"]
@@ -125,14 +125,16 @@ class TesseractOcrCliModel(BaseOcrModel):
125
125
  # _log.info(decoded_data)
126
126
 
127
127
  # Read the TSV file generated by Tesseract
128
- df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t")
128
+ df_result = pd.read_csv(
129
+ io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t"
130
+ )
129
131
 
130
132
  # Display the dataframe (optional)
131
133
  # _log.info("df: ", df.head())
132
134
 
133
135
  # Filter rows that contain actual text (ignore header or empty rows)
134
- df_filtered = df[
135
- df["text"].notnull() & (df["text"].apply(str).str.strip() != "")
136
+ df_filtered = df_result[
137
+ df_result["text"].notna() & (df_result["text"].apply(str).str.strip() != "")
136
138
  ]
137
139
 
138
140
  return df_filtered
@@ -149,10 +151,10 @@ class TesseractOcrCliModel(BaseOcrModel):
149
151
  proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL)
150
152
  output, _ = proc.communicate()
151
153
  decoded_data = output.decode("utf-8")
152
- df = pd.read_csv(
154
+ df_detected = pd.read_csv(
153
155
  io.StringIO(decoded_data), sep=":", header=None, names=["key", "value"]
154
156
  )
155
- scripts = df.loc[df["key"] == "Script"].value.tolist()
157
+ scripts = df_detected.loc[df_detected["key"] == "Script"].value.tolist()
156
158
  if len(scripts) == 0:
157
159
  _log.warning("Tesseract cannot detect the script of the page")
158
160
  return None
@@ -183,11 +185,11 @@ class TesseractOcrCliModel(BaseOcrModel):
183
185
  proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL)
184
186
  output, _ = proc.communicate()
185
187
  decoded_data = output.decode("utf-8")
186
- df = pd.read_csv(io.StringIO(decoded_data), header=None)
187
- self._tesseract_languages = df[0].tolist()[1:]
188
+ df_list = pd.read_csv(io.StringIO(decoded_data), header=None)
189
+ self._tesseract_languages = df_list[0].tolist()[1:]
188
190
 
189
191
  # Decide the script prefix
190
- if any([l.startswith("script/") for l in self._tesseract_languages]):
192
+ if any(lang.startswith("script/") for lang in self._tesseract_languages):
191
193
  script_prefix = "script/"
192
194
  else:
193
195
  script_prefix = ""
@@ -197,7 +199,6 @@ class TesseractOcrCliModel(BaseOcrModel):
197
199
  def __call__(
198
200
  self, conv_res: ConversionResult, page_batch: Iterable[Page]
199
201
  ) -> Iterable[Page]:
200
-
201
202
  if not self.enabled:
202
203
  yield from page_batch
203
204
  return
@@ -225,19 +226,19 @@ class TesseractOcrCliModel(BaseOcrModel):
225
226
  fname = image_file.name
226
227
  high_res_image.save(image_file)
227
228
 
228
- df = self._run_tesseract(fname)
229
+ df_result = self._run_tesseract(fname)
229
230
  finally:
230
231
  if os.path.exists(fname):
231
232
  os.remove(fname)
232
233
 
233
- # _log.info(df)
234
+ # _log.info(df_result)
234
235
 
235
236
  # Print relevant columns (bounding box and text)
236
- for ix, row in df.iterrows():
237
+ for ix, row in df_result.iterrows():
237
238
  text = row["text"]
238
239
  conf = row["conf"]
239
240
 
240
- l = float(row["left"])
241
+ l = float(row["left"]) # noqa: E741
241
242
  b = float(row["top"])
242
243
  w = float(row["width"])
243
244
  h = float(row["height"])
@@ -1,6 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from collections.abc import Iterable
2
5
  from pathlib import Path
3
- from typing import Iterable, Optional, Type
6
+ from typing import Optional, Type
4
7
 
5
8
  from docling_core.types.doc import BoundingBox, CoordOrigin
6
9
  from docling_core.types.doc.page import BoundingRectangle, TextCell
@@ -38,7 +41,6 @@ class TesseractOcrModel(BaseOcrModel):
38
41
 
39
42
  self.scale = 3 # multiplier for 72 dpi == 216 dpi.
40
43
  self.reader = None
41
- self.osd_reader = None
42
44
  self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
43
45
 
44
46
  if self.enabled:
@@ -64,7 +66,7 @@ class TesseractOcrModel(BaseOcrModel):
64
66
  raise ImportError(install_errmsg)
65
67
  try:
66
68
  tesseract_version = tesserocr.tesseract_version()
67
- except:
69
+ except Exception:
68
70
  raise ImportError(install_errmsg)
69
71
 
70
72
  _, self._tesserocr_languages = tesserocr.get_languages()
@@ -75,7 +77,7 @@ class TesseractOcrModel(BaseOcrModel):
75
77
  _log.debug("Initializing TesserOCR: %s", tesseract_version)
76
78
  lang = "+".join(self.options.lang)
77
79
 
78
- if any([l.startswith("script/") for l in self._tesserocr_languages]):
80
+ if any(lang.startswith("script/") for lang in self._tesserocr_languages):
79
81
  self.script_prefix = "script/"
80
82
  else:
81
83
  self.script_prefix = ""
@@ -86,6 +88,8 @@ class TesseractOcrModel(BaseOcrModel):
86
88
  "oem": tesserocr.OEM.DEFAULT,
87
89
  }
88
90
 
91
+ self.osd_reader = None
92
+
89
93
  if self.options.path is not None:
90
94
  tesserocr_kwargs["path"] = self.options.path
91
95
 
@@ -149,7 +153,7 @@ class TesseractOcrModel(BaseOcrModel):
149
153
  script = map_tesseract_script(script)
150
154
  lang = f"{self.script_prefix}{script}"
151
155
 
152
- # Check if the detected languge is present in the system
156
+ # Check if the detected language is present in the system
153
157
  if lang not in self._tesserocr_languages:
154
158
  msg = f"Tesseract detected the script '{script}' and language '{lang}'."
155
159
  msg += " However this language is not installed in your system and will be ignored."
@@ -3,9 +3,10 @@ import logging
3
3
  import time
4
4
  import traceback
5
5
  from abc import ABC, abstractmethod
6
- from typing import Any, Callable, Iterable, List
6
+ from collections.abc import Iterable
7
+ from typing import Any, Callable, List
7
8
 
8
- from docling_core.types.doc import DoclingDocument, NodeItem
9
+ from docling_core.types.doc import NodeItem
9
10
 
10
11
  from docling.backend.abstract_backend import AbstractDocumentBackend
11
12
  from docling.backend.pdf_backend import PdfDocumentBackend
@@ -64,7 +65,6 @@ class BasePipeline(ABC):
64
65
  return conv_res
65
66
 
66
67
  def _enrich_document(self, conv_res: ConversionResult) -> ConversionResult:
67
-
68
68
  def _prepare_elements(
69
69
  conv_res: ConversionResult, model: GenericEnrichmentModel[Any]
70
70
  ) -> Iterable[NodeItem]:
@@ -113,7 +113,6 @@ class BasePipeline(ABC):
113
113
 
114
114
 
115
115
  class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
116
-
117
116
  def __init__(self, pipeline_options: PipelineOptions):
118
117
  super().__init__(pipeline_options)
119
118
  self.keep_backend = False
@@ -127,7 +126,6 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
127
126
  yield from page_batch
128
127
 
129
128
  def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
130
-
131
129
  if not isinstance(conv_res.input._backend, PdfDocumentBackend):
132
130
  raise RuntimeError(
133
131
  f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a PDF backend. "
@@ -139,8 +137,7 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
139
137
 
140
138
  total_elapsed_time = 0.0
141
139
  with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
142
-
143
- for i in range(0, conv_res.input.page_count):
140
+ for i in range(conv_res.input.page_count):
144
141
  start_page, end_page = conv_res.input.limits.page_range
145
142
  if (start_page - 1) <= i <= (end_page - 1):
146
143
  conv_res.pages.append(Page(page_no=i))
@@ -161,7 +158,6 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
161
158
  pipeline_pages = self._apply_on_pages(conv_res, init_pages)
162
159
 
163
160
  for p in pipeline_pages: # Must exhaust!
164
-
165
161
  # Cleanup cached images
166
162
  if not self.keep_images:
167
163
  p._image_cache = {}
@@ -24,7 +24,6 @@ class SimplePipeline(BasePipeline):
24
24
  super().__init__(pipeline_options)
25
25
 
26
26
  def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
27
-
28
27
  if not isinstance(conv_res.input._backend, DeclarativeDocumentBackend):
29
28
  raise RuntimeError(
30
29
  f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a declarative backend. "
@@ -1,5 +1,4 @@
1
1
  import logging
2
- import sys
3
2
  import warnings
4
3
  from pathlib import Path
5
4
  from typing import Optional, cast
@@ -1,5 +1,4 @@
1
1
  import logging
2
- import warnings
3
2
  from io import BytesIO
4
3
  from pathlib import Path
5
4
  from typing import List, Optional, Union, cast
@@ -32,7 +31,6 @@ _log = logging.getLogger(__name__)
32
31
 
33
32
 
34
33
  class VlmPipeline(PaginatedPipeline):
35
-
36
34
  def __init__(self, pipeline_options: VlmPipelineOptions):
37
35
  super().__init__(pipeline_options)
38
36
  self.keep_backend = True
@@ -114,7 +112,6 @@ class VlmPipeline(PaginatedPipeline):
114
112
 
115
113
  def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
116
114
  with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
117
-
118
115
  if (
119
116
  self.pipeline_options.vlm_options.response_format
120
117
  == ResponseFormat.DOCTAGS
docling/utils/export.py CHANGED
@@ -1,8 +1,8 @@
1
1
  import logging
2
- from typing import Any, Dict, Iterable, List, Tuple, Union
2
+ from collections.abc import Iterable
3
+ from typing import Any, Dict, List, Tuple, Union
3
4
 
4
5
  from docling_core.types.doc import BoundingBox, CoordOrigin
5
- from docling_core.types.doc.page import TextCell
6
6
  from docling_core.types.legacy_doc.base import BaseCell, BaseText, Ref, Table
7
7
 
8
8
  from docling.datamodel.document import ConversionResult, Page
@@ -13,7 +13,6 @@ _log = logging.getLogger(__name__)
13
13
  def generate_multimodal_pages(
14
14
  doc_result: ConversionResult,
15
15
  ) -> Iterable[Tuple[str, str, List[Dict[str, Any]], List[Dict[str, Any]], Page]]:
16
-
17
16
  label_to_doclaynet = {
18
17
  "title": "title",
19
18
  "table-of-contents": "document_index",
@@ -122,7 +121,6 @@ def generate_multimodal_pages(
122
121
  if doc.main_text is None:
123
122
  return
124
123
  for ix, orig_item in enumerate(doc.main_text):
125
-
126
124
  item = doc._resolve_ref(orig_item) if isinstance(orig_item, Ref) else orig_item
127
125
  if item is None or item.prov is None or len(item.prov) == 0:
128
126
  _log.debug(f"Skipping item {orig_item}")
@@ -29,7 +29,7 @@ def resolve_item(paths, obj):
29
29
 
30
30
  try:
31
31
  key = int(paths[0])
32
- except:
32
+ except Exception:
33
33
  key = paths[0]
34
34
 
35
35
  if len(paths) == 1:
@@ -67,7 +67,7 @@ def _flatten_table_grid(grid: List[List[dict]]) -> List[dict]:
67
67
  return unique_objects
68
68
 
69
69
 
70
- def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
70
+ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument: # noqa: C901
71
71
  origin = DocumentOrigin(
72
72
  mimetype="application/pdf",
73
73
  filename=doc_glm["file-info"]["filename"],
@@ -18,7 +18,7 @@ class UnionFind:
18
18
 
19
19
  def __init__(self, elements):
20
20
  self.parent = {elem: elem for elem in elements}
21
- self.rank = {elem: 0 for elem in elements}
21
+ self.rank = dict.fromkeys(elements, 0)
22
22
 
23
23
  def find(self, x):
24
24
  if self.parent[x] != x:
@@ -484,7 +484,9 @@ class LayoutPostprocessor:
484
484
  spatial_index = (
485
485
  self.regular_index
486
486
  if cluster_type == "regular"
487
- else self.picture_index if cluster_type == "picture" else self.wrapper_index
487
+ else self.picture_index
488
+ if cluster_type == "picture"
489
+ else self.wrapper_index
488
490
  )
489
491
 
490
492
  # Map of currently valid clusters
@@ -4,12 +4,15 @@ from typing import Optional
4
4
 
5
5
  from docling.datamodel.pipeline_options import (
6
6
  granite_picture_description,
7
+ smoldocling_vlm_conversion_options,
8
+ smoldocling_vlm_mlx_conversion_options,
7
9
  smolvlm_picture_description,
8
10
  )
9
11
  from docling.datamodel.settings import settings
10
12
  from docling.models.code_formula_model import CodeFormulaModel
11
13
  from docling.models.document_picture_classifier import DocumentPictureClassifier
12
14
  from docling.models.easyocr_model import EasyOcrModel
15
+ from docling.models.hf_vlm_model import HuggingFaceVlmModel
13
16
  from docling.models.layout_model import LayoutModel
14
17
  from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
15
18
  from docling.models.table_structure_model import TableStructureModel
@@ -27,6 +30,8 @@ def download_models(
27
30
  with_code_formula: bool = True,
28
31
  with_picture_classifier: bool = True,
29
32
  with_smolvlm: bool = False,
33
+ with_smoldocling: bool = False,
34
+ with_smoldocling_mlx: bool = False,
30
35
  with_granite_vision: bool = False,
31
36
  with_easyocr: bool = True,
32
37
  ):
@@ -37,7 +42,7 @@ def download_models(
37
42
  output_dir.mkdir(exist_ok=True, parents=True)
38
43
 
39
44
  if with_layout:
40
- _log.info(f"Downloading layout model...")
45
+ _log.info("Downloading layout model...")
41
46
  LayoutModel.download_models(
42
47
  local_dir=output_dir / LayoutModel._model_repo_folder,
43
48
  force=force,
@@ -45,7 +50,7 @@ def download_models(
45
50
  )
46
51
 
47
52
  if with_tableformer:
48
- _log.info(f"Downloading tableformer model...")
53
+ _log.info("Downloading tableformer model...")
49
54
  TableStructureModel.download_models(
50
55
  local_dir=output_dir / TableStructureModel._model_repo_folder,
51
56
  force=force,
@@ -53,7 +58,7 @@ def download_models(
53
58
  )
54
59
 
55
60
  if with_picture_classifier:
56
- _log.info(f"Downloading picture classifier model...")
61
+ _log.info("Downloading picture classifier model...")
57
62
  DocumentPictureClassifier.download_models(
58
63
  local_dir=output_dir / DocumentPictureClassifier._model_repo_folder,
59
64
  force=force,
@@ -61,7 +66,7 @@ def download_models(
61
66
  )
62
67
 
63
68
  if with_code_formula:
64
- _log.info(f"Downloading code formula model...")
69
+ _log.info("Downloading code formula model...")
65
70
  CodeFormulaModel.download_models(
66
71
  local_dir=output_dir / CodeFormulaModel._model_repo_folder,
67
72
  force=force,
@@ -69,7 +74,7 @@ def download_models(
69
74
  )
70
75
 
71
76
  if with_smolvlm:
72
- _log.info(f"Downloading SmolVlm model...")
77
+ _log.info("Downloading SmolVlm model...")
73
78
  PictureDescriptionVlmModel.download_models(
74
79
  repo_id=smolvlm_picture_description.repo_id,
75
80
  local_dir=output_dir / smolvlm_picture_description.repo_cache_folder,
@@ -77,8 +82,27 @@ def download_models(
77
82
  progress=progress,
78
83
  )
79
84
 
85
+ if with_smoldocling:
86
+ _log.info("Downloading SmolDocling model...")
87
+ HuggingFaceVlmModel.download_models(
88
+ repo_id=smoldocling_vlm_conversion_options.repo_id,
89
+ local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
90
+ force=force,
91
+ progress=progress,
92
+ )
93
+
94
+ if with_smoldocling_mlx:
95
+ _log.info("Downloading SmolDocling MLX model...")
96
+ HuggingFaceVlmModel.download_models(
97
+ repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
98
+ local_dir=output_dir
99
+ / smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
100
+ force=force,
101
+ progress=progress,
102
+ )
103
+
80
104
  if with_granite_vision:
81
- _log.info(f"Downloading Granite Vision model...")
105
+ _log.info("Downloading Granite Vision model...")
82
106
  PictureDescriptionVlmModel.download_models(
83
107
  repo_id=granite_picture_description.repo_id,
84
108
  local_dir=output_dir / granite_picture_description.repo_cache_folder,
@@ -87,7 +111,7 @@ def download_models(
87
111
  )
88
112
 
89
113
  if with_easyocr:
90
- _log.info(f"Downloading easyocr models...")
114
+ _log.info("Downloading easyocr models...")
91
115
  EasyOcrModel.download_models(
92
116
  local_dir=output_dir / EasyOcrModel._model_repo_folder,
93
117
  force=force,
docling/utils/utils.py CHANGED
@@ -13,14 +13,14 @@ def chunkify(iterator, chunk_size):
13
13
  if isinstance(iterator, List):
14
14
  iterator = iter(iterator)
15
15
  for first in iterator: # Take the first element from the iterator
16
- yield [first] + list(islice(iterator, chunk_size - 1))
16
+ yield [first, *list(islice(iterator, chunk_size - 1))]
17
17
 
18
18
 
19
19
  def create_file_hash(path_or_stream: Union[BytesIO, Path]) -> str:
20
20
  """Create a stable page_hash of the path_or_stream of a file"""
21
21
 
22
22
  block_size = 65536
23
- hasher = hashlib.sha256()
23
+ hasher = hashlib.sha256(usedforsecurity=False)
24
24
 
25
25
  def _hash_buf(binary_stream):
26
26
  buf = binary_stream.read(block_size) # read and page_hash in chunks
@@ -38,7 +38,7 @@ def create_file_hash(path_or_stream: Union[BytesIO, Path]) -> str:
38
38
 
39
39
 
40
40
  def create_hash(string: str):
41
- hasher = hashlib.sha256()
41
+ hasher = hashlib.sha256(usedforsecurity=False)
42
42
  hasher.update(string.encode("utf-8"))
43
43
 
44
44
  return hasher.hexdigest()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.30.0
3
+ Version: 2.31.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/docling-project/docling
6
6
  License: MIT
@@ -86,6 +86,7 @@ Description-Content-Type: text/markdown
86
86
  [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
87
87
  [![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
88
88
  [![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling)
89
+ [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/10101/badge)](https://www.bestpractices.dev/projects/10101)
89
90
  [![LF AI & Data](https://img.shields.io/badge/LF%20AI%20%26%20Data-003778?logo=linuxfoundation&logoColor=fff&color=0094ff&labelColor=003778)](https://lfaidata.foundation/projects/)
90
91
 
91
92
  Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
@@ -0,0 +1,86 @@
1
+ docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
4
+ docling/backend/asciidoc_backend.py,sha256=VZ8Xk1VHGHRqBo_TdtMzRAu1NFaFaJ8dk4CaEcBaEm0,14038
5
+ docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
6
+ docling/backend/docling_parse_backend.py,sha256=V_CsUdN5RkGQBBq7A_ReAiUW4CQVh0-1Ur157Ozurdg,8017
7
+ docling/backend/docling_parse_v2_backend.py,sha256=6fokgqb1hMbZua33gL46EFamrwPTC7ms6ZuEHw-Dv28,9395
8
+ docling/backend/docling_parse_v4_backend.py,sha256=-WJZs0IsdN6blhkvTS1eh_qhujYLyJ3XcOMqS6AaXxg,6282
9
+ docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ docling/backend/docx/latex/latex_dict.py,sha256=tFJp4ScT_AkY2ON7nLEa560p601Jq2glcZvMKxxjn7w,6593
12
+ docling/backend/docx/latex/omml.py,sha256=nEpcfyyrOucJyj6cD7wfThrIa-q0CQCoqMb3dkrhCRg,12094
13
+ docling/backend/html_backend.py,sha256=3K-l5SUAAyqISNEb7nPst_I51xzYOVOkgmwXh3lv9sw,21063
14
+ docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
16
+ docling/backend/md_backend.py,sha256=JkY1qTvQFXjKSZGfD-83d-fZelorUG_l6mpJdYGqvX8,17210
17
+ docling/backend/msexcel_backend.py,sha256=3j0WQfqDpgPXdPMCguefdv7arcNVDedPD6gl54cmLn8,18110
18
+ docling/backend/mspowerpoint_backend.py,sha256=RwqfvvzrtM56L9uf7PR9lvlHJ-LyYGpkS1iVxkTl72Q,17203
19
+ docling/backend/msword_backend.py,sha256=lVVMNwt0WIl4RD5wAf8pc8bJsb60x1BA8hTTkVmEVa8,32477
20
+ docling/backend/pdf_backend.py,sha256=KE9TMuFO5WX-o5A_DAd4tEaLi4HMZ4XjKdpllItVkWM,2238
21
+ docling/backend/pypdfium2_backend.py,sha256=pX8f0WbUb0KTDTKyQuLzP_lgHHubyGXWD33vmpefPy8,10805
22
+ docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ docling/backend/xml/jats_backend.py,sha256=ghGi9bHjx3BvaOtmzLw86-wZy4UxpQPOPQL4e73-BI8,24927
24
+ docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
25
+ docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
26
+ docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
+ docling/cli/main.py,sha256=D7WEY4x6pQCVFRy3peK9KUDOb0Y5IVc-vTDqPnHPK00,26138
28
+ docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
29
+ docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
30
+ docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ docling/datamodel/base_models.py,sha256=DRE_XoldtCreWF4ucO0iK0l8uOnfvnhQaYjV0z1Qe0M,7921
32
+ docling/datamodel/document.py,sha256=_0Z4zUgCB5677ZW8Y7C1fv75enLZJOJUjcUkGTSiTBA,15553
33
+ docling/datamodel/pipeline_options.py,sha256=-1QG8dY0RZkTJb66lXErEAnPq4F_1vgnk_5AcIr3cgU,13350
34
+ docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
35
+ docling/document_converter.py,sha256=PRRr65nigQ3LZDl4G2fBMkOtJyswT7xyGt7fpUeDO3w,13849
36
+ docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
37
+ docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ docling/models/api_vlm_model.py,sha256=w1SzdG3Ypz_0iZGiX-skMwV1E1JnOHH2BJiNkcEEIAA,2478
39
+ docling/models/base_model.py,sha256=Zx_nByGYkubTvvYiQxwiB6P8lc7wOD4ZTC2QIw6vCEg,2950
40
+ docling/models/base_ocr_model.py,sha256=_iD8QCKQdv2VWrIuSRPyGP4oCz94h84WriHg9F2k-Z0,7172
41
+ docling/models/code_formula_model.py,sha256=9cplJFvP7jcJGz-p-MmL8_lqUhmaXZu7wKyX2aOTujs,11504
42
+ docling/models/document_picture_classifier.py,sha256=tyOnyM0vh8-pjh9PiHa_67YpK-3pc_vGQKlnfAyraBs,6255
43
+ docling/models/easyocr_model.py,sha256=3rgXMeB7LbMjevCAVDMG3voe3PQhQ7B-RyYrXzefUlQ,7365
44
+ docling/models/factories/__init__.py,sha256=x_EM5dDg_A3HBcBYzOoqwmA2AFLtJ1IzYDPX-R1A-Sg,868
45
+ docling/models/factories/base_factory.py,sha256=MfWIljMETi5aaVR-6qLTelW8u1gwDAQsOwg3fu7O4Qc,4028
46
+ docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
47
+ docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
48
+ docling/models/hf_mlx_model.py,sha256=B_B4hFU-jU0g_DQtQD8w4Ejorn10mkDuFI93wR_WhGk,4897
49
+ docling/models/hf_vlm_model.py,sha256=SiPMTLghMUjJ66dA2yN4UujpLO6PiOhLEPInWtXV_5s,6912
50
+ docling/models/layout_model.py,sha256=0fiJXJ4aPmcMsYY7rbN9LJ2mZ0_8G0ODY9kyNTAN3Ws,7823
51
+ docling/models/ocr_mac_model.py,sha256=A3TlEbvvwhkWiq9YARos3Y9yNcpPYQ7JGc_4hFtAK-8,5370
52
+ docling/models/page_assemble_model.py,sha256=GO7JI1D6T6EkSW94cLQobPGNQUahkxQqTPRwj5CnmFE,6304
53
+ docling/models/page_preprocessing_model.py,sha256=6pOGXiFQ-oz06UmJdcaYMdVyfZ0YVLWS6efGcx7Mxws,3105
54
+ docling/models/picture_description_api_model.py,sha256=qs3n0smC9DXhzwJeK_iQG08Y6ZFHInKtdGPVhzgvxgU,2091
55
+ docling/models/picture_description_base_model.py,sha256=FbBVXzAOB87xpJN28tuGCxoAdcf6mZNUOqJR7ljUg5g,2946
56
+ docling/models/picture_description_vlm_model.py,sha256=DiTjnehVy1n0N04xPUvZl8rx4TiNHzHn9Cnzy_ePGts,4177
57
+ docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
+ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
59
+ docling/models/rapid_ocr_model.py,sha256=Tq_1Egu5Hjx7Y69Vox17QTtRXztSyflB1fhN08CWQwY,5894
60
+ docling/models/readingorder_model.py,sha256=S9ru2ApY9sE-Uue3hptWHmbmElwo36bUbAikxCFpHYs,14574
61
+ docling/models/table_structure_model.py,sha256=1gxLaooK0IKMrnmS8nT1BItKqt1GAKghfpmLKb3i53g,12566
62
+ docling/models/tesseract_ocr_cli_model.py,sha256=iFdOud5ymoW9WV8bWLCDpd3LJBo9M5bTT5vc635zEDY,10229
63
+ docling/models/tesseract_ocr_model.py,sha256=72009TJL_7tXTEnhlsGRiw_KibrQ0LjZlCBtW8NtwUc,9339
64
+ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
+ docling/pipeline/base_pipeline.py,sha256=DnuxAf7EQusdSRae0QUVth-0f2mSff8JZjX-2vazk00,8751
66
+ docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
67
+ docling/pipeline/standard_pdf_pipeline.py,sha256=iNZMMGiHTwV6I4u_jjqXhVJ_DiPn_O9qnnee3PQxidc,10773
68
+ docling/pipeline/vlm_pipeline.py,sha256=g3bxPEqxK8x-B5S6pOpNNo5GxCMCRDZgPJUFqsBA1eg,9720
69
+ docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
70
+ docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
+ docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
72
+ docling/utils/api_image_request.py,sha256=_CgdzmPqdsyXmyYUFGLZcXcoH586qC6A1p5vsNbj1Q0,1416
73
+ docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
74
+ docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
75
+ docling/utils/layout_postprocessor.py,sha256=x7exVG3HYzV9M_O78FfyoG43Y2L7PPMMydvSNwjqh8s,24528
76
+ docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
77
+ docling/utils/model_downloader.py,sha256=ocvud3G3qlBQhzMo69Q3RJMnvq5HPZ2DwNbMuEp8RCs,4142
78
+ docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
79
+ docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
80
+ docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
81
+ docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
82
+ docling-2.31.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
83
+ docling-2.31.1.dist-info/METADATA,sha256=31fTxA8TvMdw_KdThEyn3Z5GAHAhNEtvFYlrPdzqV4w,10108
84
+ docling-2.31.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
85
+ docling-2.31.1.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
86
+ docling-2.31.1.dist-info/RECORD,,
@@ -1,86 +0,0 @@
1
- docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
4
- docling/backend/asciidoc_backend.py,sha256=xBtmYkRkPICIfMbB8AFIw_or4IZGB17mP_LhXorvZ1k,14060
5
- docling/backend/csv_backend.py,sha256=lCNSkgB55IbAig7w4IyXRkX23aM3Nojj6GdXNoaNjY4,4536
6
- docling/backend/docling_parse_backend.py,sha256=tcy4cPD_dtGD37CjivbFvwzwXVcrb3HVmofyasxLum8,7991
7
- docling/backend/docling_parse_v2_backend.py,sha256=70kXqYhht-A8zb9z5emMe_1i0l9dyQGrM8lg1cmAvqc,9369
8
- docling/backend/docling_parse_v4_backend.py,sha256=IECMJQWEvYqQv043_1Ho6dLkCbuaK8cMUsqcxwqruXo,6287
9
- docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- docling/backend/docx/latex/latex_dict.py,sha256=5pOMY_KyxYmgBZ40IrA4q0t5L6JvXOCx5cVwoQE1lls,6690
12
- docling/backend/docx/latex/omml.py,sha256=5zuXYOQ10e9nSTKFURBjoU-XSQZVHsVyIiCsGYGVAk8,12127
13
- docling/backend/html_backend.py,sha256=ghPLZfdBEPBzLIO9IWzzx0t1Os9B9r4VyGyEZtMsZVI,19468
14
- docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
16
- docling/backend/md_backend.py,sha256=lqDiKIBHGsA0u-H1n9oVpPlrcpVT4gYRuNXXcyGlftM,17219
17
- docling/backend/msexcel_backend.py,sha256=KRPoHRDv-mqko9RUHGQCzdRrvDo7g7zSU2Z5zoL_Hzo,18106
18
- docling/backend/mspowerpoint_backend.py,sha256=X55-1anXm562wxAuYn5uwQkqKjirmgrn1KfbeaKUbXw,17273
19
- docling/backend/msword_backend.py,sha256=CgNPjU8SQ7rkAYH_BGiUyv568MGhoH3R0M39WBT8gkc,32468
20
- docling/backend/pdf_backend.py,sha256=odWb1rxk3WCUIEJMhq-dYFNUQ1pSDuNHbU9wlTZIRAs,2211
21
- docling/backend/pypdfium2_backend.py,sha256=wRwhA5XHRqL7vyNhCAHM6P-ONkwtyjKG9LgC4NJ-4i8,10784
22
- docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
24
- docling/backend/xml/uspto_backend.py,sha256=H0jwIt2skOke_yEUk0wfXCtodrB-hrj2ygLtB3jMWaI,71056
25
- docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
26
- docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- docling/cli/main.py,sha256=TD-cEf4giuk1O5NPoB-heXHHteUqKoLsj4Rg4xsBUrs,26119
28
- docling/cli/models.py,sha256=tM_qbMM3YOPxFU7JlME96MLbtd1CX_bOAK7FS-NhJvY,3979
29
- docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
30
- docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- docling/datamodel/base_models.py,sha256=fJfFMaHXc-CUrAVfhPF8lKrdb-gaXr2tohx6dHldvRU,7926
32
- docling/datamodel/document.py,sha256=V0iK1MYOkPIzd4eQa-G8unp-t01fktlG9wwQ1IwE6Zg,15109
33
- docling/datamodel/pipeline_options.py,sha256=iGLijZR-YOtmg0RQs59pqoG_1uGsDYbg5wMDD0FWYx4,13351
34
- docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
35
- docling/document_converter.py,sha256=LCX92FzgmXNJLFVSQfjqH9SGe3zA7FGwARedSigFIpY,13798
36
- docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
37
- docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- docling/models/api_vlm_model.py,sha256=6SxMsFPf0SbT365P67KspdpF3TXZSeu5kmPE3lXAhW4,2470
39
- docling/models/base_model.py,sha256=9xJ0VIlpR2BzqoEWMC8LYp5Y96QAEKip4b_HCwCDltY,2931
40
- docling/models/base_ocr_model.py,sha256=xvKMhE4ZOGkL2GAhpDvrAHLLFps3ZUfxXZ5ctL1lXUw,7226
41
- docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
42
- docling/models/document_picture_classifier.py,sha256=fz77RsTdlnA_yC47O-KUq2xVWMKX0_9jm_EGcHliw-E,6235
43
- docling/models/easyocr_model.py,sha256=ezq3yv5lORe7T1bbSoTZALck2oHqyEHq57cRfhMYCCQ,7401
44
- docling/models/factories/__init__.py,sha256=e4lFmRfmW5hWqvJjY5xaVFbvCQhDBCrVeSq85Q2K_aM,872
45
- docling/models/factories/base_factory.py,sha256=pNR9-B_BKs2sYNyHnp2ON2l3r6Dy9lcof4qmwHlAryI,4032
46
- docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
47
- docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
48
- docling/models/hf_mlx_model.py,sha256=2eSHphJm5LAfiSA24blVMc2znJlKMYrtmmzq8ffc-rU,4924
49
- docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
50
- docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
51
- docling/models/ocr_mac_model.py,sha256=2pZaUWg19go_u88mKWr5y_52PAYEN__GsbyUYLdY4zo,5353
52
- docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
53
- docling/models/page_preprocessing_model.py,sha256=Ja7RE1K-2fWxWrxOzNm6QDSGqFf-MY6_uY5OAZ7AQSo,3078
54
- docling/models/picture_description_api_model.py,sha256=DowWOU93MXAjj3N1A9ex88Sa3Nic2c3dfoOYir5jZEA,2064
55
- docling/models/picture_description_base_model.py,sha256=khuhQZDAZemZMe4BsrBMpjEwkY3nhMFXuczjQpSQrVY,2971
56
- docling/models/picture_description_vlm_model.py,sha256=I2Un3vfhQVeWEyZ3Sd3Kygw9la2QSZCwDfl_7XVlMm4,4042
57
- docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
59
- docling/models/rapid_ocr_model.py,sha256=C_I0Ek9mAPIyTFRHuNbqtXg1c15rLNDE1tJ6_hPIi4c,5869
60
- docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
61
- docling/models/table_structure_model.py,sha256=pvTsqUa5QIANBUfot0XXG1UUeku-eaUi04EPE-Yh2g0,12597
62
- docling/models/tesseract_ocr_cli_model.py,sha256=CZ1W0QbvveIpXO0qSXmXFqz71P4PfLfJBQIqU_Wlg_E,10072
63
- docling/models/tesseract_ocr_model.py,sha256=UpLAgKgJtBgbKtJELmKBNMcejJJKBCyFK0q-WgZN1Eg,9256
64
- docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
- docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
66
- docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
67
- docling/pipeline/standard_pdf_pipeline.py,sha256=gPNqUparhIONG4AyMekW9OfZ7t8YMs0odhtbE6Z-Hxw,10784
68
- docling/pipeline/vlm_pipeline.py,sha256=dqQYAd3viW577TVSZltnB4P-f-ZUWQh0J8SSFDuQN6Q,9738
69
- docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
70
- docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
- docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
72
- docling/utils/api_image_request.py,sha256=_CgdzmPqdsyXmyYUFGLZcXcoH586qC6A1p5vsNbj1Q0,1416
73
- docling/utils/export.py,sha256=4W-ptI1fLdVrtoqHdHY1RF9Xn2Yescs-hunITqxJ7Is,4697
74
- docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
75
- docling/utils/layout_postprocessor.py,sha256=Q36DfcIYMuMfC6LzCBIrYtHK7pBE-Xyvjepz660s9UM,24508
76
- docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
77
- docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
78
- docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
79
- docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
80
- docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
81
- docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
82
- docling-2.30.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
83
- docling-2.30.0.dist-info/METADATA,sha256=HSI154YUnSDJE8BMMjOuu-U3EXQg0ksFuyuyzv7-UdU,9982
84
- docling-2.30.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
85
- docling-2.30.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
86
- docling-2.30.0.dist-info/RECORD,,