docling 2.7.1__py3-none-any.whl → 2.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -507,18 +507,19 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
507
507
 
508
508
  image_data = get_docx_image(element, drawing_blip)
509
509
  image_bytes = BytesIO(image_data)
510
+ level = self.get_level()
510
511
  # Open the BytesIO object with PIL to create an Image
511
512
  try:
512
513
  pil_image = Image.open(image_bytes)
513
514
  doc.add_picture(
514
- parent=self.parents[self.level],
515
+ parent=self.parents[level - 1],
515
516
  image=ImageRef.from_pil(image=pil_image, dpi=72),
516
517
  caption=None,
517
518
  )
518
519
  except (UnidentifiedImageError, OSError) as e:
519
520
  _log.warning("Warning: image cannot be loaded by Pillow")
520
521
  doc.add_picture(
521
- parent=self.parents[self.level],
522
+ parent=self.parents[level - 1],
522
523
  caption=None,
523
524
  )
524
525
  return
docling/cli/main.py CHANGED
@@ -27,10 +27,12 @@ from docling.datamodel.pipeline_options import (
27
27
  OcrMacOptions,
28
28
  OcrOptions,
29
29
  PdfPipelineOptions,
30
+ RapidOcrOptions,
30
31
  TableFormerMode,
31
32
  TesseractCliOcrOptions,
32
33
  TesseractOcrOptions,
33
34
  )
35
+ from docling.datamodel.settings import settings
34
36
  from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
35
37
 
36
38
  warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
@@ -76,6 +78,7 @@ class OcrEngine(str, Enum):
76
78
  TESSERACT_CLI = "tesseract_cli"
77
79
  TESSERACT = "tesseract"
78
80
  OCRMAC = "ocrmac"
81
+ RAPIDOCR = "rapidocr"
79
82
 
80
83
 
81
84
  def export_documents(
@@ -210,6 +213,24 @@ def convert(
210
213
  help="Set the verbosity level. -v for info logging, -vv for debug logging.",
211
214
  ),
212
215
  ] = 0,
216
+ debug_visualize_cells: Annotated[
217
+ bool,
218
+ typer.Option(..., help="Enable debug output which visualizes the PDF cells"),
219
+ ] = False,
220
+ debug_visualize_ocr: Annotated[
221
+ bool,
222
+ typer.Option(..., help="Enable debug output which visualizes the OCR cells"),
223
+ ] = False,
224
+ debug_visualize_layout: Annotated[
225
+ bool,
226
+ typer.Option(
227
+ ..., help="Enable debug output which visualizes the layout clusters"
228
+ ),
229
+ ] = False,
230
+ debug_visualize_tables: Annotated[
231
+ bool,
232
+ typer.Option(..., help="Enable debug output which visualizes the table cells"),
233
+ ] = False,
213
234
  version: Annotated[
214
235
  Optional[bool],
215
236
  typer.Option(
@@ -227,6 +248,11 @@ def convert(
227
248
  elif verbose == 2:
228
249
  logging.basicConfig(level=logging.DEBUG)
229
250
 
251
+ settings.debug.visualize_cells = debug_visualize_cells
252
+ settings.debug.visualize_layout = debug_visualize_layout
253
+ settings.debug.visualize_tables = debug_visualize_tables
254
+ settings.debug.visualize_ocr = debug_visualize_ocr
255
+
230
256
  if from_formats is None:
231
257
  from_formats = [e for e in InputFormat]
232
258
 
@@ -262,6 +288,8 @@ def convert(
262
288
  ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
263
289
  elif ocr_engine == OcrEngine.OCRMAC:
264
290
  ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr)
291
+ elif ocr_engine == OcrEngine.RAPIDOCR:
292
+ ocr_options = RapidOcrOptions(force_full_page_ocr=force_ocr)
265
293
  else:
266
294
  raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
267
295
 
@@ -29,6 +29,42 @@ class OcrOptions(BaseModel):
29
29
  )
30
30
 
31
31
 
32
+ class RapidOcrOptions(OcrOptions):
33
+ kind: Literal["rapidocr"] = "rapidocr"
34
+
35
+ # English and Chinese are the most commonly used models and have been tested with RapidOCR.
36
+ lang: List[str] = [
37
+ "english",
38
+ "chinese",
39
+ ] # However, language as a parameter is not supported by rapidocr yet and hence changing this option doesn't affect anything.
40
+ # For more details on supported languages by RapidOCR visit https://rapidai.github.io/RapidOCRDocs/blog/2022/09/28/%E6%94%AF%E6%8C%81%E8%AF%86%E5%88%AB%E8%AF%AD%E8%A8%80/
41
+
42
+ # For more details on the following options visit https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/
43
+ text_score: float = 0.5 # same default as rapidocr
44
+
45
+ use_det: Optional[bool] = None # same default as rapidocr
46
+ use_cls: Optional[bool] = None # same default as rapidocr
47
+ use_rec: Optional[bool] = None # same default as rapidocr
48
+
49
+ # class Device(Enum):
50
+ # CPU = "CPU"
51
+ # CUDA = "CUDA"
52
+ # DIRECTML = "DIRECTML"
53
+ # AUTO = "AUTO"
54
+
55
+ # device: Device = Device.AUTO # Default value is AUTO
56
+
57
+ print_verbose: bool = False # same default as rapidocr
58
+
59
+ det_model_path: Optional[str] = None # same default as rapidocr
60
+ cls_model_path: Optional[str] = None # same default as rapidocr
61
+ rec_model_path: Optional[str] = None # same default as rapidocr
62
+
63
+ model_config = ConfigDict(
64
+ extra="forbid",
65
+ )
66
+
67
+
32
68
  class EasyOcrOptions(OcrOptions):
33
69
  kind: Literal["easyocr"] = "easyocr"
34
70
  lang: List[str] = ["fr", "de", "es", "en"]
@@ -0,0 +1,147 @@
1
+ import logging
2
+ from typing import Iterable
3
+
4
+ import numpy
5
+ from docling_core.types.doc import BoundingBox, CoordOrigin
6
+
7
+ from docling.datamodel.base_models import OcrCell, Page
8
+ from docling.datamodel.document import ConversionResult
9
+ from docling.datamodel.pipeline_options import RapidOcrOptions
10
+ from docling.datamodel.settings import settings
11
+ from docling.models.base_ocr_model import BaseOcrModel
12
+ from docling.utils.profiling import TimeRecorder
13
+
14
+ _log = logging.getLogger(__name__)
15
+
16
+
17
+ class RapidOcrModel(BaseOcrModel):
18
+ def __init__(self, enabled: bool, options: RapidOcrOptions):
19
+ super().__init__(enabled=enabled, options=options)
20
+ self.options: RapidOcrOptions
21
+
22
+ self.scale = 3 # multiplier for 72 dpi == 216 dpi.
23
+
24
+ if self.enabled:
25
+ try:
26
+ from rapidocr_onnxruntime import RapidOCR # type: ignore
27
+ except ImportError:
28
+ raise ImportError(
29
+ "RapidOCR is not installed. Please install it via `pip install rapidocr_onnxruntime` to use this OCR engine. "
30
+ "Alternatively, Docling has support for other OCR engines. See the documentation."
31
+ )
32
+
33
+ # This configuration option will be revamped while introducing device settings for all models.
34
+ # For the moment we will default to auto and let onnx-runtime pick the best.
35
+ cls_use_cuda = True
36
+ rec_use_cuda = True
37
+ det_use_cuda = True
38
+ det_use_dml = True
39
+ cls_use_dml = True
40
+ rec_use_dml = True
41
+
42
+ # # Same as Defaults in RapidOCR
43
+ # cls_use_cuda = False
44
+ # rec_use_cuda = False
45
+ # det_use_cuda = False
46
+ # det_use_dml = False
47
+ # cls_use_dml = False
48
+ # rec_use_dml = False
49
+
50
+ # # If we set everything to true onnx-runtime would automatically choose the fastest accelerator
51
+ # if self.options.device == self.options.Device.AUTO:
52
+ # cls_use_cuda = True
53
+ # rec_use_cuda = True
54
+ # det_use_cuda = True
55
+ # det_use_dml = True
56
+ # cls_use_dml = True
57
+ # rec_use_dml = True
58
+
59
+ # # If we set use_cuda to true, onnx would use the cuda device available at runtime; if no cuda device is available, it would run on CPU.
60
+ # elif self.options.device == self.options.Device.CUDA:
61
+ # cls_use_cuda = True
62
+ # rec_use_cuda = True
63
+ # det_use_cuda = True
64
+
65
+ # # If we set use_dml to true, onnx would use the dml device available at runtime; if no dml device is available, it would run on CPU.
66
+ # elif self.options.device == self.options.Device.DIRECTML:
67
+ # det_use_dml = True
68
+ # cls_use_dml = True
69
+ # rec_use_dml = True
70
+
71
+ self.reader = RapidOCR(
72
+ text_score=self.options.text_score,
73
+ cls_use_cuda=cls_use_cuda,
74
+ rec_use_cuda=rec_use_cuda,
75
+ det_use_cuda=det_use_cuda,
76
+ det_use_dml=det_use_dml,
77
+ cls_use_dml=cls_use_dml,
78
+ rec_use_dml=rec_use_dml,
79
+ print_verbose=self.options.print_verbose,
80
+ det_model_path=self.options.det_model_path,
81
+ cls_model_path=self.options.cls_model_path,
82
+ rec_model_path=self.options.rec_model_path,
83
+ )
84
+
85
+ def __call__(
86
+ self, conv_res: ConversionResult, page_batch: Iterable[Page]
87
+ ) -> Iterable[Page]:
88
+
89
+ if not self.enabled:
90
+ yield from page_batch
91
+ return
92
+
93
+ for page in page_batch:
94
+
95
+ assert page._backend is not None
96
+ if not page._backend.is_valid():
97
+ yield page
98
+ else:
99
+ with TimeRecorder(conv_res, "ocr"):
100
+ ocr_rects = self.get_ocr_rects(page)
101
+
102
+ all_ocr_cells = []
103
+ for ocr_rect in ocr_rects:
104
+ # Skip zero area boxes
105
+ if ocr_rect.area() == 0:
106
+ continue
107
+ high_res_image = page._backend.get_page_image(
108
+ scale=self.scale, cropbox=ocr_rect
109
+ )
110
+ im = numpy.array(high_res_image)
111
+ result, _ = self.reader(
112
+ im,
113
+ use_det=self.options.use_det,
114
+ use_cls=self.options.use_cls,
115
+ use_rec=self.options.use_rec,
116
+ )
117
+
118
+ del high_res_image
119
+ del im
120
+
121
+ cells = [
122
+ OcrCell(
123
+ id=ix,
124
+ text=line[1],
125
+ confidence=line[2],
126
+ bbox=BoundingBox.from_tuple(
127
+ coord=(
128
+ (line[0][0][0] / self.scale) + ocr_rect.l,
129
+ (line[0][0][1] / self.scale) + ocr_rect.t,
130
+ (line[0][2][0] / self.scale) + ocr_rect.l,
131
+ (line[0][2][1] / self.scale) + ocr_rect.t,
132
+ ),
133
+ origin=CoordOrigin.TOPLEFT,
134
+ ),
135
+ )
136
+ for ix, line in enumerate(result)
137
+ ]
138
+ all_ocr_cells.extend(cells)
139
+
140
+ # Post-process the cells
141
+ page.cells = self.post_process_cells(all_ocr_cells, page.cells)
142
+
143
+ # DEBUG code:
144
+ if settings.debug.visualize_ocr:
145
+ self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
146
+
147
+ yield page
@@ -13,6 +13,7 @@ from docling.datamodel.pipeline_options import (
13
13
  EasyOcrOptions,
14
14
  OcrMacOptions,
15
15
  PdfPipelineOptions,
16
+ RapidOcrOptions,
16
17
  TesseractCliOcrOptions,
17
18
  TesseractOcrOptions,
18
19
  )
@@ -26,6 +27,7 @@ from docling.models.page_preprocessing_model import (
26
27
  PagePreprocessingModel,
27
28
  PagePreprocessingOptions,
28
29
  )
30
+ from docling.models.rapid_ocr_model import RapidOcrModel
29
31
  from docling.models.table_structure_model import TableStructureModel
30
32
  from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
31
33
  from docling.models.tesseract_ocr_model import TesseractOcrModel
@@ -121,6 +123,11 @@ class StandardPdfPipeline(PaginatedPipeline):
121
123
  enabled=self.pipeline_options.do_ocr,
122
124
  options=self.pipeline_options.ocr_options,
123
125
  )
126
+ elif isinstance(self.pipeline_options.ocr_options, RapidOcrOptions):
127
+ return RapidOcrModel(
128
+ enabled=self.pipeline_options.do_ocr,
129
+ options=self.pipeline_options.ocr_options,
130
+ )
124
131
  elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
125
132
  if "darwin" != sys.platform:
126
133
  raise RuntimeError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.7.1
3
+ Version: 2.8.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -21,11 +21,12 @@ Classifier: Programming Language :: Python :: 3.11
21
21
  Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
23
  Provides-Extra: ocrmac
24
+ Provides-Extra: rapidocr
24
25
  Provides-Extra: tesserocr
25
26
  Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
26
27
  Requires-Dist: certifi (>=2024.7.4)
27
28
  Requires-Dist: deepsearch-glm (>=0.26.1,<0.27.0)
28
- Requires-Dist: docling-core (>=2.4.0,<3.0.0)
29
+ Requires-Dist: docling-core (>=2.5.1,<3.0.0)
29
30
  Requires-Dist: docling-ibm-models (>=2.0.6,<3.0.0)
30
31
  Requires-Dist: docling-parse (>=2.0.5,<3.0.0)
31
32
  Requires-Dist: easyocr (>=1.7,<2.0)
@@ -34,14 +35,16 @@ Requires-Dist: huggingface_hub (>=0.23,<1)
34
35
  Requires-Dist: lxml (>=4.0.0,<6.0.0)
35
36
  Requires-Dist: marko (>=2.1.2,<3.0.0)
36
37
  Requires-Dist: ocrmac (>=1.0.0,<2.0.0) ; (sys_platform == "darwin") and (extra == "ocrmac")
38
+ Requires-Dist: onnxruntime (>=1.7.0,<1.20.0) ; (python_version < "3.10") and (extra == "rapidocr")
39
+ Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (extra == "rapidocr")
37
40
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
38
41
  Requires-Dist: pandas (>=2.1.4,<3.0.0)
39
- Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
40
42
  Requires-Dist: pydantic (>=2.0.0,<2.10)
41
43
  Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
42
44
  Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
43
45
  Requires-Dist: python-docx (>=1.1.2,<2.0.0)
44
46
  Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
47
+ Requires-Dist: rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; (python_version < "3.13") and (extra == "rapidocr")
45
48
  Requires-Dist: requests (>=2.32.3,<3.0.0)
46
49
  Requires-Dist: rtree (>=1.3.0,<2.0.0)
47
50
  Requires-Dist: scipy (>=1.6.0,<2.0.0)
@@ -56,7 +59,7 @@ Description-Content-Type: text/markdown
56
59
  </a>
57
60
  </p>
58
61
 
59
- # Docling
62
+ # 🦆 Docling
60
63
 
61
64
  <p align="center">
62
65
  <a href="https://trendshift.io/repositories/12132" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12132" alt="DS4SD%2Fdocling | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
@@ -81,7 +84,7 @@ Docling parses documents and exports them to the desired format with ease and sp
81
84
  * 🗂️ Reads popular document formats (PDF, DOCX, PPTX, XLSX, Images, HTML, AsciiDoc & Markdown) and exports to Markdown and JSON
82
85
  * 📑 Advanced PDF document understanding including page layout, reading order & table structures
83
86
  * 🧩 Unified, expressive [DoclingDocument](https://ds4sd.github.io/docling/concepts/docling_document/) representation format
84
- * 🤖 Easy integration with LlamaIndex 🦙 & LangChain 🦜🔗 for powerful RAG / QA applications
87
+ * 🤖 Easy integration with 🦙 LlamaIndex & 🦜🔗 LangChain for powerful RAG / QA applications
85
88
  * 🔍 OCR support for scanned PDFs
86
89
  * 💻 Simple and convenient CLI
87
90
 
@@ -117,8 +120,24 @@ result = converter.convert(source)
117
120
  print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
118
121
  ```
119
122
 
120
- Check out [Getting started](https://ds4sd.github.io/docling/).
121
- You will find lots of tuning options to leverage all the advanced capabilities.
123
+ More [advanced usage options](https://ds4sd.github.io/docling/usage/) are available in
124
+ the docs.
125
+
126
+ ## Documentation
127
+
128
+ Check out Docling's [documentation](https://ds4sd.github.io/docling/) for details on
129
+ installation, usage, concepts, recipes, extensions, and more.
130
+
131
+ ## Examples
132
+
133
+ Go hands-on with our [examples](https://ds4sd.github.io/docling/examples/),
134
+ demonstrating how to address different application use cases with Docling.
135
+
136
+ ## Integrations
137
+
138
+ To further accelerate your AI application development, check out Docling's native
139
+ [integrations](https://ds4sd.github.io/docling/integrations/) with popular frameworks
140
+ and tools.
122
141
 
123
142
  ## Get help and support
124
143
 
@@ -8,15 +8,15 @@ docling/backend/html_backend.py,sha256=qbu1W8xoTGnXMuZPRPLq68hDbCEj6ygnpxP5gYaod
8
8
  docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
9
9
  docling/backend/msexcel_backend.py,sha256=23qUEScqr5GhY06xiqg-eBQ_JlAqO0FkPEmX6554sVA,12040
10
10
  docling/backend/mspowerpoint_backend.py,sha256=QD0NaatTO8U9CIFoiipkq3X5HxLZaaahH8nlrQ6ecDA,15710
11
- docling/backend/msword_backend.py,sha256=sMumfB9Xa2Md1a8WO-fGPPAKf1s3mCvErMyZ-xnBC2E,18495
11
+ docling/backend/msword_backend.py,sha256=VFHPr-gCak7w3NJToc5Cs-JaTb4Vm3a1JnnRIfJO3TI,18526
12
12
  docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
13
13
  docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
14
14
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- docling/cli/main.py,sha256=MpjbAXhOlbGnAnl5_OaKCdub61YPQBy1NOqroXQtNYE,10722
15
+ docling/cli/main.py,sha256=AgPD32NfM0_bmHeKjx5-fqk57ahX5tN3AeoDOerhTuE,11808
16
16
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  docling/datamodel/base_models.py,sha256=6qlwPamDZ3XUsE2kTAyGKG6O2IJClVjCqaE7DZ74KHU,5533
18
18
  docling/datamodel/document.py,sha256=9dQf_J18X_MEWs-Mg3Ed6BykFPJ79ETmkkxcssY-vYo,20698
19
- docling/datamodel/pipeline_options.py,sha256=aC_CmtEhNLIbn9n3JuYhL_aA8UA0vFgw7HcGMUuOI4o,3117
19
+ docling/datamodel/pipeline_options.py,sha256=J-6kWugUrxahymKzgaEgiqPuyle1fbInPXV2wNos6Vc,4550
20
20
  docling/datamodel/settings.py,sha256=JK8lZPBjUx2kD2q-Qpg-o3vOElADMcyQbRUL0EHZ7us,1263
21
21
  docling/document_converter.py,sha256=L0A3g7IQBaKIK7dWpUFC72ZqKywIPYkyh71Qd6DiNPE,10940
22
22
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,20 +28,21 @@ docling/models/layout_model.py,sha256=ZvbTSyxvXB5yLHNEti0Wv3trz0vwGuHySI5TCdApb0
28
28
  docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
29
29
  docling/models/page_assemble_model.py,sha256=kSGNiRKhmzkpFH7xCiT3rulMsgJmUXFa6Th_eB-cLEk,7103
30
30
  docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
31
+ docling/models/rapid_ocr_model.py,sha256=VQ0jaFmOzB9f-1JaqZ6d0o_El55Lr-nsFHfTNubMAuc,6005
31
32
  docling/models/table_structure_model.py,sha256=-ANSQpiN2avt3B9sbi7dHcoULUJbMBalAR5xxlrM7To,8421
32
33
  docling/models/tesseract_ocr_cli_model.py,sha256=OfopQnt2FGwtLJTMtW9jbJZ9EN2G2QFkA_aACjuUuDs,6372
33
34
  docling/models/tesseract_ocr_model.py,sha256=RDf6iV1q-oXaGfZXv0bW6SqjHNKQvBUDlUsOkuz0neY,6095
34
35
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
36
  docling/pipeline/base_pipeline.py,sha256=IF1XWYgUGbdB4-teLkmM4Hvg_UNEfPrGuhExMRTUsk8,7168
36
37
  docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
37
- docling/pipeline/standard_pdf_pipeline.py,sha256=btm_y1ZsjUrtWvMbF6RA8BVM0ENrK4z_rqF0jjdeZmU,8473
38
+ docling/pipeline/standard_pdf_pipeline.py,sha256=7sbkh9EwXlhSfJSgf-WyjB5jdJ1El7Pn4siSssTJpq8,8789
38
39
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
40
  docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
40
41
  docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
41
42
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
42
43
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
43
- docling-2.7.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
44
- docling-2.7.1.dist-info/METADATA,sha256=TvD3BGlbO1ci54NzwmLxqSITXIdMefyj71YjdZkD7Vs,6906
45
- docling-2.7.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
46
- docling-2.7.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
47
- docling-2.7.1.dist-info/RECORD,,
44
+ docling-2.8.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
45
+ docling-2.8.1.dist-info/METADATA,sha256=auj5PtDj-UBB72sW8jk1CSVSwQpd9q0nYzoAYIItl8o,7682
46
+ docling-2.8.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
47
+ docling-2.8.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
48
+ docling-2.8.1.dist-info/RECORD,,