docling 2.34.0__tar.gz → 2.35.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-2.34.0 → docling-2.35.0}/PKG-INFO +2 -2
- {docling-2.34.0 → docling-2.35.0}/docling/cli/main.py +36 -3
- {docling-2.34.0 → docling-2.35.0}/docling/datamodel/document.py +7 -2
- {docling-2.34.0 → docling-2.35.0}/docling/models/layout_model.py +16 -6
- {docling-2.34.0 → docling-2.35.0}/docling/models/page_preprocessing_model.py +11 -6
- {docling-2.34.0 → docling-2.35.0}/docling/pipeline/standard_pdf_pipeline.py +69 -57
- {docling-2.34.0 → docling-2.35.0}/pyproject.toml +2 -2
- {docling-2.34.0 → docling-2.35.0}/LICENSE +0 -0
- {docling-2.34.0 → docling-2.35.0}/README.md +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/abstract_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/asciidoc_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/csv_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/docling_parse_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/docling_parse_v2_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/docling_parse_v4_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/docx/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/docx/latex/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/docx/latex/latex_dict.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/docx/latex/omml.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/html_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/json/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/json/docling_json_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/md_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/msexcel_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/mspowerpoint_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/msword_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/pdf_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/pypdfium2_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/xml/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/xml/jats_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/backend/xml/uspto_backend.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/chunking/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/cli/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/cli/models.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/cli/tools.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/datamodel/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/datamodel/base_models.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/datamodel/pipeline_options.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/datamodel/settings.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/document_converter.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/exceptions.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/api_vlm_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/base_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/base_ocr_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/code_formula_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/document_picture_classifier.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/easyocr_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/factories/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/factories/base_factory.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/factories/ocr_factory.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/factories/picture_description_factory.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/hf_mlx_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/hf_vlm_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/ocr_mac_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/page_assemble_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/picture_description_api_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/picture_description_base_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/picture_description_vlm_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/plugins/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/plugins/defaults.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/rapid_ocr_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/readingorder_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/table_structure_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/tesseract_ocr_cli_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/models/tesseract_ocr_model.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/pipeline/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/pipeline/base_pipeline.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/pipeline/simple_pipeline.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/pipeline/vlm_pipeline.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/py.typed +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/__init__.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/accelerator_utils.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/api_image_request.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/export.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/glm_utils.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/layout_postprocessor.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/locks.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/model_downloader.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/ocr_utils.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/orientation.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/profiling.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/utils.py +0 -0
- {docling-2.34.0 → docling-2.35.0}/docling/utils/visualization.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.34.0
|
3
|
+
Version: 2.35.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Home-page: https://github.com/docling-project/docling
|
6
6
|
License: MIT
|
@@ -29,7 +29,7 @@ Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platfo
|
|
29
29
|
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
30
30
|
Requires-Dist: certifi (>=2024.7.4)
|
31
31
|
Requires-Dist: click (<8.2.0)
|
32
|
-
Requires-Dist: docling-core[chunking] (>=2.
|
32
|
+
Requires-Dist: docling-core[chunking] (>=2.31.2,<3.0.0)
|
33
33
|
Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
|
34
34
|
Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
|
35
35
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
@@ -12,6 +12,12 @@ from typing import Annotated, Dict, List, Optional, Type
|
|
12
12
|
|
13
13
|
import rich.table
|
14
14
|
import typer
|
15
|
+
from docling_core.transforms.serializer.html import (
|
16
|
+
HTMLDocSerializer,
|
17
|
+
HTMLOutputStyle,
|
18
|
+
HTMLParams,
|
19
|
+
)
|
20
|
+
from docling_core.transforms.visualizer.layout_visualizer import LayoutVisualizer
|
15
21
|
from docling_core.types.doc import ImageRefMode
|
16
22
|
from docling_core.utils.file import resolve_source_to_path
|
17
23
|
from pydantic import TypeAdapter
|
@@ -156,6 +162,7 @@ def export_documents(
|
|
156
162
|
export_json: bool,
|
157
163
|
export_html: bool,
|
158
164
|
export_html_split_page: bool,
|
165
|
+
show_layout: bool,
|
159
166
|
export_md: bool,
|
160
167
|
export_txt: bool,
|
161
168
|
export_doctags: bool,
|
@@ -189,9 +196,27 @@ def export_documents(
|
|
189
196
|
if export_html_split_page:
|
190
197
|
fname = output_dir / f"{doc_filename}.html"
|
191
198
|
_log.info(f"writing HTML output to {fname}")
|
192
|
-
|
193
|
-
|
194
|
-
|
199
|
+
if show_layout:
|
200
|
+
ser = HTMLDocSerializer(
|
201
|
+
doc=conv_res.document,
|
202
|
+
params=HTMLParams(
|
203
|
+
image_mode=image_export_mode,
|
204
|
+
output_style=HTMLOutputStyle.SPLIT_PAGE,
|
205
|
+
),
|
206
|
+
)
|
207
|
+
visualizer = LayoutVisualizer()
|
208
|
+
visualizer.params.show_label = False
|
209
|
+
ser_res = ser.serialize(
|
210
|
+
visualizer=visualizer,
|
211
|
+
)
|
212
|
+
with open(fname, "w") as fw:
|
213
|
+
fw.write(ser_res.text)
|
214
|
+
else:
|
215
|
+
conv_res.document.save_as_html(
|
216
|
+
filename=fname,
|
217
|
+
image_mode=image_export_mode,
|
218
|
+
split_page_view=True,
|
219
|
+
)
|
195
220
|
|
196
221
|
# Export Text format:
|
197
222
|
if export_txt:
|
@@ -250,6 +275,13 @@ def convert( # noqa: C901
|
|
250
275
|
to_formats: List[OutputFormat] = typer.Option(
|
251
276
|
None, "--to", help="Specify output formats. Defaults to Markdown."
|
252
277
|
),
|
278
|
+
show_layout: Annotated[
|
279
|
+
bool,
|
280
|
+
typer.Option(
|
281
|
+
...,
|
282
|
+
help="If enabled, the page images will show the bounding-boxes of the items.",
|
283
|
+
),
|
284
|
+
] = False,
|
253
285
|
headers: str = typer.Option(
|
254
286
|
None,
|
255
287
|
"--headers",
|
@@ -596,6 +628,7 @@ def convert( # noqa: C901
|
|
596
628
|
export_json=export_json,
|
597
629
|
export_html=export_html,
|
598
630
|
export_html_split_page=export_html_split_page,
|
631
|
+
show_layout=show_layout,
|
599
632
|
export_md=export_md,
|
600
633
|
export_txt=export_txt,
|
601
634
|
export_doctags=export_doctags,
|
@@ -334,9 +334,9 @@ class _DocumentConversionInput(BaseModel):
|
|
334
334
|
) -> Optional[InputFormat]:
|
335
335
|
"""Guess the input format of a document by checking part of its content."""
|
336
336
|
input_format: Optional[InputFormat] = None
|
337
|
-
content_str = content.decode("utf-8")
|
338
337
|
|
339
338
|
if mime == "application/xml":
|
339
|
+
content_str = content.decode("utf-8")
|
340
340
|
match_doctype = re.search(r"<!DOCTYPE [^>]+>", content_str)
|
341
341
|
if match_doctype:
|
342
342
|
xml_doctype = match_doctype.group()
|
@@ -358,6 +358,7 @@ class _DocumentConversionInput(BaseModel):
|
|
358
358
|
input_format = InputFormat.XML_JATS
|
359
359
|
|
360
360
|
elif mime == "text/plain":
|
361
|
+
content_str = content.decode("utf-8")
|
361
362
|
if InputFormat.XML_USPTO in formats and content_str.startswith("PATN\r\n"):
|
362
363
|
input_format = InputFormat.XML_USPTO
|
363
364
|
|
@@ -411,7 +412,11 @@ class _DocumentConversionInput(BaseModel):
|
|
411
412
|
else:
|
412
413
|
return "application/xml"
|
413
414
|
|
414
|
-
if re.match(
|
415
|
+
if re.match(
|
416
|
+
r"(<script.*?>.*?</script>\s*)?(<!doctype\s+html|<html|<head|<body)",
|
417
|
+
content_str,
|
418
|
+
re.DOTALL,
|
419
|
+
):
|
415
420
|
return "text/html"
|
416
421
|
|
417
422
|
p = re.compile(
|
@@ -185,13 +185,23 @@ class LayoutModel(BasePageModel):
|
|
185
185
|
).postprocess()
|
186
186
|
# processed_clusters, processed_cells = clusters, page.cells
|
187
187
|
|
188
|
-
|
189
|
-
|
190
|
-
|
188
|
+
with warnings.catch_warnings():
|
189
|
+
warnings.filterwarnings(
|
190
|
+
"ignore",
|
191
|
+
"Mean of empty slice|invalid value encountered in scalar divide",
|
192
|
+
RuntimeWarning,
|
193
|
+
"numpy",
|
194
|
+
)
|
191
195
|
|
192
|
-
|
193
|
-
|
194
|
-
|
196
|
+
conv_res.confidence.pages[page.page_no].layout_score = float(
|
197
|
+
np.mean([c.confidence for c in processed_clusters])
|
198
|
+
)
|
199
|
+
|
200
|
+
conv_res.confidence.pages[page.page_no].ocr_score = float(
|
201
|
+
np.mean(
|
202
|
+
[c.confidence for c in processed_cells if c.from_ocr]
|
203
|
+
)
|
204
|
+
)
|
195
205
|
|
196
206
|
page.cells = processed_cells
|
197
207
|
page.predictions.layout = LayoutPrediction(
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import re
|
2
|
+
import warnings
|
2
3
|
from collections.abc import Iterable
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Optional
|
@@ -7,7 +8,7 @@ import numpy as np
|
|
7
8
|
from PIL import ImageDraw
|
8
9
|
from pydantic import BaseModel
|
9
10
|
|
10
|
-
from docling.datamodel.base_models import Page
|
11
|
+
from docling.datamodel.base_models import Page
|
11
12
|
from docling.datamodel.document import ConversionResult
|
12
13
|
from docling.datamodel.settings import settings
|
13
14
|
from docling.models.base_model import BasePageModel
|
@@ -76,11 +77,15 @@ class PagePreprocessingModel(BasePageModel):
|
|
76
77
|
score = self.rate_text_quality(c.text)
|
77
78
|
text_scores.append(score)
|
78
79
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
)
|
83
|
-
|
80
|
+
with warnings.catch_warnings():
|
81
|
+
warnings.filterwarnings(
|
82
|
+
"ignore", "Mean of empty slice", RuntimeWarning, "numpy"
|
83
|
+
)
|
84
|
+
conv_res.confidence.pages[page.page_no].parse_score = float(
|
85
|
+
np.nanquantile(
|
86
|
+
text_scores, q=0.10
|
87
|
+
) # To emphasise problems in the parse_score, we take the 10% percentile score of all text cells.
|
88
|
+
)
|
84
89
|
|
85
90
|
# DEBUG code:
|
86
91
|
def draw_text_boxes(image, cells, show: bool = False):
|
@@ -8,7 +8,7 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
|
|
8
8
|
|
9
9
|
from docling.backend.abstract_backend import AbstractDocumentBackend
|
10
10
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
11
|
-
from docling.datamodel.base_models import AssembledUnit, Page
|
11
|
+
from docling.datamodel.base_models import AssembledUnit, Page
|
12
12
|
from docling.datamodel.document import ConversionResult
|
13
13
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
14
14
|
from docling.datamodel.settings import settings
|
@@ -55,11 +55,13 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
55
55
|
"When defined, it must point to a folder containing all models required by the pipeline."
|
56
56
|
)
|
57
57
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
with warnings.catch_warnings(): # deprecated generate_table_images
|
59
|
+
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
60
|
+
self.keep_images = (
|
61
|
+
self.pipeline_options.generate_page_images
|
62
|
+
or self.pipeline_options.generate_picture_images
|
63
|
+
or self.pipeline_options.generate_table_images
|
64
|
+
)
|
63
65
|
|
64
66
|
self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
|
65
67
|
|
@@ -210,64 +212,74 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
210
212
|
)
|
211
213
|
|
212
214
|
# Generate images of the requested element types
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
215
|
+
with warnings.catch_warnings(): # deprecated generate_table_images
|
216
|
+
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
217
|
+
if (
|
218
|
+
self.pipeline_options.generate_picture_images
|
219
|
+
or self.pipeline_options.generate_table_images
|
220
|
+
):
|
221
|
+
scale = self.pipeline_options.images_scale
|
222
|
+
for element, _level in conv_res.document.iterate_items():
|
223
|
+
if not isinstance(element, DocItem) or len(element.prov) == 0:
|
224
|
+
continue
|
225
|
+
if (
|
226
|
+
isinstance(element, PictureItem)
|
227
|
+
and self.pipeline_options.generate_picture_images
|
228
|
+
) or (
|
229
|
+
isinstance(element, TableItem)
|
230
|
+
and self.pipeline_options.generate_table_images
|
231
|
+
):
|
232
|
+
page_ix = element.prov[0].page_no - 1
|
233
|
+
page = next(
|
234
|
+
(p for p in conv_res.pages if p.page_no == page_ix),
|
235
|
+
cast("Page", None),
|
236
|
+
)
|
237
|
+
assert page is not None
|
238
|
+
assert page.size is not None
|
239
|
+
assert page.image is not None
|
240
|
+
|
241
|
+
crop_bbox = (
|
242
|
+
element.prov[0]
|
243
|
+
.bbox.scaled(scale=scale)
|
244
|
+
.to_top_left_origin(
|
245
|
+
page_height=page.size.height * scale
|
246
|
+
)
|
247
|
+
)
|
248
|
+
|
249
|
+
cropped_im = page.image.crop(crop_bbox.as_tuple())
|
250
|
+
element.image = ImageRef.from_pil(
|
251
|
+
cropped_im, dpi=int(72 * scale)
|
252
|
+
)
|
247
253
|
|
248
254
|
# Aggregate confidence values for document:
|
249
255
|
if len(conv_res.pages) > 0:
|
250
|
-
|
251
|
-
|
252
|
-
|
256
|
+
with warnings.catch_warnings():
|
257
|
+
warnings.filterwarnings(
|
258
|
+
"ignore",
|
259
|
+
category=RuntimeWarning,
|
260
|
+
message="Mean of empty slice|All-NaN slice encountered",
|
253
261
|
)
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
q=0.1, # parse score should relate to worst 10% of pages.
|
262
|
+
conv_res.confidence.layout_score = float(
|
263
|
+
np.nanmean(
|
264
|
+
[c.layout_score for c in conv_res.confidence.pages.values()]
|
265
|
+
)
|
259
266
|
)
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
267
|
+
conv_res.confidence.parse_score = float(
|
268
|
+
np.nanquantile(
|
269
|
+
[c.parse_score for c in conv_res.confidence.pages.values()],
|
270
|
+
q=0.1, # parse score should relate to worst 10% of pages.
|
271
|
+
)
|
264
272
|
)
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
273
|
+
conv_res.confidence.table_score = float(
|
274
|
+
np.nanmean(
|
275
|
+
[c.table_score for c in conv_res.confidence.pages.values()]
|
276
|
+
)
|
277
|
+
)
|
278
|
+
conv_res.confidence.ocr_score = float(
|
279
|
+
np.nanmean(
|
280
|
+
[c.ocr_score for c in conv_res.confidence.pages.values()]
|
281
|
+
)
|
269
282
|
)
|
270
|
-
)
|
271
283
|
|
272
284
|
return conv_res
|
273
285
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "docling"
|
3
|
-
version = "2.34.0" # DO NOT EDIT, updated automatically
|
3
|
+
version = "2.35.0" # DO NOT EDIT, updated automatically
|
4
4
|
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
5
5
|
authors = [
|
6
6
|
"Christoph Auer <cau@zurich.ibm.com>",
|
@@ -46,7 +46,7 @@ packages = [{ include = "docling" }]
|
|
46
46
|
######################
|
47
47
|
python = "^3.9"
|
48
48
|
pydantic = "^2.0.0"
|
49
|
-
docling-core = {version = "^2.
|
49
|
+
docling-core = {version = "^2.31.2", extras = ["chunking"]}
|
50
50
|
docling-ibm-models = "^3.4.0"
|
51
51
|
docling-parse = "^4.0.0"
|
52
52
|
filetype = "^1.2.0"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|