docling 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/datamodel/document.py +72 -3
- docling/utils/export.py +5 -1
- {docling-1.10.0.dist-info → docling-1.11.0.dist-info}/METADATA +3 -3
- {docling-1.10.0.dist-info → docling-1.11.0.dist-info}/RECORD +6 -6
- {docling-1.10.0.dist-info → docling-1.11.0.dist-info}/LICENSE +0 -0
- {docling-1.10.0.dist-info → docling-1.11.0.dist-info}/WHEEL +0 -0
docling/datamodel/document.py
CHANGED
@@ -11,6 +11,7 @@ from docling_core.types import FileInfoObject as DsFileInfoObject
|
|
11
11
|
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
12
12
|
from docling_core.types import Table as DsSchemaTable
|
13
13
|
from docling_core.types import TableCell
|
14
|
+
from docling_core.types.doc.base import Figure
|
14
15
|
from pydantic import BaseModel
|
15
16
|
from typing_extensions import deprecated
|
16
17
|
|
@@ -279,7 +280,7 @@ class ConvertedDocument(BaseModel):
|
|
279
280
|
),
|
280
281
|
)
|
281
282
|
figures.append(
|
282
|
-
|
283
|
+
Figure(
|
283
284
|
prov=[
|
284
285
|
Prov(
|
285
286
|
bbox=target_bbox,
|
@@ -312,8 +313,76 @@ class ConvertedDocument(BaseModel):
|
|
312
313
|
def render_as_dict(self):
|
313
314
|
return self.output.model_dump(by_alias=True, exclude_none=True)
|
314
315
|
|
315
|
-
def render_as_markdown(
|
316
|
-
|
316
|
+
def render_as_markdown(
|
317
|
+
self,
|
318
|
+
delim: str = "\n\n",
|
319
|
+
main_text_start: int = 0,
|
320
|
+
main_text_stop: Optional[int] = None,
|
321
|
+
main_text_labels: list[str] = [
|
322
|
+
"title",
|
323
|
+
"subtitle-level-1",
|
324
|
+
"paragraph",
|
325
|
+
"caption",
|
326
|
+
"table",
|
327
|
+
],
|
328
|
+
strict_text: bool = False,
|
329
|
+
):
|
330
|
+
return self.output.export_to_markdown(
|
331
|
+
delim=delim,
|
332
|
+
main_text_start=main_text_start,
|
333
|
+
main_text_stop=main_text_stop,
|
334
|
+
main_text_labels=main_text_labels,
|
335
|
+
strict_text=strict_text,
|
336
|
+
)
|
337
|
+
|
338
|
+
def render_as_text(
|
339
|
+
self,
|
340
|
+
delim: str = "\n\n",
|
341
|
+
main_text_start: int = 0,
|
342
|
+
main_text_stop: Optional[int] = None,
|
343
|
+
main_text_labels: list[str] = [
|
344
|
+
"title",
|
345
|
+
"subtitle-level-1",
|
346
|
+
"paragraph",
|
347
|
+
"caption",
|
348
|
+
],
|
349
|
+
):
|
350
|
+
return self.output.export_to_markdown(
|
351
|
+
delim=delim,
|
352
|
+
main_text_start=main_text_start,
|
353
|
+
main_text_stop=main_text_stop,
|
354
|
+
main_text_labels=main_text_labels,
|
355
|
+
strict_text=True,
|
356
|
+
)
|
357
|
+
|
358
|
+
def render_as_doctags(
|
359
|
+
self,
|
360
|
+
delim: str = "\n\n",
|
361
|
+
main_text_start: int = 0,
|
362
|
+
main_text_stop: Optional[int] = None,
|
363
|
+
main_text_labels: list[str] = [
|
364
|
+
"title",
|
365
|
+
"subtitle-level-1",
|
366
|
+
"paragraph",
|
367
|
+
"caption",
|
368
|
+
"table",
|
369
|
+
"figure",
|
370
|
+
],
|
371
|
+
page_tagging: bool = True,
|
372
|
+
location_tagging: bool = True,
|
373
|
+
location_dimensions: Tuple[int, int] = (100, 100),
|
374
|
+
add_new_line: bool = True,
|
375
|
+
) -> str:
|
376
|
+
return self.output.export_to_document_tokens(
|
377
|
+
delim=delim,
|
378
|
+
main_text_start=main_text_start,
|
379
|
+
main_text_stop=main_text_stop,
|
380
|
+
main_text_labels=main_text_labels,
|
381
|
+
page_tagging=page_tagging,
|
382
|
+
location_tagging=location_tagging,
|
383
|
+
location_dimensions=location_dimensions,
|
384
|
+
add_new_line=add_new_line,
|
385
|
+
)
|
317
386
|
|
318
387
|
def render_element_images(
|
319
388
|
self, element_types: Tuple[PageElement] = (FigureElement,)
|
docling/utils/export.py
CHANGED
@@ -163,8 +163,12 @@ def generate_multimodal_pages(
|
|
163
163
|
content_md = doc.export_to_markdown(
|
164
164
|
main_text_start=start_ix, main_text_stop=end_ix
|
165
165
|
)
|
166
|
+
# No page-tagging since we only do 1 page at the time
|
167
|
+
content_dt = doc.export_to_document_tokens(
|
168
|
+
main_text_start=start_ix, main_text_stop=end_ix, page_tagging=False
|
169
|
+
)
|
166
170
|
|
167
|
-
return content_text, content_md, page_cells, page_segments, page
|
171
|
+
return content_text, content_md, content_dt, page_cells, page_segments, page
|
168
172
|
|
169
173
|
for ix, orig_item in enumerate(doc.main_text):
|
170
174
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 1.10.0
|
3
|
+
Version: 1.11.0
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -21,8 +21,8 @@ Classifier: Programming Language :: Python :: 3.12
|
|
21
21
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
22
22
|
Requires-Dist: certifi (>=2024.7.4)
|
23
23
|
Requires-Dist: deepsearch-glm (>=0.21.0,<0.22.0)
|
24
|
-
Requires-Dist: docling-core (>=1.
|
25
|
-
Requires-Dist: docling-ibm-models (>=1.1.
|
24
|
+
Requires-Dist: docling-core (>=1.2.0,<2.0.0)
|
25
|
+
Requires-Dist: docling-ibm-models (>=1.1.7,<2.0.0)
|
26
26
|
Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
|
27
27
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
28
28
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
@@ -5,7 +5,7 @@ docling/backend/docling_parse_backend.py,sha256=r3aJwsWR7qG47ElhOa9iQJJQauHMt950
|
|
5
5
|
docling/backend/pypdfium2_backend.py,sha256=FggVFitmyMMmLar6vk6XQsavGOPQx95TD14opWYRMAY,8837
|
6
6
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
docling/datamodel/base_models.py,sha256=PSJe_Qlh2VJfijg3kkXOOqZbi_uqRHCmLjX__c5Buck,9155
|
8
|
-
docling/datamodel/document.py,sha256=
|
8
|
+
docling/datamodel/document.py,sha256=oXPitPRd9Gyi7ZU4kfEc4K9eMVtTJDx1T-ellTwF3Ak,15716
|
9
9
|
docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28,733
|
10
10
|
docling/document_converter.py,sha256=5OiNafoaVcQhZ8ATF69xRp2KyFyKeSMhmwEFUoCzP-k,10980
|
11
11
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -19,10 +19,10 @@ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
19
19
|
docling/pipeline/base_model_pipeline.py,sha256=AC5NTR0xLy5JIZqsTINkKEHeCPqpyvJpuE_bcnZhyvI,529
|
20
20
|
docling/pipeline/standard_model_pipeline.py,sha256=UTjyaEXvz9htYZz-IMTkn11cZwNjgvo_Fl2dfBVnRQs,1442
|
21
21
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
docling/utils/export.py,sha256=
|
22
|
+
docling/utils/export.py,sha256=ltPhhruS8sulHTYW0Rtjfc1I9lW3oH6QAF0oYewkz7k,6115
|
23
23
|
docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
|
24
24
|
docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
|
25
|
-
docling-1.10.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
26
|
-
docling-1.
|
27
|
-
docling-1.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
28
|
-
docling-1.10.0.dist-info/RECORD,,
|
25
|
+
docling-1.11.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
26
|
+
docling-1.11.0.dist-info/METADATA,sha256=lDqzdtE1ohikNmN3eyPQ31Qa30x9F5XN6FUTkTNGU9s,8231
|
27
|
+
docling-1.11.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
28
|
+
docling-1.11.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|