docling 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docling/datamodel/document.py CHANGED
@@ -11,6 +11,7 @@ from docling_core.types import FileInfoObject as DsFileInfoObject
11
11
  from docling_core.types import PageDimensions, PageReference, Prov, Ref
12
12
  from docling_core.types import Table as DsSchemaTable
13
13
  from docling_core.types import TableCell
14
+ from docling_core.types.doc.base import Figure
14
15
  from pydantic import BaseModel
15
16
  from typing_extensions import deprecated
16
17
 
@@ -279,7 +280,7 @@ class ConvertedDocument(BaseModel):
279
280
  ),
280
281
  )
281
282
  figures.append(
282
- BaseCell(
283
+ Figure(
283
284
  prov=[
284
285
  Prov(
285
286
  bbox=target_bbox,
@@ -312,8 +313,76 @@ class ConvertedDocument(BaseModel):
312
313
  def render_as_dict(self):
313
314
  return self.output.model_dump(by_alias=True, exclude_none=True)
314
315
 
315
- def render_as_markdown(self):
316
- return self.output.export_to_markdown()
316
+ def render_as_markdown(
317
+ self,
318
+ delim: str = "\n\n",
319
+ main_text_start: int = 0,
320
+ main_text_stop: Optional[int] = None,
321
+ main_text_labels: list[str] = [
322
+ "title",
323
+ "subtitle-level-1",
324
+ "paragraph",
325
+ "caption",
326
+ "table",
327
+ ],
328
+ strict_text: bool = False,
329
+ ):
330
+ return self.output.export_to_markdown(
331
+ delim=delim,
332
+ main_text_start=main_text_start,
333
+ main_text_stop=main_text_stop,
334
+ main_text_labels=main_text_labels,
335
+ strict_text=strict_text,
336
+ )
337
+
338
+ def render_as_text(
339
+ self,
340
+ delim: str = "\n\n",
341
+ main_text_start: int = 0,
342
+ main_text_stop: Optional[int] = None,
343
+ main_text_labels: list[str] = [
344
+ "title",
345
+ "subtitle-level-1",
346
+ "paragraph",
347
+ "caption",
348
+ ],
349
+ ):
350
+ return self.output.export_to_markdown(
351
+ delim=delim,
352
+ main_text_start=main_text_start,
353
+ main_text_stop=main_text_stop,
354
+ main_text_labels=main_text_labels,
355
+ strict_text=True,
356
+ )
357
+
358
+ def render_as_doctags(
359
+ self,
360
+ delim: str = "\n\n",
361
+ main_text_start: int = 0,
362
+ main_text_stop: Optional[int] = None,
363
+ main_text_labels: list[str] = [
364
+ "title",
365
+ "subtitle-level-1",
366
+ "paragraph",
367
+ "caption",
368
+ "table",
369
+ "figure",
370
+ ],
371
+ page_tagging: bool = True,
372
+ location_tagging: bool = True,
373
+ location_dimensions: Tuple[int, int] = (100, 100),
374
+ add_new_line: bool = True,
375
+ ) -> str:
376
+ return self.output.export_to_document_tokens(
377
+ delim=delim,
378
+ main_text_start=main_text_start,
379
+ main_text_stop=main_text_stop,
380
+ main_text_labels=main_text_labels,
381
+ page_tagging=page_tagging,
382
+ location_tagging=location_tagging,
383
+ location_dimensions=location_dimensions,
384
+ add_new_line=add_new_line,
385
+ )
317
386
 
318
387
  def render_element_images(
319
388
  self, element_types: Tuple[PageElement] = (FigureElement,)
docling/utils/export.py CHANGED
@@ -163,8 +163,12 @@ def generate_multimodal_pages(
163
163
  content_md = doc.export_to_markdown(
164
164
  main_text_start=start_ix, main_text_stop=end_ix
165
165
  )
166
+ # No page-tagging since we only do 1 page at the time
167
+ content_dt = doc.export_to_document_tokens(
168
+ main_text_start=start_ix, main_text_stop=end_ix, page_tagging=False
169
+ )
166
170
 
167
- return content_text, content_md, page_cells, page_segments, page
171
+ return content_text, content_md, content_dt, page_cells, page_segments, page
168
172
 
169
173
  for ix, orig_item in enumerate(doc.main_text):
170
174
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 1.10.0
3
+ Version: 1.11.0
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -21,8 +21,8 @@ Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Dist: certifi (>=2024.7.4)
23
23
  Requires-Dist: deepsearch-glm (>=0.21.0,<0.22.0)
24
- Requires-Dist: docling-core (>=1.1.3,<2.0.0)
25
- Requires-Dist: docling-ibm-models (>=1.1.3,<2.0.0)
24
+ Requires-Dist: docling-core (>=1.2.0,<2.0.0)
25
+ Requires-Dist: docling-ibm-models (>=1.1.7,<2.0.0)
26
26
  Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
27
27
  Requires-Dist: easyocr (>=1.7,<2.0)
28
28
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
@@ -5,7 +5,7 @@ docling/backend/docling_parse_backend.py,sha256=r3aJwsWR7qG47ElhOa9iQJJQauHMt950
5
5
  docling/backend/pypdfium2_backend.py,sha256=FggVFitmyMMmLar6vk6XQsavGOPQx95TD14opWYRMAY,8837
6
6
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  docling/datamodel/base_models.py,sha256=PSJe_Qlh2VJfijg3kkXOOqZbi_uqRHCmLjX__c5Buck,9155
8
- docling/datamodel/document.py,sha256=cG9RuAkFXCCGZqCHmhUtYeOA5PV6gjO3Y4i5lf2IM6I,13649
8
+ docling/datamodel/document.py,sha256=oXPitPRd9Gyi7ZU4kfEc4K9eMVtTJDx1T-ellTwF3Ak,15716
9
9
  docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28,733
10
10
  docling/document_converter.py,sha256=5OiNafoaVcQhZ8ATF69xRp2KyFyKeSMhmwEFUoCzP-k,10980
11
11
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,10 +19,10 @@ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
19
19
  docling/pipeline/base_model_pipeline.py,sha256=AC5NTR0xLy5JIZqsTINkKEHeCPqpyvJpuE_bcnZhyvI,529
20
20
  docling/pipeline/standard_model_pipeline.py,sha256=UTjyaEXvz9htYZz-IMTkn11cZwNjgvo_Fl2dfBVnRQs,1442
21
21
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- docling/utils/export.py,sha256=gP8609DtHp6bNGPhYpwe0g3J4qvc2HqQpHZnfl7hQZQ,5899
22
+ docling/utils/export.py,sha256=ltPhhruS8sulHTYW0Rtjfc1I9lW3oH6QAF0oYewkz7k,6115
23
23
  docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
24
24
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
25
- docling-1.10.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
- docling-1.10.0.dist-info/METADATA,sha256=1itpZzvKAruLgF_xPYhSFhqpUySogjDjT5u1HG2sGgM,8231
27
- docling-1.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
- docling-1.10.0.dist-info/RECORD,,
25
+ docling-1.11.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
+ docling-1.11.0.dist-info/METADATA,sha256=lDqzdtE1ohikNmN3eyPQ31Qa30x9F5XN6FUTkTNGU9s,8231
27
+ docling-1.11.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
+ docling-1.11.0.dist-info/RECORD,,