docling 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@ from io import BytesIO
3
3
  from pathlib import Path, PurePath
4
4
  from typing import ClassVar, Dict, Iterable, List, Optional, Type, Union
5
5
 
6
- from deepsearch.documents.core.export import export_to_markdown
7
6
  from docling_core.types import BaseCell, BaseText
8
7
  from docling_core.types import BoundingBox as DsBoundingBox
9
8
  from docling_core.types import Document as DsDocument
@@ -299,9 +298,7 @@ class ConvertedDocument(BaseModel):
299
298
 
300
299
  def render_as_markdown(self):
301
300
  if self.output:
302
- return export_to_markdown(
303
- self.output.model_dump(by_alias=True, exclude_none=True)
304
- )
301
+ return self.output.export_to_markdown()
305
302
  else:
306
303
  return ""
307
304
 
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 0.4.0
3
+ Version: 1.0.0
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
7
7
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
8
8
  Author: Christoph Auer
9
9
  Author-email: cau@zurich.ibm.com
10
- Requires-Python: >=3.11,<4.0
10
+ Requires-Python: >=3.10,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: Intended Audience :: Science/Research
@@ -15,13 +15,13 @@ Classifier: License :: OSI Approved :: MIT License
15
15
  Classifier: Operating System :: MacOS :: MacOS X
16
16
  Classifier: Operating System :: POSIX :: Linux
17
17
  Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
18
19
  Classifier: Programming Language :: Python :: 3.11
19
20
  Classifier: Programming Language :: Python :: 3.12
20
21
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
22
  Requires-Dist: deepsearch-glm (>=0.19.0,<1)
22
- Requires-Dist: deepsearch-toolkit (>=0.47.0,<1)
23
- Requires-Dist: docling-core (>=0.2.0,<0.3.0)
24
- Requires-Dist: docling-ibm-models (>=0.2.0,<0.3.0)
23
+ Requires-Dist: docling-core (>=1.1.0,<2.0.0)
24
+ Requires-Dist: docling-ibm-models (>=1.1.0,<2.0.0)
25
25
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
26
26
  Requires-Dist: huggingface_hub (>=0.23,<1)
27
27
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
@@ -31,19 +31,21 @@ Project-URL: Repository, https://github.com/DS4SD/docling
31
31
  Description-Content-Type: text/markdown
32
32
 
33
33
  <p align="center">
34
- <a href="https://github.com/ds4sd/docling"> <img loading="lazy" alt="Docling" src="https://github.com/DS4SD/docling/raw/main/logo.png" width="150" />
34
+ <a href="https://github.com/ds4sd/docling">
35
+ <img loading="lazy" alt="Docling" src="https://github.com/DS4SD/docling/raw/main/logo.png" width="150" />
36
+ </a>
35
37
  </p>
36
38
 
37
39
  # Docling
38
40
 
39
41
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
40
- ![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12-blue)
42
+ ![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue)
41
43
  [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
42
44
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
43
45
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
44
46
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
45
47
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
46
- [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
48
+ [![License MIT](https://img.shields.io/github/license/DS4SD/docling)](https://opensource.org/licenses/MIT)
47
49
 
48
50
  Docling bundles PDF document conversion to JSON and Markdown in an easy, self-contained package.
49
51
 
@@ -65,7 +67,7 @@ pip install docling
65
67
 
66
68
  ### Development setup
67
69
 
68
- To develop for Docling, you need Python 3.11 / 3.12 and Poetry. You can then install from your local clone's root dir:
70
+ To develop for Docling, you need Python 3.10 / 3.11 / 3.12 and Poetry. You can then install from your local clone's root dir:
69
71
  ```bash
70
72
  poetry install
71
73
  ```
@@ -81,7 +83,7 @@ The output of the above command will be written to `./scratch`.
81
83
 
82
84
  ### Adjust pipeline features
83
85
 
84
- **Control pipeline options**
86
+ #### Control pipeline options
85
87
 
86
88
  You can control if table structure recognition or OCR should be performed by arguments passed to `DocumentConverter`:
87
89
  ```python
@@ -94,16 +96,15 @@ doc_converter = DocumentConverter(
94
96
  )
95
97
  ```
96
98
 
97
- **Control table extraction options**
99
+ #### Control table extraction options
98
100
 
99
101
  You can control if table structure recognition should map the recognized structure back to PDF cells (default) or use text cells from the structure prediction itself.
100
102
  This can improve output quality if you find that multiple columns in extracted tables are erroneously merged into one.
101
103
 
102
104
 
103
105
  ```python
104
-
105
106
  pipeline_options = PipelineOptions(do_table_structure=True)
106
- pipeline_options.table_structure_options.do_cell_matching = False # Uses text cells predicted from table structure model
107
+ pipeline_options.table_structure_options.do_cell_matching = False # uses text cells predicted from table structure model
107
108
 
108
109
  doc_converter = DocumentConverter(
109
110
  artifacts_path=artifacts_path,
@@ -4,7 +4,7 @@ docling/backend/abstract_backend.py,sha256=dINr8oTax9Fq31Y1AR0CGWNZtAHN5aqB_M7TA
4
4
  docling/backend/pypdfium2_backend.py,sha256=sJMoActFyc3qdKB6RFly3auHXuXM4noQAG0ypUlj26o,7647
5
5
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  docling/datamodel/base_models.py,sha256=k7gLFPnq3ArEMAFz6qUcp5qemlYzVhOmR9qtBTkAiX4,6862
7
- docling/datamodel/document.py,sha256=S4USz13mqLS9WUwTgEkoocykcmY6B3cC3f4JlfTSYcM,12635
7
+ docling/datamodel/document.py,sha256=7caefzaii6itMQgtXfA4SJhB1TAF32v1c8zRwbiU03s,12497
8
8
  docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28,733
9
9
  docling/document_converter.py,sha256=MZw23oPlRmRi1ggzoD1PukUnqo-6boO3RZB06dZ5Xt0,7305
10
10
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,7 +19,7 @@ docling/pipeline/standard_model_pipeline.py,sha256=UTwodKUKrisLoVcntbNUBDhjzRyFv
19
19
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
21
21
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
22
- docling-0.4.0.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
23
- docling-0.4.0.dist-info/METADATA,sha256=aWx7RrxtFIXHkPqEBrzO_gJfPjgWcpgENqx02cdBQys,6044
24
- docling-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
25
- docling-0.4.0.dist-info/RECORD,,
22
+ docling-1.0.0.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
23
+ docling-1.0.0.dist-info/METADATA,sha256=9LJBnEDF3ZwgBd8t4d_2x_9EYzJgSc34dBpBHZcRQeU,6069
24
+ docling-1.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
25
+ docling-1.0.0.dist-info/RECORD,,