docling 1.9.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import copy
2
2
  import random
3
3
 
4
4
  from deepsearch_glm.nlp_utils import init_nlp_model
5
- from deepsearch_glm.utils.ds_utils import to_legacy_document_format
5
+ from deepsearch_glm.utils.doc_utils import to_legacy_document_format
6
6
  from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
7
7
  from docling_core.types import BaseText
8
8
  from docling_core.types import Document as DsDocument
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) [year] [fullname]
3
+ Copyright (c) 2024 International Business Machines
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 1.9.0
3
+ Version: 1.10.0
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -20,14 +20,14 @@ Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Dist: certifi (>=2024.7.4)
23
- Requires-Dist: deepsearch-glm (>=0.19.1,<0.20.0)
23
+ Requires-Dist: deepsearch-glm (>=0.21.0,<0.22.0)
24
24
  Requires-Dist: docling-core (>=1.1.3,<2.0.0)
25
25
  Requires-Dist: docling-ibm-models (>=1.1.3,<2.0.0)
26
- Requires-Dist: docling-parse (>=1.1.3,<2.0.0)
26
+ Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
27
27
  Requires-Dist: easyocr (>=1.7,<2.0)
28
28
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
29
29
  Requires-Dist: huggingface_hub (>=0.23,<1)
30
- Requires-Dist: pyarrow (>=17.0.0,<18.0.0)
30
+ Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
31
31
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
32
32
  Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
33
33
  Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
@@ -63,7 +63,7 @@ Docling bundles PDF document conversion to JSON and Markdown in an easy, self-co
63
63
  * 📝 Extracts metadata from the document, such as title, authors, references and language
64
64
  * 🔍 Optionally applies OCR (use with scanned PDFs)
65
65
 
66
- Doing RAG or Q/A? Also consider [Quackling](https://github.com/DS4SD/quackling) to get the most out of your documents.
66
+ For RAG, check out [Quackling](https://github.com/DS4SD/quackling) to get the most out of your docs, whether you use LlamaIndex, LangChain, or your own pipeline.
67
67
 
68
68
  ## Installation
69
69
 
@@ -183,6 +183,10 @@ results = doc_converter.convert(conv_input)
183
183
  You can limit the number of CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. By default, Docling uses 4 CPU threads.
184
184
 
185
185
 
186
+ ## Technical report
187
+
188
+ For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
189
+
186
190
  ## Contributing
187
191
 
188
192
  Please read [Contributing to Docling](https://github.com/DS4SD/docling/blob/main/CONTRIBUTING.md) for details.
@@ -196,10 +200,10 @@ If you use Docling in your projects, please consider citing the following:
196
200
  @techreport{Docling,
197
201
  author = {Deep Search Team},
198
202
  month = {8},
199
- title = {{Docling Technical Report}},
200
- url={https://arxiv.org/abs/2408.09869},
201
- eprint={2408.09869},
202
- doi = "10.48550/arXiv.2408.09869",
203
+ title = {Docling Technical Report},
204
+ url = {https://arxiv.org/abs/2408.09869},
205
+ eprint = {2408.09869},
206
+ doi = {10.48550/arXiv.2408.09869},
203
207
  version = {1.0.0},
204
208
  year = {2024}
205
209
  }
@@ -10,7 +10,7 @@ docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28
10
10
  docling/document_converter.py,sha256=5OiNafoaVcQhZ8ATF69xRp2KyFyKeSMhmwEFUoCzP-k,10980
11
11
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  docling/models/base_ocr_model.py,sha256=Ipl82a3AV2OsgMQSMEMpnWJ6MXcmyIQzmp52PmTaB0g,4465
13
- docling/models/ds_glm_model.py,sha256=inNsmlriiDuqe3Q4LWL2DbqPTScP-3-dFgFoaJprFtQ,3367
13
+ docling/models/ds_glm_model.py,sha256=VXGmj8cW0WKMz1He4tp1lZhXHkS8Z39U1G-ujkc7deU,3368
14
14
  docling/models/easyocr_model.py,sha256=ABIqALvtNNrDQ47fXaZ0lDFhOwKsYGUUlAPnIsFZgZA,2232
15
15
  docling/models/layout_model.py,sha256=ZFmaLXlRWUfsT1pJCiYVxhQFrBBsiz6Aw0m9GM3UvVM,11249
16
16
  docling/models/page_assemble_model.py,sha256=8eoG2WiFxPxq9TPvM-wkngb2gkr0tdtCRVXg1JcTETo,5550
@@ -22,7 +22,7 @@ docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  docling/utils/export.py,sha256=gP8609DtHp6bNGPhYpwe0g3J4qvc2HqQpHZnfl7hQZQ,5899
23
23
  docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
24
24
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
25
- docling-1.9.0.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
26
- docling-1.9.0.dist-info/METADATA,sha256=YV5QVsWcEyeDIYezvMWyFg7csluluDQ2xT7LLT1J6Qg,8051
27
- docling-1.9.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
- docling-1.9.0.dist-info/RECORD,,
25
+ docling-1.10.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
+ docling-1.10.0.dist-info/METADATA,sha256=1itpZzvKAruLgF_xPYhSFhqpUySogjDjT5u1HG2sGgM,8231
27
+ docling-1.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
+ docling-1.10.0.dist-info/RECORD,,