docling 1.8.3__py3-none-any.whl → 1.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/datamodel/base_models.py +3 -3
- docling/datamodel/document.py +2 -0
- docling/models/ds_glm_model.py +5 -1
- docling/models/table_structure_model.py +10 -1
- {docling-1.8.3.dist-info → docling-1.8.5.dist-info}/METADATA +20 -3
- {docling-1.8.3.dist-info → docling-1.8.5.dist-info}/RECORD +8 -8
- {docling-1.8.3.dist-info → docling-1.8.5.dist-info}/LICENSE +0 -0
- {docling-1.8.3.dist-info → docling-1.8.5.dist-info}/WHEEL +0 -0
docling/datamodel/base_models.py
CHANGED
@@ -238,9 +238,9 @@ class EquationPrediction(BaseModel):
|
|
238
238
|
|
239
239
|
class PagePredictions(BaseModel):
|
240
240
|
layout: LayoutPrediction = None
|
241
|
-
tablestructure: TableStructurePrediction = None
|
242
|
-
figures_classification: FigureClassificationPrediction = None
|
243
|
-
equations_prediction: EquationPrediction = None
|
241
|
+
tablestructure: Optional[TableStructurePrediction] = None
|
242
|
+
figures_classification: Optional[FigureClassificationPrediction] = None
|
243
|
+
equations_prediction: Optional[EquationPrediction] = None
|
244
244
|
|
245
245
|
|
246
246
|
PageElement = Union[TextElement, TableElement, FigureElement]
|
docling/datamodel/document.py
CHANGED
docling/models/ds_glm_model.py
CHANGED
@@ -16,8 +16,12 @@ from docling.datamodel.document import ConversionResult
|
|
16
16
|
class GlmModel:
|
17
17
|
def __init__(self, config):
|
18
18
|
self.config = config
|
19
|
+
self.model_names = self.config.get(
|
20
|
+
"model_names", ""
|
21
|
+
) # "language;term;reference"
|
19
22
|
load_pretrained_nlp_models()
|
20
|
-
model = init_nlp_model(model_names="language;term;reference")
|
23
|
+
# model = init_nlp_model(model_names="language;term;reference")
|
24
|
+
model = init_nlp_model(model_names=self.model_names)
|
21
25
|
self.model = model
|
22
26
|
|
23
27
|
def __call__(self, conv_res: ConversionResult) -> DsDocument:
|
@@ -44,7 +44,16 @@ class TableStructureModel:
|
|
44
44
|
|
45
45
|
for tc in table_element.table_cells:
|
46
46
|
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
47
|
-
|
47
|
+
if tc.column_header:
|
48
|
+
width = 3
|
49
|
+
else:
|
50
|
+
width = 1
|
51
|
+
draw.rectangle([(x0, y0), (x1, y1)], outline="blue", width=width)
|
52
|
+
draw.text(
|
53
|
+
(x0 + 3, y0 + 3),
|
54
|
+
text=f"{tc.start_row_offset_idx}, {tc.start_col_offset_idx}",
|
55
|
+
fill="black",
|
56
|
+
)
|
48
57
|
|
49
58
|
image.show()
|
50
59
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 1.8.3
|
3
|
+
Version: 1.8.5
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -20,10 +20,10 @@ Classifier: Programming Language :: Python :: 3.11
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.12
|
21
21
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
22
22
|
Requires-Dist: certifi (>=2024.7.4)
|
23
|
-
Requires-Dist: deepsearch-glm (>=0.19.0
|
23
|
+
Requires-Dist: deepsearch-glm (>=0.19.1,<0.20.0)
|
24
24
|
Requires-Dist: docling-core (>=1.1.2,<2.0.0)
|
25
25
|
Requires-Dist: docling-ibm-models (>=1.1.3,<2.0.0)
|
26
|
-
Requires-Dist: docling-parse (>=1.1.
|
26
|
+
Requires-Dist: docling-parse (>=1.1.3,<2.0.0)
|
27
27
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
28
28
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
29
29
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
@@ -72,6 +72,23 @@ pip install docling
|
|
72
72
|
> [!NOTE]
|
73
73
|
> Works on macOS and Linux environments. Windows platforms are currently not tested.
|
74
74
|
|
75
|
+
|
76
|
+
### Use alternative PyTorch distributions
|
77
|
+
|
78
|
+
The Docling models depend on the [PyTorch](https://pytorch.org/) library.
|
79
|
+
Depending on your architecture, you might want to use a different distribution of `torch`.
|
80
|
+
For example, you might want support for different accelerator or for a cpu-only version.
|
81
|
+
All the different ways for installing `torch` are listed on their website <https://pytorch.org/>.
|
82
|
+
|
83
|
+
One common situation is the installation on Linux systems with cpu-only support.
|
84
|
+
In this case, we suggest the installation of Docling with the following options
|
85
|
+
|
86
|
+
```bash
|
87
|
+
# Example for installing on the Linux cpu-only version
|
88
|
+
pip install docling --extra-index-url https://download.pytorch.org/whl/cpu
|
89
|
+
```
|
90
|
+
|
91
|
+
|
75
92
|
### Development setup
|
76
93
|
|
77
94
|
To develop for Docling, you need Python 3.10 / 3.11 / 3.12 and Poetry. You can then install from your local clone's root dir:
|
@@ -4,24 +4,24 @@ docling/backend/abstract_backend.py,sha256=xfNNiZKksPPa9KAiA-fHD86flg0It4n_29ccp
|
|
4
4
|
docling/backend/docling_parse_backend.py,sha256=r3aJwsWR7qG47ElhOa9iQJJQauHMt950FfCsf6fhlP4,7480
|
5
5
|
docling/backend/pypdfium2_backend.py,sha256=FggVFitmyMMmLar6vk6XQsavGOPQx95TD14opWYRMAY,8837
|
6
6
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
docling/datamodel/base_models.py,sha256=
|
8
|
-
docling/datamodel/document.py,sha256=
|
7
|
+
docling/datamodel/base_models.py,sha256=K2lb2itk_VtJ7Zjz9a5zXgsCQ4PCTwf2blT-gI4UIRs,8872
|
8
|
+
docling/datamodel/document.py,sha256=cG9RuAkFXCCGZqCHmhUtYeOA5PV6gjO3Y4i5lf2IM6I,13649
|
9
9
|
docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28,733
|
10
10
|
docling/document_converter.py,sha256=5OiNafoaVcQhZ8ATF69xRp2KyFyKeSMhmwEFUoCzP-k,10980
|
11
11
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
docling/models/base_ocr_model.py,sha256=Ipl82a3AV2OsgMQSMEMpnWJ6MXcmyIQzmp52PmTaB0g,4465
|
13
|
-
docling/models/ds_glm_model.py,sha256=
|
13
|
+
docling/models/ds_glm_model.py,sha256=inNsmlriiDuqe3Q4LWL2DbqPTScP-3-dFgFoaJprFtQ,3367
|
14
14
|
docling/models/easyocr_model.py,sha256=ABIqALvtNNrDQ47fXaZ0lDFhOwKsYGUUlAPnIsFZgZA,2232
|
15
15
|
docling/models/layout_model.py,sha256=ZFmaLXlRWUfsT1pJCiYVxhQFrBBsiz6Aw0m9GM3UvVM,11249
|
16
16
|
docling/models/page_assemble_model.py,sha256=8eoG2WiFxPxq9TPvM-wkngb2gkr0tdtCRVXg1JcTETo,5550
|
17
|
-
docling/models/table_structure_model.py,sha256=
|
17
|
+
docling/models/table_structure_model.py,sha256=0wOeiRoma6et7FtoJZw2SA3wBd9-R9ivp5uvXBQqeM4,5768
|
18
18
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
19
|
docling/pipeline/base_model_pipeline.py,sha256=AC5NTR0xLy5JIZqsTINkKEHeCPqpyvJpuE_bcnZhyvI,529
|
20
20
|
docling/pipeline/standard_model_pipeline.py,sha256=UTjyaEXvz9htYZz-IMTkn11cZwNjgvo_Fl2dfBVnRQs,1442
|
21
21
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
|
23
23
|
docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
|
24
|
-
docling-1.8.3.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
25
|
-
docling-1.8.
|
26
|
-
docling-1.8.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
27
|
-
docling-1.8.3.dist-info/RECORD,,
|
24
|
+
docling-1.8.5.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
25
|
+
docling-1.8.5.dist-info/METADATA,sha256=-H_jbB493OWBMNwkFqmb8KNqXcv7p32AykV_biiFNPA,7888
|
26
|
+
docling-1.8.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
27
|
+
docling-1.8.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|