docling 1.16.1__tar.gz → 1.18.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-1.16.1 → docling-1.18.0}/PKG-INFO +5 -6
- {docling-1.16.1 → docling-1.18.0}/README.md +1 -2
- {docling-1.16.1 → docling-1.18.0}/docling/document_converter.py +4 -1
- {docling-1.16.1 → docling-1.18.0}/docling/models/layout_model.py +3 -3
- {docling-1.16.1 → docling-1.18.0}/docling/pipeline/standard_model_pipeline.py +1 -1
- {docling-1.16.1 → docling-1.18.0}/pyproject.toml +4 -4
- {docling-1.16.1 → docling-1.18.0}/LICENSE +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/__init__.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/backend/__init__.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/backend/abstract_backend.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/backend/docling_parse_backend.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/backend/pypdfium2_backend.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/cli/__init__.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/cli/main.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/datamodel/__init__.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/datamodel/base_models.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/datamodel/document.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/datamodel/pipeline_options.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/datamodel/settings.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/models/__init__.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/models/base_ocr_model.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/models/ds_glm_model.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/models/easyocr_model.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/models/page_assemble_model.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/models/table_structure_model.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/pipeline/__init__.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/pipeline/base_model_pipeline.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/utils/__init__.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/utils/export.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/utils/layout_utils.py +0 -0
- {docling-1.16.1 → docling-1.18.0}/docling/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 1.16.1
|
3
|
+
Version: 1.18.0
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -20,10 +20,10 @@ Classifier: Programming Language :: Python :: 3.11
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.12
|
21
21
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
22
22
|
Requires-Dist: certifi (>=2024.7.4)
|
23
|
-
Requires-Dist: deepsearch-glm (>=0.
|
23
|
+
Requires-Dist: deepsearch-glm (>=0.22.0,<0.23.0)
|
24
24
|
Requires-Dist: docling-core (>=1.6.2,<2.0.0)
|
25
|
-
Requires-Dist: docling-ibm-models (>=
|
26
|
-
Requires-Dist: docling-parse (>=1.
|
25
|
+
Requires-Dist: docling-ibm-models (>=2.0.0,<3.0.0)
|
26
|
+
Requires-Dist: docling-parse (>=1.4.1,<2.0.0)
|
27
27
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
28
28
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
29
29
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
@@ -77,8 +77,7 @@ To use Docling, simply install `docling` from your package manager, e.g. pip:
|
|
77
77
|
pip install docling
|
78
78
|
```
|
79
79
|
|
80
|
-
|
81
|
-
> Works on macOS and Linux environments. Windows platforms are currently not tested.
|
80
|
+
Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectures.
|
82
81
|
|
83
82
|
<details>
|
84
83
|
<summary><b>Alternative PyTorch distributions</b></summary>
|
@@ -33,8 +33,7 @@ To use Docling, simply install `docling` from your package manager, e.g. pip:
|
|
33
33
|
pip install docling
|
34
34
|
```
|
35
35
|
|
36
|
-
|
37
|
-
> Works on macOS and Linux environments. Windows platforms are currently not tested.
|
36
|
+
Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectures.
|
38
37
|
|
39
38
|
<details>
|
40
39
|
<summary><b>Alternative PyTorch distributions</b></summary>
|
@@ -67,7 +67,10 @@ class DocumentConverter:
|
|
67
67
|
from huggingface_hub import snapshot_download
|
68
68
|
|
69
69
|
download_path = snapshot_download(
|
70
|
-
repo_id="ds4sd/docling-models",
|
70
|
+
repo_id="ds4sd/docling-models",
|
71
|
+
force_download=force,
|
72
|
+
local_dir=local_dir,
|
73
|
+
revision="v2.0.0",
|
71
74
|
)
|
72
75
|
|
73
76
|
return Path(download_path)
|
@@ -33,6 +33,7 @@ class LayoutModel:
|
|
33
33
|
"Page-footer",
|
34
34
|
"Code",
|
35
35
|
"List-item",
|
36
|
+
# "Title"
|
36
37
|
# "Formula",
|
37
38
|
]
|
38
39
|
PAGE_HEADER_LABELS = ["Page-header", "Page-footer"]
|
@@ -69,9 +70,7 @@ class LayoutModel:
|
|
69
70
|
"Key-Value Region": 0.45,
|
70
71
|
}
|
71
72
|
|
72
|
-
CLASS_REMAPPINGS = {
|
73
|
-
"Document Index": "Table",
|
74
|
-
}
|
73
|
+
CLASS_REMAPPINGS = {"Document Index": "Table", "Title": "Section-header"}
|
75
74
|
|
76
75
|
_log.debug("================= Start postprocess function ====================")
|
77
76
|
start_time = time.time()
|
@@ -277,6 +276,7 @@ class LayoutModel:
|
|
277
276
|
bbox=BoundingBox.model_validate(pred_item),
|
278
277
|
cells=[],
|
279
278
|
)
|
279
|
+
|
280
280
|
clusters.append(cluster)
|
281
281
|
|
282
282
|
# Map cells to clusters
|
@@ -8,7 +8,7 @@ from docling.pipeline.base_model_pipeline import BaseModelPipeline
|
|
8
8
|
|
9
9
|
|
10
10
|
class StandardModelPipeline(BaseModelPipeline):
|
11
|
-
_layout_model_path = "model_artifacts/layout/beehive_v0.0.
|
11
|
+
_layout_model_path = "model_artifacts/layout/beehive_v0.0.5_pt"
|
12
12
|
_table_model_path = "model_artifacts/tableformer"
|
13
13
|
|
14
14
|
def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "docling"
|
3
|
-
version = "1.16.1"
|
3
|
+
version = "1.18.0" # DO NOT EDIT, updated automatically
|
4
4
|
description = "Docling PDF conversion package"
|
5
5
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
6
6
|
license = "MIT"
|
@@ -38,15 +38,15 @@ torchvision = [
|
|
38
38
|
python = "^3.10"
|
39
39
|
pydantic = "^2.0.0"
|
40
40
|
docling-core = "^1.6.2"
|
41
|
-
docling-ibm-models = "^
|
42
|
-
deepsearch-glm = "^0.
|
41
|
+
docling-ibm-models = "^2.0.0"
|
42
|
+
deepsearch-glm = "^0.22.0"
|
43
43
|
filetype = "^1.2.0"
|
44
44
|
pypdfium2 = "^4.30.0"
|
45
45
|
pydantic-settings = "^2.3.0"
|
46
46
|
huggingface_hub = ">=0.23,<1"
|
47
47
|
requests = "^2.32.3"
|
48
48
|
easyocr = "^1.7"
|
49
|
-
docling-parse = "^1.
|
49
|
+
docling-parse = "^1.4.1"
|
50
50
|
certifi = ">=2024.7.4"
|
51
51
|
rtree = "^1.3.0"
|
52
52
|
scipy = "^1.14.1"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|