docling 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/__init__.py +0 -0
- docling/backend/__init__.py +0 -0
- docling/backend/abstract_backend.py +55 -0
- docling/backend/pypdfium2_backend.py +223 -0
- docling/datamodel/__init__.py +0 -0
- docling/datamodel/base_models.py +247 -0
- docling/datamodel/document.py +351 -0
- docling/datamodel/settings.py +32 -0
- docling/document_converter.py +207 -0
- docling/models/__init__.py +0 -0
- docling/models/ds_glm_model.py +82 -0
- docling/models/easyocr_model.py +77 -0
- docling/models/layout_model.py +318 -0
- docling/models/page_assemble_model.py +160 -0
- docling/models/table_structure_model.py +114 -0
- docling/pipeline/__init__.py +0 -0
- docling/pipeline/base_model_pipeline.py +18 -0
- docling/pipeline/standard_model_pipeline.py +40 -0
- docling/utils/__init__.py +0 -0
- docling/utils/layout_utils.py +806 -0
- docling/utils/utils.py +41 -0
- docling-0.1.0.dist-info/LICENSE +21 -0
- docling-0.1.0.dist-info/METADATA +130 -0
- docling-0.1.0.dist-info/RECORD +25 -0
- docling-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Iterable
|
3
|
+
|
4
|
+
from docling.datamodel.base_models import Page, PipelineOptions
|
5
|
+
from docling.models.easyocr_model import EasyOcrModel
|
6
|
+
from docling.models.layout_model import LayoutModel
|
7
|
+
from docling.models.page_assemble_model import PageAssembleModel
|
8
|
+
from docling.models.table_structure_model import TableStructureModel
|
9
|
+
from docling.pipeline.base_model_pipeline import BaseModelPipeline
|
10
|
+
|
11
|
+
|
12
|
+
class StandardModelPipeline(BaseModelPipeline):
    """Pipeline assembled from an OCR, a layout and a table-structure model.

    The constructor instantiates the three models and stores them, in that
    order, in ``self.model_pipe``.
    """

    # Artifact sub-directories, joined onto the ``artifacts_path`` passed to
    # the constructor.
    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5"
    _table_model_path = "model_artifacts/tableformer"

    def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions):
        """Build the model stages.

        Args:
            artifacts_path: Root directory under which the layout and table
                model artifact sub-directories are resolved.
            pipeline_options: Options object; ``do_ocr`` and
                ``do_table_structure`` are forwarded as the ``enabled``
                setting of the OCR and table-structure models respectively.
        """
        super().__init__(artifacts_path, pipeline_options)

        # Stage 1: OCR, with a fixed language list.
        ocr_stage = EasyOcrModel(
            config={
                "lang": ["fr", "de", "es", "en"],
                "enabled": pipeline_options.do_ocr,
            }
        )

        # Stage 2: layout model, configured only with its artifact location.
        layout_dir = artifacts_path / StandardModelPipeline._layout_model_path
        layout_stage = LayoutModel(config={"artifacts_path": layout_dir})

        # Stage 3: table structure; cell matching is switched off here.
        table_dir = artifacts_path / StandardModelPipeline._table_model_path
        table_stage = TableStructureModel(
            config={
                "artifacts_path": table_dir,
                "enabled": pipeline_options.do_table_structure,
                "do_cell_matching": False,
            }
        )

        # NOTE(review): presumably the base pipeline applies these in list
        # order — confirm against BaseModelPipeline.
        self.model_pipe = [ocr_stage, layout_stage, table_stage]
|
File without changes
|