docling 2.26.0__py3-none-any.whl → 2.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/asciidoc_backend.py +1 -1
- docling/backend/csv_backend.py +1 -1
- docling/backend/docling_parse_backend.py +21 -13
- docling/backend/docling_parse_v2_backend.py +20 -12
- docling/backend/docling_parse_v4_backend.py +185 -0
- docling/backend/docx/__init__.py +0 -0
- docling/backend/docx/latex/__init__.py +0 -0
- docling/backend/docx/latex/latex_dict.py +271 -0
- docling/backend/docx/latex/omml.py +453 -0
- docling/backend/html_backend.py +7 -7
- docling/backend/md_backend.py +1 -1
- docling/backend/msexcel_backend.py +2 -45
- docling/backend/mspowerpoint_backend.py +1 -1
- docling/backend/msword_backend.py +65 -3
- docling/backend/pdf_backend.py +7 -2
- docling/backend/pypdfium2_backend.py +52 -30
- docling/backend/xml/uspto_backend.py +1 -1
- docling/cli/main.py +60 -21
- docling/cli/models.py +1 -1
- docling/datamodel/base_models.py +8 -10
- docling/datamodel/pipeline_options.py +26 -30
- docling/document_converter.py +5 -5
- docling/models/base_model.py +9 -1
- docling/models/base_ocr_model.py +27 -16
- docling/models/easyocr_model.py +28 -13
- docling/models/factories/__init__.py +27 -0
- docling/models/factories/base_factory.py +122 -0
- docling/models/factories/ocr_factory.py +11 -0
- docling/models/factories/picture_description_factory.py +11 -0
- docling/models/ocr_mac_model.py +39 -11
- docling/models/page_preprocessing_model.py +4 -0
- docling/models/picture_description_api_model.py +20 -3
- docling/models/picture_description_base_model.py +19 -3
- docling/models/picture_description_vlm_model.py +14 -2
- docling/models/plugins/__init__.py +0 -0
- docling/models/plugins/defaults.py +28 -0
- docling/models/rapid_ocr_model.py +34 -13
- docling/models/table_structure_model.py +13 -4
- docling/models/tesseract_ocr_cli_model.py +40 -15
- docling/models/tesseract_ocr_model.py +37 -12
- docling/pipeline/standard_pdf_pipeline.py +25 -78
- docling/utils/export.py +8 -6
- docling/utils/layout_postprocessor.py +26 -23
- docling/utils/visualization.py +1 -1
- {docling-2.26.0.dist-info → docling-2.27.0.dist-info}/METADATA +48 -19
- docling-2.27.0.dist-info/RECORD +83 -0
- {docling-2.26.0.dist-info → docling-2.27.0.dist-info}/entry_points.txt +3 -0
- docling-2.26.0.dist-info/RECORD +0 -72
- {docling-2.26.0.dist-info → docling-2.27.0.dist-info}/LICENSE +0 -0
- {docling-2.26.0.dist-info → docling-2.27.0.dist-info}/WHEEL +0 -0
docling/models/base_ocr_model.py
CHANGED
@@ -2,25 +2,33 @@ import copy
|
|
2
2
|
import logging
|
3
3
|
from abc import abstractmethod
|
4
4
|
from pathlib import Path
|
5
|
-
from typing import Iterable, List
|
5
|
+
from typing import Iterable, List, Optional, Type
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
9
|
+
from docling_core.types.doc.page import BoundingRectangle, PdfTextCell, TextCell
|
9
10
|
from PIL import Image, ImageDraw
|
10
11
|
from rtree import index
|
11
12
|
from scipy.ndimage import binary_dilation, find_objects, label
|
12
13
|
|
13
|
-
from docling.datamodel.base_models import
|
14
|
+
from docling.datamodel.base_models import Page
|
14
15
|
from docling.datamodel.document import ConversionResult
|
15
|
-
from docling.datamodel.pipeline_options import OcrOptions
|
16
|
+
from docling.datamodel.pipeline_options import AcceleratorOptions, OcrOptions
|
16
17
|
from docling.datamodel.settings import settings
|
17
|
-
from docling.models.base_model import BasePageModel
|
18
|
+
from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
18
19
|
|
19
20
|
_log = logging.getLogger(__name__)
|
20
21
|
|
21
22
|
|
22
|
-
class BaseOcrModel(BasePageModel):
|
23
|
-
def __init__(
|
23
|
+
class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
24
|
+
def __init__(
|
25
|
+
self,
|
26
|
+
*,
|
27
|
+
enabled: bool,
|
28
|
+
artifacts_path: Optional[Path],
|
29
|
+
options: OcrOptions,
|
30
|
+
accelerator_options: AcceleratorOptions,
|
31
|
+
):
|
24
32
|
self.enabled = enabled
|
25
33
|
self.options = options
|
26
34
|
|
@@ -104,11 +112,13 @@ class BaseOcrModel(BasePageModel):
|
|
104
112
|
p.dimension = 2
|
105
113
|
idx = index.Index(properties=p)
|
106
114
|
for i, cell in enumerate(programmatic_cells):
|
107
|
-
idx.insert(i, cell.
|
115
|
+
idx.insert(i, cell.rect.to_bounding_box().as_tuple())
|
108
116
|
|
109
117
|
def is_overlapping_with_existing_cells(ocr_cell):
|
110
118
|
# Query the R-tree to get overlapping rectangles
|
111
|
-
possible_matches_index = list(
|
119
|
+
possible_matches_index = list(
|
120
|
+
idx.intersection(ocr_cell.rect.to_bounding_box().as_tuple())
|
121
|
+
)
|
112
122
|
|
113
123
|
return (
|
114
124
|
len(possible_matches_index) > 0
|
@@ -125,10 +135,7 @@ class BaseOcrModel(BasePageModel):
|
|
125
135
|
"""
|
126
136
|
if self.options.force_full_page_ocr:
|
127
137
|
# If a full page OCR is forced, use only the OCR cells
|
128
|
-
cells =
|
129
|
-
Cell(id=c_ocr.id, text=c_ocr.text, bbox=c_ocr.bbox)
|
130
|
-
for c_ocr in ocr_cells
|
131
|
-
]
|
138
|
+
cells = ocr_cells
|
132
139
|
return cells
|
133
140
|
|
134
141
|
## Remove OCR cells which overlap with programmatic cells.
|
@@ -156,7 +163,7 @@ class BaseOcrModel(BasePageModel):
|
|
156
163
|
|
157
164
|
# Draw OCR and programmatic cells
|
158
165
|
for tc in page.cells:
|
159
|
-
x0, y0, x1, y1 = tc.
|
166
|
+
x0, y0, x1, y1 = tc.rect.to_bounding_box().as_tuple()
|
160
167
|
y0 *= scale_x
|
161
168
|
y1 *= scale_y
|
162
169
|
x0 *= scale_x
|
@@ -165,9 +172,8 @@ class BaseOcrModel(BasePageModel):
|
|
165
172
|
if y1 <= y0:
|
166
173
|
y1, y0 = y0, y1
|
167
174
|
|
168
|
-
color = "gray"
|
169
|
-
|
170
|
-
color = "magenta"
|
175
|
+
color = "magenta" if tc.from_ocr else "gray"
|
176
|
+
|
171
177
|
draw.rectangle([(x0, y0), (x1, y1)], outline=color)
|
172
178
|
|
173
179
|
if show:
|
@@ -187,3 +193,8 @@ class BaseOcrModel(BasePageModel):
|
|
187
193
|
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
188
194
|
) -> Iterable[Page]:
|
189
195
|
pass
|
196
|
+
|
197
|
+
@classmethod
|
198
|
+
@abstractmethod
|
199
|
+
def get_options_type(cls) -> Type[OcrOptions]:
|
200
|
+
pass
|
docling/models/easyocr_model.py
CHANGED
@@ -2,17 +2,19 @@ import logging
|
|
2
2
|
import warnings
|
3
3
|
import zipfile
|
4
4
|
from pathlib import Path
|
5
|
-
from typing import Iterable, List, Optional
|
5
|
+
from typing import Iterable, List, Optional, Type
|
6
6
|
|
7
7
|
import numpy
|
8
8
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
9
|
+
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
9
10
|
|
10
|
-
from docling.datamodel.base_models import
|
11
|
+
from docling.datamodel.base_models import Page
|
11
12
|
from docling.datamodel.document import ConversionResult
|
12
13
|
from docling.datamodel.pipeline_options import (
|
13
14
|
AcceleratorDevice,
|
14
15
|
AcceleratorOptions,
|
15
16
|
EasyOcrOptions,
|
17
|
+
OcrOptions,
|
16
18
|
)
|
17
19
|
from docling.datamodel.settings import settings
|
18
20
|
from docling.models.base_ocr_model import BaseOcrModel
|
@@ -33,7 +35,12 @@ class EasyOcrModel(BaseOcrModel):
|
|
33
35
|
options: EasyOcrOptions,
|
34
36
|
accelerator_options: AcceleratorOptions,
|
35
37
|
):
|
36
|
-
super().__init__(
|
38
|
+
super().__init__(
|
39
|
+
enabled=enabled,
|
40
|
+
artifacts_path=artifacts_path,
|
41
|
+
options=options,
|
42
|
+
accelerator_options=accelerator_options,
|
43
|
+
)
|
37
44
|
self.options: EasyOcrOptions
|
38
45
|
|
39
46
|
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
@@ -148,18 +155,22 @@ class EasyOcrModel(BaseOcrModel):
|
|
148
155
|
del im
|
149
156
|
|
150
157
|
cells = [
|
151
|
-
|
152
|
-
|
158
|
+
TextCell(
|
159
|
+
index=ix,
|
153
160
|
text=line[1],
|
161
|
+
orig=line[1],
|
162
|
+
from_ocr=True,
|
154
163
|
confidence=line[2],
|
155
|
-
|
156
|
-
|
157
|
-
(
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
164
|
+
rect=BoundingRectangle.from_bounding_box(
|
165
|
+
BoundingBox.from_tuple(
|
166
|
+
coord=(
|
167
|
+
(line[0][0][0] / self.scale) + ocr_rect.l,
|
168
|
+
(line[0][0][1] / self.scale) + ocr_rect.t,
|
169
|
+
(line[0][2][0] / self.scale) + ocr_rect.l,
|
170
|
+
(line[0][2][1] / self.scale) + ocr_rect.t,
|
171
|
+
),
|
172
|
+
origin=CoordOrigin.TOPLEFT,
|
173
|
+
)
|
163
174
|
),
|
164
175
|
)
|
165
176
|
for ix, line in enumerate(result)
|
@@ -175,3 +186,7 @@ class EasyOcrModel(BaseOcrModel):
|
|
175
186
|
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
176
187
|
|
177
188
|
yield page
|
189
|
+
|
190
|
+
@classmethod
|
191
|
+
def get_options_type(cls) -> Type[OcrOptions]:
|
192
|
+
return EasyOcrOptions
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import logging
|
2
|
+
from functools import lru_cache
|
3
|
+
|
4
|
+
from docling.models.factories.ocr_factory import OcrFactory
|
5
|
+
from docling.models.factories.picture_description_factory import (
|
6
|
+
PictureDescriptionFactory,
|
7
|
+
)
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
@lru_cache()
def get_ocr_factory(allow_external_plugins: bool = False) -> OcrFactory:
    """Build the OCR factory and populate it from the available plugins.

    The function is wrapped in ``lru_cache``, so calling it again with the
    same argument returns the already-built factory instead of reloading
    the plugins.

    Args:
        allow_external_plugins: Forwarded unchanged to
            ``OcrFactory.load_from_plugins``.

    Returns:
        The ``OcrFactory`` after ``load_from_plugins`` has run.
    """
    ocr_factory = OcrFactory()
    ocr_factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
    logger.info("Registered ocr engines: %r", ocr_factory.registered_kind)
    return ocr_factory
|
18
|
+
|
19
|
+
|
20
|
+
@lru_cache()
def get_picture_description_factory(
    allow_external_plugins: bool = False,
) -> PictureDescriptionFactory:
    """Build the picture-description factory and populate it from plugins.

    The function is wrapped in ``lru_cache``, so calling it again with the
    same argument returns the already-built factory instead of reloading
    the plugins.

    Args:
        allow_external_plugins: Forwarded unchanged to
            ``PictureDescriptionFactory.load_from_plugins``.

    Returns:
        The ``PictureDescriptionFactory`` after ``load_from_plugins`` has run.
    """
    description_factory = PictureDescriptionFactory()
    description_factory.load_from_plugins(
        allow_external_plugins=allow_external_plugins
    )
    logger.info(
        "Registered picture descriptions: %r", description_factory.registered_kind
    )
    return description_factory
|
@@ -0,0 +1,122 @@
|
|
1
|
+
import enum
|
2
|
+
import logging
|
3
|
+
from abc import ABCMeta
|
4
|
+
from typing import Generic, Optional, Type, TypeVar
|
5
|
+
|
6
|
+
from pluggy import PluginManager
|
7
|
+
from pydantic import BaseModel
|
8
|
+
|
9
|
+
from docling.datamodel.pipeline_options import BaseOptions
|
10
|
+
from docling.models.base_model import BaseModelWithOptions
|
11
|
+
|
12
|
+
A = TypeVar("A", bound=BaseModelWithOptions)
|
13
|
+
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
class FactoryMeta(BaseModel):
    """Descriptive record kept for a class registered in a factory."""

    # Value of the ``kind`` attribute of the options type.
    kind: str
    # Name of the plugin associated with the registration.
    plugin_name: str
    # Name of the module associated with the registration.
    module: str
|
22
|
+
|
23
|
+
|
24
|
+
class BaseFactory(Generic[A], metaclass=ABCMeta):
    """Registry that maps options types to the classes constructed from them.

    A class is registered under the options type returned by its
    ``get_options_type()`` classmethod, either one at a time through
    ``register`` or in bulk through ``load_from_plugins``.
    """

    default_plugin_name = "docling"

    def __init__(self, plugin_attr_name: str, plugin_name=default_plugin_name):
        """Create an empty factory.

        Args:
            plugin_attr_name: Name of the callable looked up on each plugin
                module; also the key read from the mapping that callable
                returns.
            plugin_name: Name used by ``load_from_plugins`` when it is not
                given one explicitly.
        """
        self.plugin_name = plugin_name
        self.plugin_attr_name = plugin_attr_name

        self._classes: dict[Type[BaseOptions], Type[A]] = {}
        self._meta: dict[Type[BaseOptions], FactoryMeta] = {}

    @property
    def registered_kind(self) -> list[str]:
        """Return the ``kind`` of every registered options type."""
        return [opt.kind for opt in self._classes]

    def get_enum(self) -> Type[enum.Enum]:
        """Return a ``str``-based Enum class with one member per registered kind.

        The class is named ``<plugin_attr_name>_enum``; each member's name
        and value are both the kind string.
        """
        return enum.Enum(
            self.plugin_attr_name + "_enum",
            names={kind: kind for kind in self.registered_kind},
            type=str,
            module=__name__,
        )

    @property
    def classes(self):
        """Return the mapping from options type to registered class."""
        return self._classes

    @property
    def registered_meta(self):
        """Return the mapping from options type to its ``FactoryMeta``."""
        return self._meta

    def create_instance(self, options: BaseOptions, **kwargs) -> A:
        """Instantiate the class registered for ``type(options)``.

        Args:
            options: Options object; its exact type selects the class.
            **kwargs: Extra keyword arguments passed to the class constructor.

        Returns:
            The new instance, built as ``cls(options=options, **kwargs)``.

        Raises:
            RuntimeError: If no class is registered for ``type(options)``.
        """
        # Only the lookup is guarded: a KeyError raised inside the constructor
        # must propagate as-is rather than be reported as "No class found".
        try:
            _cls = self._classes[type(options)]
        except KeyError as err:
            raise RuntimeError(
                self._err_msg_on_class_not_found(options.kind)
            ) from err
        return _cls(options=options, **kwargs)

    def create_options(self, kind: str, *args, **kwargs) -> BaseOptions:
        """Instantiate the registered options type whose ``kind`` matches.

        Args:
            kind: Value compared against each registered options type's
                ``kind`` attribute.
            *args: Positional arguments for the options constructor.
            **kwargs: Keyword arguments for the options constructor.

        Raises:
            RuntimeError: If no registered options type has that ``kind``.
        """
        for opt_cls in self._classes:
            if opt_cls.kind == kind:
                return opt_cls(*args, **kwargs)
        raise RuntimeError(self._err_msg_on_class_not_found(kind))

    def _err_msg_on_class_not_found(self, kind: str) -> str:
        """Build the "not found" message, listing every registered kind and class."""
        msg_str = "\n".join(
            f"\t{opt.kind!r} => {cls!r}" for opt, cls in self._classes.items()
        )

        return f"No class found with the name {kind!r}, known classes are:\n{msg_str}"

    def register(self, cls: Type[A], plugin_name: str, plugin_module_name: str):
        """Register ``cls`` under the options type it declares.

        Args:
            cls: Class exposing a ``get_options_type()`` classmethod.
            plugin_name: Plugin name stored in the registration metadata.
            plugin_module_name: Module name stored in the registration metadata.

        Raises:
            ValueError: If the options type is already registered.
        """
        opt_type = cls.get_options_type()

        if opt_type in self._classes:
            raise ValueError(
                f"{opt_type.kind!r} already registered to class {self._classes[opt_type]!r}"
            )

        self._classes[opt_type] = cls
        self._meta[opt_type] = FactoryMeta(
            kind=opt_type.kind, plugin_name=plugin_name, module=plugin_module_name
        )

    def load_from_plugins(
        self, plugin_name: Optional[str] = None, allow_external_plugins: bool = False
    ):
        """Discover plugins through pluggy and register what they provide.

        Args:
            plugin_name: Name given to the ``PluginManager`` and to
                ``load_setuptools_entrypoints``; falls back to
                ``self.plugin_name`` when empty or ``None``.
            allow_external_plugins: When false, plugins whose module name
                does not start with ``"docling."`` are skipped with a warning.
        """
        plugin_name = plugin_name or self.plugin_name

        plugin_manager = PluginManager(plugin_name)
        plugin_manager.load_setuptools_entrypoints(plugin_name)

        # A distinct loop name keeps the ``plugin_name`` argument intact.
        for loaded_name, plugin_module in plugin_manager.list_name_plugin():
            plugin_module_name = str(plugin_module.__name__)  # type: ignore

            if not allow_external_plugins and not plugin_module_name.startswith(
                "docling."
            ):
                logger.warning(
                    "The plugin %s will not be loaded because Docling is being executed with allow_external_plugins=false.",
                    loaded_name,
                )
                continue

            attr = getattr(plugin_module, self.plugin_attr_name, None)

            # Plugins that do not expose this factory's callable are ignored.
            if callable(attr):
                logger.info("Loading plugin %r", loaded_name)

                config = attr()
                self.process_plugin(config, loaded_name, plugin_module_name)

    def process_plugin(self, config, plugin_name: str, plugin_module_name: str):
        """Register every class listed under ``config[self.plugin_attr_name]``.

        A class whose options type is already registered is skipped with a
        warning instead of aborting the remaining registrations.
        """
        for item in config[self.plugin_attr_name]:
            try:
                self.register(item, plugin_name, plugin_module_name)
            except ValueError:
                logger.warning("%r already registered", item)
|
@@ -0,0 +1,11 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from docling.models.base_ocr_model import BaseOcrModel
|
4
|
+
from docling.models.factories.base_factory import BaseFactory
|
5
|
+
|
6
|
+
logger = logging.getLogger(__name__)
|
7
|
+
|
8
|
+
|
9
|
+
class OcrFactory(BaseFactory[BaseOcrModel]):
    """``BaseFactory`` specialization bound to the "ocr_engines" plugin attribute."""

    def __init__(self, *args, **kwargs):
        # The plugin attribute name is fixed here; all other arguments are
        # forwarded to BaseFactory unchanged.
        super().__init__("ocr_engines", *args, **kwargs)
|
@@ -0,0 +1,11 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from docling.models.factories.base_factory import BaseFactory
|
4
|
+
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
5
|
+
|
6
|
+
logger = logging.getLogger(__name__)
|
7
|
+
|
8
|
+
|
9
|
+
class PictureDescriptionFactory(BaseFactory[PictureDescriptionBaseModel]):
    """``BaseFactory`` specialization bound to the "picture_description" plugin attribute."""

    def __init__(self, *args, **kwargs):
        # The plugin attribute name is fixed here; all other arguments are
        # forwarded to BaseFactory unchanged.
        super().__init__("picture_description", *args, **kwargs)
|
docling/models/ocr_mac_model.py
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
import logging
|
2
|
+
import sys
|
2
3
|
import tempfile
|
3
|
-
from
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Iterable, Optional, Tuple, Type
|
4
6
|
|
5
7
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
8
|
+
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
6
9
|
|
7
|
-
from docling.datamodel.base_models import
|
10
|
+
from docling.datamodel.base_models import Page
|
8
11
|
from docling.datamodel.document import ConversionResult
|
9
|
-
from docling.datamodel.pipeline_options import
|
12
|
+
from docling.datamodel.pipeline_options import (
|
13
|
+
AcceleratorOptions,
|
14
|
+
OcrMacOptions,
|
15
|
+
OcrOptions,
|
16
|
+
)
|
10
17
|
from docling.datamodel.settings import settings
|
11
18
|
from docling.models.base_ocr_model import BaseOcrModel
|
12
19
|
from docling.utils.profiling import TimeRecorder
|
@@ -15,18 +22,31 @@ _log = logging.getLogger(__name__)
|
|
15
22
|
|
16
23
|
|
17
24
|
class OcrMacModel(BaseOcrModel):
|
18
|
-
def __init__(
|
19
|
-
|
25
|
+
def __init__(
|
26
|
+
self,
|
27
|
+
enabled: bool,
|
28
|
+
artifacts_path: Optional[Path],
|
29
|
+
options: OcrMacOptions,
|
30
|
+
accelerator_options: AcceleratorOptions,
|
31
|
+
):
|
32
|
+
super().__init__(
|
33
|
+
enabled=enabled,
|
34
|
+
artifacts_path=artifacts_path,
|
35
|
+
options=options,
|
36
|
+
accelerator_options=accelerator_options,
|
37
|
+
)
|
20
38
|
self.options: OcrMacOptions
|
21
39
|
|
22
40
|
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
23
41
|
|
24
42
|
if self.enabled:
|
43
|
+
if "darwin" != sys.platform:
|
44
|
+
raise RuntimeError(f"OcrMac is only supported on Mac.")
|
25
45
|
install_errmsg = (
|
26
46
|
"ocrmac is not correctly installed. "
|
27
47
|
"Please install it via `pip install ocrmac` to use this OCR engine. "
|
28
48
|
"Alternatively, Docling has support for other OCR engines. See the documentation: "
|
29
|
-
"https://
|
49
|
+
"https://docling-project.github.io/docling/installation/"
|
30
50
|
)
|
31
51
|
try:
|
32
52
|
from ocrmac import ocrmac
|
@@ -94,13 +114,17 @@ class OcrMacModel(BaseOcrModel):
|
|
94
114
|
bottom = y2 / self.scale
|
95
115
|
|
96
116
|
cells.append(
|
97
|
-
|
98
|
-
|
117
|
+
TextCell(
|
118
|
+
index=ix,
|
99
119
|
text=text,
|
120
|
+
orig=text,
|
121
|
+
from_ocr=True,
|
100
122
|
confidence=confidence,
|
101
|
-
|
102
|
-
|
103
|
-
|
123
|
+
rect=BoundingRectangle.from_bounding_box(
|
124
|
+
BoundingBox.from_tuple(
|
125
|
+
coord=(left, top, right, bottom),
|
126
|
+
origin=CoordOrigin.TOPLEFT,
|
127
|
+
)
|
104
128
|
),
|
105
129
|
)
|
106
130
|
)
|
@@ -116,3 +140,7 @@ class OcrMacModel(BaseOcrModel):
|
|
116
140
|
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
|
117
141
|
|
118
142
|
yield page
|
143
|
+
|
144
|
+
@classmethod
|
145
|
+
def get_options_type(cls) -> Type[OcrOptions]:
|
146
|
+
return OcrMacOptions
|
@@ -13,6 +13,7 @@ from docling.utils.profiling import TimeRecorder
|
|
13
13
|
|
14
14
|
class PagePreprocessingOptions(BaseModel):
|
15
15
|
images_scale: Optional[float]
|
16
|
+
create_parsed_page: bool
|
16
17
|
|
17
18
|
|
18
19
|
class PagePreprocessingModel(BasePageModel):
|
@@ -55,6 +56,9 @@ class PagePreprocessingModel(BasePageModel):
|
|
55
56
|
|
56
57
|
page.cells = list(page._backend.get_text_cells())
|
57
58
|
|
59
|
+
if self.options.create_parsed_page:
|
60
|
+
page.parsed_page = page._backend.get_segmented_page()
|
61
|
+
|
58
62
|
# DEBUG code:
|
59
63
|
def draw_text_boxes(image, cells, show: bool = False):
|
60
64
|
draw = ImageDraw.Draw(image)
|
@@ -1,13 +1,18 @@
|
|
1
1
|
import base64
|
2
2
|
import io
|
3
3
|
import logging
|
4
|
-
from
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Iterable, List, Optional, Type, Union
|
5
6
|
|
6
7
|
import requests
|
7
8
|
from PIL import Image
|
8
9
|
from pydantic import BaseModel, ConfigDict
|
9
10
|
|
10
|
-
from docling.datamodel.pipeline_options import
|
11
|
+
from docling.datamodel.pipeline_options import (
|
12
|
+
AcceleratorOptions,
|
13
|
+
PictureDescriptionApiOptions,
|
14
|
+
PictureDescriptionBaseOptions,
|
15
|
+
)
|
11
16
|
from docling.exceptions import OperationNotAllowed
|
12
17
|
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
13
18
|
|
@@ -46,13 +51,25 @@ class ApiResponse(BaseModel):
|
|
46
51
|
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
|
47
52
|
# elements_batch_size = 4
|
48
53
|
|
54
|
+
@classmethod
|
55
|
+
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
|
56
|
+
return PictureDescriptionApiOptions
|
57
|
+
|
49
58
|
def __init__(
|
50
59
|
self,
|
51
60
|
enabled: bool,
|
52
61
|
enable_remote_services: bool,
|
62
|
+
artifacts_path: Optional[Union[Path, str]],
|
53
63
|
options: PictureDescriptionApiOptions,
|
64
|
+
accelerator_options: AcceleratorOptions,
|
54
65
|
):
|
55
|
-
super().__init__(
|
66
|
+
super().__init__(
|
67
|
+
enabled=enabled,
|
68
|
+
enable_remote_services=enable_remote_services,
|
69
|
+
artifacts_path=artifacts_path,
|
70
|
+
options=options,
|
71
|
+
accelerator_options=accelerator_options,
|
72
|
+
)
|
56
73
|
self.options: PictureDescriptionApiOptions
|
57
74
|
|
58
75
|
if self.enabled:
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import logging
|
2
|
+
from abc import abstractmethod
|
2
3
|
from pathlib import Path
|
3
|
-
from typing import Any, Iterable, List, Optional, Union
|
4
|
+
from typing import Any, Iterable, List, Optional, Type, Union
|
4
5
|
|
5
6
|
from docling_core.types.doc import (
|
6
7
|
DoclingDocument,
|
@@ -13,20 +14,30 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
|
13
14
|
)
|
14
15
|
from PIL import Image
|
15
16
|
|
16
|
-
from docling.datamodel.pipeline_options import
|
17
|
+
from docling.datamodel.pipeline_options import (
|
18
|
+
AcceleratorOptions,
|
19
|
+
PictureDescriptionBaseOptions,
|
20
|
+
)
|
17
21
|
from docling.models.base_model import (
|
18
22
|
BaseItemAndImageEnrichmentModel,
|
23
|
+
BaseModelWithOptions,
|
19
24
|
ItemAndImageEnrichmentElement,
|
20
25
|
)
|
21
26
|
|
22
27
|
|
23
|
-
class PictureDescriptionBaseModel(
|
28
|
+
class PictureDescriptionBaseModel(
|
29
|
+
BaseItemAndImageEnrichmentModel, BaseModelWithOptions
|
30
|
+
):
|
24
31
|
images_scale: float = 2.0
|
25
32
|
|
26
33
|
def __init__(
|
27
34
|
self,
|
35
|
+
*,
|
28
36
|
enabled: bool,
|
37
|
+
enable_remote_services: bool,
|
38
|
+
artifacts_path: Optional[Union[Path, str]],
|
29
39
|
options: PictureDescriptionBaseOptions,
|
40
|
+
accelerator_options: AcceleratorOptions,
|
30
41
|
):
|
31
42
|
self.enabled = enabled
|
32
43
|
self.options = options
|
@@ -62,3 +73,8 @@ class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
|
|
62
73
|
PictureDescriptionData(text=output, provenance=self.provenance)
|
63
74
|
)
|
64
75
|
yield item
|
76
|
+
|
77
|
+
@classmethod
|
78
|
+
@abstractmethod
|
79
|
+
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
|
80
|
+
pass
|
@@ -1,10 +1,11 @@
|
|
1
1
|
from pathlib import Path
|
2
|
-
from typing import Iterable, Optional, Union
|
2
|
+
from typing import Iterable, Optional, Type, Union
|
3
3
|
|
4
4
|
from PIL import Image
|
5
5
|
|
6
6
|
from docling.datamodel.pipeline_options import (
|
7
7
|
AcceleratorOptions,
|
8
|
+
PictureDescriptionBaseOptions,
|
8
9
|
PictureDescriptionVlmOptions,
|
9
10
|
)
|
10
11
|
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
@@ -13,14 +14,25 @@ from docling.utils.accelerator_utils import decide_device
|
|
13
14
|
|
14
15
|
class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
|
15
16
|
|
17
|
+
@classmethod
|
18
|
+
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
|
19
|
+
return PictureDescriptionVlmOptions
|
20
|
+
|
16
21
|
def __init__(
|
17
22
|
self,
|
18
23
|
enabled: bool,
|
24
|
+
enable_remote_services: bool,
|
19
25
|
artifacts_path: Optional[Union[Path, str]],
|
20
26
|
options: PictureDescriptionVlmOptions,
|
21
27
|
accelerator_options: AcceleratorOptions,
|
22
28
|
):
|
23
|
-
super().__init__(
|
29
|
+
super().__init__(
|
30
|
+
enabled=enabled,
|
31
|
+
enable_remote_services=enable_remote_services,
|
32
|
+
artifacts_path=artifacts_path,
|
33
|
+
options=options,
|
34
|
+
accelerator_options=accelerator_options,
|
35
|
+
)
|
24
36
|
self.options: PictureDescriptionVlmOptions
|
25
37
|
|
26
38
|
if self.enabled:
|
File without changes
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from docling.models.easyocr_model import EasyOcrModel
|
2
|
+
from docling.models.ocr_mac_model import OcrMacModel
|
3
|
+
from docling.models.picture_description_api_model import PictureDescriptionApiModel
|
4
|
+
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
|
5
|
+
from docling.models.rapid_ocr_model import RapidOcrModel
|
6
|
+
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
7
|
+
from docling.models.tesseract_ocr_model import TesseractOcrModel
|
8
|
+
|
9
|
+
|
10
|
+
def ocr_engines():
    """Return the OCR model classes imported by this module.

    Returns:
        A dict with the single key ``"ocr_engines"`` mapped to the list of
        model classes.
    """
    engine_classes = [
        EasyOcrModel,
        OcrMacModel,
        RapidOcrModel,
        TesseractOcrModel,
        TesseractOcrCliModel,
    ]
    return {"ocr_engines": engine_classes}
|
20
|
+
|
21
|
+
|
22
|
+
def picture_description():
    """Return the picture-description model classes imported by this module.

    Returns:
        A dict with the single key ``"picture_description"`` mapped to the
        list of model classes.
    """
    description_classes = [
        PictureDescriptionVlmModel,
        PictureDescriptionApiModel,
    ]
    return {"picture_description": description_classes}
|