docling-core 2.34.1__tar.gz → 2.35.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling_core-2.34.1 → docling_core-2.35.0}/PKG-INFO +1 -1
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/visualizer/layout_visualizer.py +8 -4
- docling_core-2.35.0/docling_core/transforms/visualizer/table_visualizer.py +135 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/doc/document.py +13 -6
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core.egg-info/PKG-INFO +1 -1
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core.egg-info/SOURCES.txt +1 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/pyproject.toml +1 -1
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_visualization.py +14 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/LICENSE +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/README.md +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/cli/view.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/py.typed +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/search/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/search/mapping.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/search/meta.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/search/package.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/common.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/doctags.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/html.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/html_styles.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/markdown.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/doc/page.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/alias.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/file.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/validate.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core/utils/validators.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core.egg-info/dependency_links.txt +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core.egg-info/entry_points.txt +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core.egg-info/requires.txt +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/docling_core.egg-info/top_level.txt +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/setup.cfg +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_collection.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_data_gen_flag.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_doc_base.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_doc_legacy_convert.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_doc_schema.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_doc_schema_extractor.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_docling_doc.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_doctags_load.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_hierarchical_chunker.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_hybrid_chunker.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_json_schema_to_search_mapper.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_nlp_qa.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_otsl_table_export.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_page.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_rec_schema.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_search_meta.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_serialization.py +0 -0
- {docling_core-2.34.1 → docling_core-2.35.0}/test/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.34.1
|
|
3
|
+
Version: 2.35.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/visualizer/layout_visualizer.py
RENAMED
|
@@ -119,7 +119,10 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
119
119
|
)
|
|
120
120
|
|
|
121
121
|
def _draw_doc_layout(
|
|
122
|
-
self,
|
|
122
|
+
self,
|
|
123
|
+
doc: DoclingDocument,
|
|
124
|
+
images: Optional[dict[Optional[int], Image]] = None,
|
|
125
|
+
included_content_layers: Optional[set[ContentLayer]] = None,
|
|
123
126
|
):
|
|
124
127
|
"""Draw the document clusters and optionaly the reading order."""
|
|
125
128
|
clusters = []
|
|
@@ -128,6 +131,9 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
128
131
|
if images is not None:
|
|
129
132
|
my_images = images
|
|
130
133
|
|
|
134
|
+
if included_content_layers is None:
|
|
135
|
+
included_content_layers = {c for c in ContentLayer}
|
|
136
|
+
|
|
131
137
|
# Initialise `my_images` beforehand: sometimes, you have the
|
|
132
138
|
# page-images but no DocItems!
|
|
133
139
|
for page_nr, page in doc.pages.items():
|
|
@@ -141,9 +147,7 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
141
147
|
prev_image = None
|
|
142
148
|
prev_page_nr = None
|
|
143
149
|
for idx, (elem, _) in enumerate(
|
|
144
|
-
doc.iterate_items(
|
|
145
|
-
included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE}
|
|
146
|
-
)
|
|
150
|
+
doc.iterate_items(included_content_layers=included_content_layers)
|
|
147
151
|
):
|
|
148
152
|
if not isinstance(elem, DocItem):
|
|
149
153
|
continue
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Define classes for layout visualization."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from PIL import ImageDraw
|
|
8
|
+
from PIL.Image import Image
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
from typing_extensions import override
|
|
11
|
+
|
|
12
|
+
from docling_core.transforms.visualizer.base import BaseVisualizer
|
|
13
|
+
from docling_core.types.doc.document import ContentLayer, DoclingDocument, TableItem
|
|
14
|
+
|
|
15
|
+
_log = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TableVisualizer(BaseVisualizer):
|
|
19
|
+
"""Table visualizer."""
|
|
20
|
+
|
|
21
|
+
class Params(BaseModel):
|
|
22
|
+
"""Table visualization parameters."""
|
|
23
|
+
|
|
24
|
+
# show_Label: bool = False
|
|
25
|
+
show_cells: bool = True
|
|
26
|
+
# show_rows: bool = False
|
|
27
|
+
# show_cols: bool = False
|
|
28
|
+
|
|
29
|
+
base_visualizer: Optional[BaseVisualizer] = None
|
|
30
|
+
params: Params = Params()
|
|
31
|
+
|
|
32
|
+
def _draw_table_cells(
|
|
33
|
+
self,
|
|
34
|
+
table: TableItem,
|
|
35
|
+
page_image: Image,
|
|
36
|
+
page_height: float,
|
|
37
|
+
scale_x: float,
|
|
38
|
+
scale_y: float,
|
|
39
|
+
):
|
|
40
|
+
"""Draw individual table cells."""
|
|
41
|
+
draw = ImageDraw.Draw(page_image, "RGBA")
|
|
42
|
+
|
|
43
|
+
for cell in table.data.table_cells:
|
|
44
|
+
if cell.bbox is not None:
|
|
45
|
+
|
|
46
|
+
tl_bbox = cell.bbox.to_top_left_origin(page_height=page_height)
|
|
47
|
+
|
|
48
|
+
cell_color = (256, 0, 0, 32) # Transparent black for cells
|
|
49
|
+
|
|
50
|
+
cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
|
|
51
|
+
cx0 *= scale_x
|
|
52
|
+
cx1 *= scale_x
|
|
53
|
+
cy0 *= scale_y
|
|
54
|
+
cy1 *= scale_y
|
|
55
|
+
|
|
56
|
+
draw.rectangle(
|
|
57
|
+
[(cx0, cy0), (cx1, cy1)],
|
|
58
|
+
outline=(256, 0, 0, 128),
|
|
59
|
+
fill=cell_color,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
def _draw_doc_tables(
|
|
63
|
+
self,
|
|
64
|
+
doc: DoclingDocument,
|
|
65
|
+
images: Optional[dict[Optional[int], Image]] = None,
|
|
66
|
+
included_content_layers: Optional[set[ContentLayer]] = None,
|
|
67
|
+
):
|
|
68
|
+
"""Draw the document tables."""
|
|
69
|
+
my_images: dict[Optional[int], Image] = {}
|
|
70
|
+
|
|
71
|
+
if images is not None:
|
|
72
|
+
my_images = images
|
|
73
|
+
|
|
74
|
+
if included_content_layers is None:
|
|
75
|
+
included_content_layers = {c for c in ContentLayer}
|
|
76
|
+
|
|
77
|
+
# Initialise `my_images` beforehand: sometimes, you have the
|
|
78
|
+
# page-images but no DocItems!
|
|
79
|
+
for page_nr, page in doc.pages.items():
|
|
80
|
+
page_image = doc.pages[page_nr].image
|
|
81
|
+
if page_image is None or (pil_img := page_image.pil_image) is None:
|
|
82
|
+
raise RuntimeError("Cannot visualize document without images")
|
|
83
|
+
elif page_nr not in my_images:
|
|
84
|
+
image = deepcopy(pil_img)
|
|
85
|
+
my_images[page_nr] = image
|
|
86
|
+
|
|
87
|
+
for idx, (elem, _) in enumerate(
|
|
88
|
+
doc.iterate_items(included_content_layers=included_content_layers)
|
|
89
|
+
):
|
|
90
|
+
if not isinstance(elem, TableItem):
|
|
91
|
+
continue
|
|
92
|
+
if len(elem.prov) == 0:
|
|
93
|
+
continue # Skip elements without provenances
|
|
94
|
+
|
|
95
|
+
if len(elem.prov) == 1:
|
|
96
|
+
|
|
97
|
+
page_nr = elem.prov[0].page_no
|
|
98
|
+
|
|
99
|
+
if page_nr in my_images:
|
|
100
|
+
image = my_images[page_nr]
|
|
101
|
+
|
|
102
|
+
if self.params.show_cells:
|
|
103
|
+
self._draw_table_cells(
|
|
104
|
+
table=elem,
|
|
105
|
+
page_height=doc.pages[page_nr].size.height,
|
|
106
|
+
page_image=image,
|
|
107
|
+
scale_x=image.width / doc.pages[page_nr].size.width,
|
|
108
|
+
scale_y=image.height / doc.pages[page_nr].size.height,
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
else:
|
|
112
|
+
raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
|
|
113
|
+
|
|
114
|
+
else:
|
|
115
|
+
_log.error("Can not yet visualise tables with multiple provenances")
|
|
116
|
+
|
|
117
|
+
return my_images
|
|
118
|
+
|
|
119
|
+
@override
|
|
120
|
+
def get_visualization(
|
|
121
|
+
self,
|
|
122
|
+
*,
|
|
123
|
+
doc: DoclingDocument,
|
|
124
|
+
**kwargs,
|
|
125
|
+
) -> dict[Optional[int], Image]:
|
|
126
|
+
"""Get visualization of the document as images by page."""
|
|
127
|
+
base_images = (
|
|
128
|
+
self.base_visualizer.get_visualization(doc=doc, **kwargs)
|
|
129
|
+
if self.base_visualizer
|
|
130
|
+
else None
|
|
131
|
+
)
|
|
132
|
+
return self._draw_doc_tables(
|
|
133
|
+
doc=doc,
|
|
134
|
+
images=base_images,
|
|
135
|
+
)
|
|
@@ -1874,12 +1874,19 @@ class DoclingDocument(BaseModel):
|
|
|
1874
1874
|
|
|
1875
1875
|
return item.get_ref()
|
|
1876
1876
|
|
|
1877
|
-
def _delete_items(self, refs: list[RefItem])
|
|
1877
|
+
def _delete_items(self, refs: list[RefItem]):
|
|
1878
1878
|
"""Delete document item using the self-reference."""
|
|
1879
1879
|
to_be_deleted_items: dict[tuple[int, ...], str] = {} # stack to cref
|
|
1880
1880
|
|
|
1881
|
+
if not refs:
|
|
1882
|
+
return
|
|
1883
|
+
|
|
1881
1884
|
# Identify the to_be_deleted_items
|
|
1882
|
-
for item, stack in self._iterate_items_with_stack(
|
|
1885
|
+
for item, stack in self._iterate_items_with_stack(
|
|
1886
|
+
with_groups=True,
|
|
1887
|
+
traverse_pictures=True,
|
|
1888
|
+
included_content_layers={c for c in ContentLayer},
|
|
1889
|
+
):
|
|
1883
1890
|
ref = item.get_ref()
|
|
1884
1891
|
|
|
1885
1892
|
if ref in refs:
|
|
@@ -1890,8 +1897,10 @@ class DoclingDocument(BaseModel):
|
|
|
1890
1897
|
if tuple(substack) in to_be_deleted_items:
|
|
1891
1898
|
to_be_deleted_items[tuple(stack)] = ref.cref
|
|
1892
1899
|
|
|
1893
|
-
if len(to_be_deleted_items)
|
|
1894
|
-
raise ValueError(
|
|
1900
|
+
if len(to_be_deleted_items) < len(refs):
|
|
1901
|
+
raise ValueError(
|
|
1902
|
+
f"Cannot find all provided RefItems in doc: {[r.cref for r in refs]}"
|
|
1903
|
+
)
|
|
1895
1904
|
|
|
1896
1905
|
# Clean the tree, reverse the order to not have to update
|
|
1897
1906
|
for stack_, ref_ in reversed(sorted(to_be_deleted_items.items())):
|
|
@@ -1931,8 +1940,6 @@ class DoclingDocument(BaseModel):
|
|
|
1931
1940
|
node=self.body, refs_to_be_deleted=refs, lookup=lookup
|
|
1932
1941
|
)
|
|
1933
1942
|
|
|
1934
|
-
return True
|
|
1935
|
-
|
|
1936
1943
|
# Update the references
|
|
1937
1944
|
def _update_ref_with_lookup(
|
|
1938
1945
|
self, item_label: str, item_index: int, lookup: dict[str, dict[int, int]]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.34.1
|
|
3
|
+
Version: 2.35.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -45,6 +45,7 @@ docling_core/transforms/visualizer/__init__.py
|
|
|
45
45
|
docling_core/transforms/visualizer/base.py
|
|
46
46
|
docling_core/transforms/visualizer/layout_visualizer.py
|
|
47
47
|
docling_core/transforms/visualizer/reading_order_visualizer.py
|
|
48
|
+
docling_core/transforms/visualizer/table_visualizer.py
|
|
48
49
|
docling_core/types/__init__.py
|
|
49
50
|
docling_core/types/base.py
|
|
50
51
|
docling_core/types/doc/__init__.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "2.34.1" # DO NOT EDIT, updated automatically
|
|
3
|
+
version = "2.35.0" # DO NOT EDIT, updated automatically
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
license-files = ["LICENSE"]
|
|
@@ -2,6 +2,7 @@ from pathlib import Path
|
|
|
2
2
|
|
|
3
3
|
import PIL.Image
|
|
4
4
|
|
|
5
|
+
from docling_core.transforms.visualizer.table_visualizer import TableVisualizer
|
|
5
6
|
from docling_core.types.doc.document import DoclingDocument
|
|
6
7
|
|
|
7
8
|
from .test_data_gen_flag import GEN_TEST_DATA
|
|
@@ -52,3 +53,16 @@ def test_doc_visualization_no_label():
|
|
|
52
53
|
exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_viz_wout_lbl_p{k}.png",
|
|
53
54
|
actual=viz_pages[k],
|
|
54
55
|
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_table_visualization_no_label():
|
|
59
|
+
src = Path("./test/data/doc/2408.09869v3_enriched.json")
|
|
60
|
+
doc = DoclingDocument.load_from_json(src)
|
|
61
|
+
|
|
62
|
+
visualizer = TableVisualizer()
|
|
63
|
+
viz_pages = visualizer.get_visualization(doc=doc)
|
|
64
|
+
|
|
65
|
+
verify(
|
|
66
|
+
exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_table_viz_wout_lbl_p5.png",
|
|
67
|
+
actual=viz_pages[5],
|
|
68
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/__init__.py
RENAMED
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/base.py
RENAMED
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/huggingface.py
RENAMED
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/chunker/tokenizer/openai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.1 → docling_core-2.35.0}/docling_core/transforms/serializer/html_styles.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|