docling-core 2.28.0__tar.gz → 2.28.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.28.0 → docling_core-2.28.1}/PKG-INFO +1 -1
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/visualizer/layout_visualizer.py +20 -9
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/doc/document.py +1 -1
- {docling_core-2.28.0 → docling_core-2.28.1}/pyproject.toml +1 -1
- {docling_core-2.28.0 → docling_core-2.28.1}/LICENSE +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/README.md +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/cli/view.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/common.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/doctags.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/html.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/html_styles.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/markdown.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/py.typed +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/search/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/search/mapping.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/search/meta.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/search/package.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/doc/page.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/alias.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/file.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/validate.py +0 -0
- {docling_core-2.28.0 → docling_core-2.28.1}/docling_core/utils/validators.py +0 -0
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/visualizer/layout_visualizer.py
RENAMED
|
@@ -123,7 +123,21 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
123
123
|
):
|
|
124
124
|
"""Draw the document clusters and optionaly the reading order."""
|
|
125
125
|
clusters = []
|
|
126
|
-
my_images
|
|
126
|
+
my_images: dict[Optional[int], Image] = {}
|
|
127
|
+
|
|
128
|
+
if images is not None:
|
|
129
|
+
my_images = images
|
|
130
|
+
|
|
131
|
+
# Initialise `my_images` beforehand: sometimes, you have the
|
|
132
|
+
# page-images but no DocItems!
|
|
133
|
+
for page_nr, page in doc.pages.items():
|
|
134
|
+
page_image = doc.pages[page_nr].image
|
|
135
|
+
if page_image is None or (pil_img := page_image.pil_image) is None:
|
|
136
|
+
raise RuntimeError("Cannot visualize document without images")
|
|
137
|
+
elif page_nr not in my_images:
|
|
138
|
+
image = deepcopy(pil_img)
|
|
139
|
+
my_images[page_nr] = image
|
|
140
|
+
|
|
127
141
|
prev_image = None
|
|
128
142
|
prev_page_nr = None
|
|
129
143
|
for idx, (elem, _) in enumerate(
|
|
@@ -137,7 +151,11 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
137
151
|
continue # Skip elements without provenances
|
|
138
152
|
prov = elem.prov[0]
|
|
139
153
|
page_nr = prov.page_no
|
|
140
|
-
|
|
154
|
+
|
|
155
|
+
if page_nr in my_images:
|
|
156
|
+
image = my_images[page_nr]
|
|
157
|
+
else:
|
|
158
|
+
raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
|
|
141
159
|
|
|
142
160
|
if prev_page_nr is None or page_nr > prev_page_nr: # new page begins
|
|
143
161
|
# complete previous drawing
|
|
@@ -150,13 +168,6 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
150
168
|
)
|
|
151
169
|
clusters = []
|
|
152
170
|
|
|
153
|
-
if image is None:
|
|
154
|
-
page_image = doc.pages[page_nr].image
|
|
155
|
-
if page_image is None or (pil_img := page_image.pil_image) is None:
|
|
156
|
-
raise RuntimeError("Cannot visualize document without images")
|
|
157
|
-
else:
|
|
158
|
-
image = deepcopy(pil_img)
|
|
159
|
-
my_images[page_nr] = image
|
|
160
171
|
tlo_bbox = prov.bbox.to_top_left_origin(
|
|
161
172
|
page_height=doc.pages[prov.page_no].size.height
|
|
162
173
|
)
|
|
@@ -1383,7 +1383,7 @@ class TableItem(FloatingItem):
|
|
|
1383
1383
|
if add_cross_cell:
|
|
1384
1384
|
body.append(str(TableToken.OTSL_XCEL.value))
|
|
1385
1385
|
body.append(str(TableToken.OTSL_NL.value))
|
|
1386
|
-
|
|
1386
|
+
body_str = "".join(body)
|
|
1387
1387
|
return body_str
|
|
1388
1388
|
|
|
1389
1389
|
@deprecated("Use export_to_doctags() instead.")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/html_styles.py
RENAMED
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/experimental/serializer/markdown.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/__init__.py
RENAMED
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/base.py
RENAMED
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/huggingface.py
RENAMED
|
File without changes
|
{docling_core-2.28.0 → docling_core-2.28.1}/docling_core/transforms/chunker/tokenizer/openai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|