docling-core 2.34.0__tar.gz → 2.34.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- {docling_core-2.34.0 → docling_core-2.34.2}/PKG-INFO +1 -1
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/document.py +22 -11
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/PKG-INFO +1 -1
- {docling_core-2.34.0 → docling_core-2.34.2}/pyproject.toml +1 -1
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_docling_doc.py +6 -4
- {docling_core-2.34.0 → docling_core-2.34.2}/LICENSE +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/README.md +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/cli/view.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/py.typed +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/mapping.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/meta.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/package.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/common.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/doctags.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/html.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/html_styles.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/markdown.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/page.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/alias.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/file.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/validate.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/validators.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/SOURCES.txt +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/dependency_links.txt +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/entry_points.txt +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/requires.txt +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/top_level.txt +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/setup.cfg +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_collection.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_data_gen_flag.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_base.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_legacy_convert.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_schema.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_schema_extractor.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doctags_load.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_hierarchical_chunker.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_hybrid_chunker.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_json_schema_to_search_mapper.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_nlp_qa.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_otsl_table_export.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_page.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_rec_schema.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_search_meta.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_serialization.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_utils.py +0 -0
- {docling_core-2.34.0 → docling_core-2.34.2}/test/test_visualization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.34.0
|
|
3
|
+
Version: 2.34.2
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -1874,12 +1874,19 @@ class DoclingDocument(BaseModel):
|
|
|
1874
1874
|
|
|
1875
1875
|
return item.get_ref()
|
|
1876
1876
|
|
|
1877
|
-
def _delete_items(self, refs: list[RefItem]) -> bool:
|
|
1877
|
+
def _delete_items(self, refs: list[RefItem]):
|
|
1878
1878
|
"""Delete document item using the self-reference."""
|
|
1879
1879
|
to_be_deleted_items: dict[tuple[int, ...], str] = {} # stack to cref
|
|
1880
1880
|
|
|
1881
|
+
if not refs:
|
|
1882
|
+
return
|
|
1883
|
+
|
|
1881
1884
|
# Identify the to_be_deleted_items
|
|
1882
|
-
for item, stack in self._iterate_items_with_stack(
|
|
1885
|
+
for item, stack in self._iterate_items_with_stack(
|
|
1886
|
+
with_groups=True,
|
|
1887
|
+
traverse_pictures=True,
|
|
1888
|
+
included_content_layers={c for c in ContentLayer},
|
|
1889
|
+
):
|
|
1883
1890
|
ref = item.get_ref()
|
|
1884
1891
|
|
|
1885
1892
|
if ref in refs:
|
|
@@ -1890,8 +1897,10 @@ class DoclingDocument(BaseModel):
|
|
|
1890
1897
|
if tuple(substack) in to_be_deleted_items:
|
|
1891
1898
|
to_be_deleted_items[tuple(stack)] = ref.cref
|
|
1892
1899
|
|
|
1893
|
-
if len(to_be_deleted_items)
|
|
1894
|
-
raise ValueError(
|
|
1900
|
+
if len(to_be_deleted_items) < len(refs):
|
|
1901
|
+
raise ValueError(
|
|
1902
|
+
f"Cannot find all provided RefItems in doc: {[r.cref for r in refs]}"
|
|
1903
|
+
)
|
|
1895
1904
|
|
|
1896
1905
|
# Clean the tree, reverse the order to not have to update
|
|
1897
1906
|
for stack_, ref_ in reversed(sorted(to_be_deleted_items.items())):
|
|
@@ -1931,8 +1940,6 @@ class DoclingDocument(BaseModel):
|
|
|
1931
1940
|
node=self.body, refs_to_be_deleted=refs, lookup=lookup
|
|
1932
1941
|
)
|
|
1933
1942
|
|
|
1934
|
-
return True
|
|
1935
|
-
|
|
1936
1943
|
# Update the references
|
|
1937
1944
|
def _update_ref_with_lookup(
|
|
1938
1945
|
self, item_label: str, item_index: int, lookup: dict[str, dict[int, int]]
|
|
@@ -2167,7 +2174,10 @@ class DoclingDocument(BaseModel):
|
|
|
2167
2174
|
|
|
2168
2175
|
"""
|
|
2169
2176
|
if not isinstance(parent, (OrderedList, UnorderedList)):
|
|
2170
|
-
|
|
2177
|
+
warnings.warn("ListItem's parent must be a list group.", DeprecationWarning)
|
|
2178
|
+
|
|
2179
|
+
if not parent:
|
|
2180
|
+
parent = self.body
|
|
2171
2181
|
|
|
2172
2182
|
if not orig:
|
|
2173
2183
|
orig = text
|
|
@@ -4270,11 +4280,12 @@ class DoclingDocument(BaseModel):
|
|
|
4270
4280
|
item.parent.resolve(doc=self), (OrderedList, UnorderedList)
|
|
4271
4281
|
)
|
|
4272
4282
|
):
|
|
4273
|
-
|
|
4274
|
-
|
|
4275
|
-
|
|
4276
|
-
else:
|
|
4283
|
+
if isinstance(prev, ListItem) and (
|
|
4284
|
+
prev.parent is None or prev.parent.resolve(self) == self.body
|
|
4285
|
+
): # case of continuing list
|
|
4277
4286
|
misplaced_list_items[-1].append(item)
|
|
4287
|
+
else: # case of new list
|
|
4288
|
+
misplaced_list_items.append([item])
|
|
4278
4289
|
prev = item
|
|
4279
4290
|
|
|
4280
4291
|
for curr_list_items in reversed(misplaced_list_items):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.34.0
|
|
3
|
+
Version: 2.34.2
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "2.34.0" # DO NOT EDIT, updated automatically
|
|
3
|
+
version = "2.34.2" # DO NOT EDIT, updated automatically
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
license-files = ["LICENSE"]
|
|
@@ -757,9 +757,6 @@ def _construct_doc() -> DoclingDocument:
|
|
|
757
757
|
leading_list = doc.add_group(parent=None, label=GroupLabel.LIST)
|
|
758
758
|
doc.add_list_item(parent=leading_list, text="item of leading list")
|
|
759
759
|
|
|
760
|
-
with pytest.raises(ValueError, match="list group"):
|
|
761
|
-
doc.add_list_item(text="Misplaced list item")
|
|
762
|
-
|
|
763
760
|
title = doc.add_title(
|
|
764
761
|
text="Title of the Document"
|
|
765
762
|
) # can be done if such information is present, or ommitted.
|
|
@@ -1080,6 +1077,9 @@ def _construct_doc() -> DoclingDocument:
|
|
|
1080
1077
|
|
|
1081
1078
|
doc.add_list_item(text="Item 4 in A", enumerated=True, parent=parent_A)
|
|
1082
1079
|
|
|
1080
|
+
with pytest.warns(DeprecationWarning, match="list group"):
|
|
1081
|
+
doc.add_list_item(text="List item without parent list group")
|
|
1082
|
+
|
|
1083
1083
|
doc.add_text(label=DocItemLabel.TEXT, text="The end.", parent=None)
|
|
1084
1084
|
|
|
1085
1085
|
return doc
|
|
@@ -1519,7 +1519,9 @@ def test_document_manipulation():
|
|
|
1519
1519
|
DoclingDocument.load_from_json(filename=_gt_filename(filename=filename))
|
|
1520
1520
|
|
|
1521
1521
|
# test if the document is the same as the stored GT
|
|
1522
|
-
_verify_loaded_output(
|
|
1522
|
+
_verify_loaded_output(
|
|
1523
|
+
filename=filename, pred=DoclingDocument.model_validate(document)
|
|
1524
|
+
)
|
|
1523
1525
|
|
|
1524
1526
|
image_dir = Path("./test/data/doc/constructed_images/")
|
|
1525
1527
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/__init__.py
RENAMED
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/base.py
RENAMED
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/huggingface.py
RENAMED
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/openai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/html_styles.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/layout_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|