docling-core 2.34.0__tar.gz → 2.34.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (106) hide show
  1. {docling_core-2.34.0 → docling_core-2.34.2}/PKG-INFO +1 -1
  2. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/document.py +22 -11
  3. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/PKG-INFO +1 -1
  4. {docling_core-2.34.0 → docling_core-2.34.2}/pyproject.toml +1 -1
  5. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_docling_doc.py +6 -4
  6. {docling_core-2.34.0 → docling_core-2.34.2}/LICENSE +0 -0
  7. {docling_core-2.34.0 → docling_core-2.34.2}/README.md +0 -0
  8. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/__init__.py +0 -0
  9. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/cli/__init__.py +0 -0
  10. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/cli/view.py +0 -0
  11. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/experimental/__init__.py +0 -0
  12. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/py.typed +0 -0
  13. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/ANN.json +0 -0
  14. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/DOC.json +0 -0
  15. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  16. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/doc/RAW.json +0 -0
  17. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  18. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  19. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  20. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  21. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/__init__.py +0 -0
  22. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  23. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/mapping.py +0 -0
  24. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/meta.py +0 -0
  25. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/search/package.py +0 -0
  26. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/__init__.py +0 -0
  27. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/__init__.py +0 -0
  28. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/base.py +0 -0
  29. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  30. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  31. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  32. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  33. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  34. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  35. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/__init__.py +0 -0
  36. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/base.py +0 -0
  37. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/common.py +0 -0
  38. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/doctags.py +0 -0
  39. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/html.py +0 -0
  40. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/html_styles.py +0 -0
  41. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/serializer/markdown.py +0 -0
  42. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/__init__.py +0 -0
  43. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/base.py +0 -0
  44. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
  45. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
  46. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/__init__.py +0 -0
  47. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/base.py +0 -0
  48. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/__init__.py +0 -0
  49. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/base.py +0 -0
  50. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/labels.py +0 -0
  51. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/page.py +0 -0
  52. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/tokens.py +0 -0
  53. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/doc/utils.py +0 -0
  54. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/gen/__init__.py +0 -0
  55. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/gen/generic.py +0 -0
  56. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/io/__init__.py +0 -0
  57. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/__init__.py +0 -0
  58. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/base.py +0 -0
  59. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  60. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  61. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  62. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/document.py +0 -0
  63. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/legacy_doc/tokens.py +0 -0
  64. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/nlp/__init__.py +0 -0
  65. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/nlp/qa.py +0 -0
  66. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/nlp/qa_labels.py +0 -0
  67. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/__init__.py +0 -0
  68. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/attribute.py +0 -0
  69. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/base.py +0 -0
  70. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/predicate.py +0 -0
  71. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/record.py +0 -0
  72. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/statement.py +0 -0
  73. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/types/rec/subject.py +0 -0
  74. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/__init__.py +0 -0
  75. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/alias.py +0 -0
  76. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/file.py +0 -0
  77. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/generate_docs.py +0 -0
  78. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/generate_jsonschema.py +0 -0
  79. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/legacy.py +0 -0
  80. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/validate.py +0 -0
  81. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core/utils/validators.py +0 -0
  82. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/SOURCES.txt +0 -0
  83. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/dependency_links.txt +0 -0
  84. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/entry_points.txt +0 -0
  85. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/requires.txt +0 -0
  86. {docling_core-2.34.0 → docling_core-2.34.2}/docling_core.egg-info/top_level.txt +0 -0
  87. {docling_core-2.34.0 → docling_core-2.34.2}/setup.cfg +0 -0
  88. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_base.py +0 -0
  89. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_collection.py +0 -0
  90. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_data_gen_flag.py +0 -0
  91. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_base.py +0 -0
  92. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_legacy_convert.py +0 -0
  93. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_schema.py +0 -0
  94. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doc_schema_extractor.py +0 -0
  95. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_doctags_load.py +0 -0
  96. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_hierarchical_chunker.py +0 -0
  97. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_hybrid_chunker.py +0 -0
  98. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_json_schema_to_search_mapper.py +0 -0
  99. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_nlp_qa.py +0 -0
  100. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_otsl_table_export.py +0 -0
  101. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_page.py +0 -0
  102. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_rec_schema.py +0 -0
  103. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_search_meta.py +0 -0
  104. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_serialization.py +0 -0
  105. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_utils.py +0 -0
  106. {docling_core-2.34.0 → docling_core-2.34.2}/test/test_visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.34.0
3
+ Version: 2.34.2
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -1874,12 +1874,19 @@ class DoclingDocument(BaseModel):
1874
1874
 
1875
1875
  return item.get_ref()
1876
1876
 
1877
- def _delete_items(self, refs: list[RefItem]) -> bool:
1877
+ def _delete_items(self, refs: list[RefItem]):
1878
1878
  """Delete document item using the self-reference."""
1879
1879
  to_be_deleted_items: dict[tuple[int, ...], str] = {} # stack to cref
1880
1880
 
1881
+ if not refs:
1882
+ return
1883
+
1881
1884
  # Identify the to_be_deleted_items
1882
- for item, stack in self._iterate_items_with_stack(with_groups=True):
1885
+ for item, stack in self._iterate_items_with_stack(
1886
+ with_groups=True,
1887
+ traverse_pictures=True,
1888
+ included_content_layers={c for c in ContentLayer},
1889
+ ):
1883
1890
  ref = item.get_ref()
1884
1891
 
1885
1892
  if ref in refs:
@@ -1890,8 +1897,10 @@ class DoclingDocument(BaseModel):
1890
1897
  if tuple(substack) in to_be_deleted_items:
1891
1898
  to_be_deleted_items[tuple(stack)] = ref.cref
1892
1899
 
1893
- if len(to_be_deleted_items) == 0:
1894
- raise ValueError("Nothing to be deleted ...")
1900
+ if len(to_be_deleted_items) < len(refs):
1901
+ raise ValueError(
1902
+ f"Cannot find all provided RefItems in doc: {[r.cref for r in refs]}"
1903
+ )
1895
1904
 
1896
1905
  # Clean the tree, reverse the order to not have to update
1897
1906
  for stack_, ref_ in reversed(sorted(to_be_deleted_items.items())):
@@ -1931,8 +1940,6 @@ class DoclingDocument(BaseModel):
1931
1940
  node=self.body, refs_to_be_deleted=refs, lookup=lookup
1932
1941
  )
1933
1942
 
1934
- return True
1935
-
1936
1943
  # Update the references
1937
1944
  def _update_ref_with_lookup(
1938
1945
  self, item_label: str, item_index: int, lookup: dict[str, dict[int, int]]
@@ -2167,7 +2174,10 @@ class DoclingDocument(BaseModel):
2167
2174
 
2168
2175
  """
2169
2176
  if not isinstance(parent, (OrderedList, UnorderedList)):
2170
- raise ValueError("ListItem's parent must be a list group")
2177
+ warnings.warn("ListItem's parent must be a list group.", DeprecationWarning)
2178
+
2179
+ if not parent:
2180
+ parent = self.body
2171
2181
 
2172
2182
  if not orig:
2173
2183
  orig = text
@@ -4270,11 +4280,12 @@ class DoclingDocument(BaseModel):
4270
4280
  item.parent.resolve(doc=self), (OrderedList, UnorderedList)
4271
4281
  )
4272
4282
  ):
4273
- # non_group_list_items.append(item)
4274
- if prev is None or not isinstance(prev, ListItem): # if new list
4275
- misplaced_list_items.append([item])
4276
- else:
4283
+ if isinstance(prev, ListItem) and (
4284
+ prev.parent is None or prev.parent.resolve(self) == self.body
4285
+ ): # case of continuing list
4277
4286
  misplaced_list_items[-1].append(item)
4287
+ else: # case of new list
4288
+ misplaced_list_items.append([item])
4278
4289
  prev = item
4279
4290
 
4280
4291
  for curr_list_items in reversed(misplaced_list_items):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.34.0
3
+ Version: 2.34.2
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling-core"
3
- version = "2.34.0" # DO NOT EDIT, updated automatically
3
+ version = "2.34.2" # DO NOT EDIT, updated automatically
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -757,9 +757,6 @@ def _construct_doc() -> DoclingDocument:
757
757
  leading_list = doc.add_group(parent=None, label=GroupLabel.LIST)
758
758
  doc.add_list_item(parent=leading_list, text="item of leading list")
759
759
 
760
- with pytest.raises(ValueError, match="list group"):
761
- doc.add_list_item(text="Misplaced list item")
762
-
763
760
  title = doc.add_title(
764
761
  text="Title of the Document"
765
762
  ) # can be done if such information is present, or omitted.
@@ -1080,6 +1077,9 @@ def _construct_doc() -> DoclingDocument:
1080
1077
 
1081
1078
  doc.add_list_item(text="Item 4 in A", enumerated=True, parent=parent_A)
1082
1079
 
1080
+ with pytest.warns(DeprecationWarning, match="list group"):
1081
+ doc.add_list_item(text="List item without parent list group")
1082
+
1083
1083
  doc.add_text(label=DocItemLabel.TEXT, text="The end.", parent=None)
1084
1084
 
1085
1085
  return doc
@@ -1519,7 +1519,9 @@ def test_document_manipulation():
1519
1519
  DoclingDocument.load_from_json(filename=_gt_filename(filename=filename))
1520
1520
 
1521
1521
  # test if the document is the same as the stored GT
1522
- _verify_loaded_output(filename=filename, pred=doc)
1522
+ _verify_loaded_output(
1523
+ filename=filename, pred=DoclingDocument.model_validate(document)
1524
+ )
1523
1525
 
1524
1526
  image_dir = Path("./test/data/doc/constructed_images/")
1525
1527
 
File without changes
File without changes
File without changes