docling-core 2.43.0__tar.gz → 2.43.1__tar.gz

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (109)
  1. {docling_core-2.43.0 → docling_core-2.43.1}/PKG-INFO +1 -1
  2. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/serializer/html.py +17 -0
  3. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/visualizer/layout_visualizer.py +3 -1
  4. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core.egg-info/PKG-INFO +1 -1
  5. {docling_core-2.43.0 → docling_core-2.43.1}/pyproject.toml +1 -1
  6. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_base.py +2 -2
  7. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_docling_doc.py +11 -9
  8. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_doctags_load.py +8 -4
  9. {docling_core-2.43.0 → docling_core-2.43.1}/LICENSE +0 -0
  10. {docling_core-2.43.0 → docling_core-2.43.1}/README.md +0 -0
  11. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/__init__.py +0 -0
  12. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/cli/__init__.py +0 -0
  13. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/cli/view.py +0 -0
  14. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/experimental/__init__.py +0 -0
  15. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/py.typed +0 -0
  16. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
  17. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
  18. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  19. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
  20. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  21. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  22. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  23. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  24. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/search/__init__.py +0 -0
  25. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  26. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/search/mapping.py +0 -0
  27. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/search/meta.py +0 -0
  28. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/search/package.py +0 -0
  29. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/__init__.py +0 -0
  30. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/__init__.py +0 -0
  31. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/base.py +0 -0
  32. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  33. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  34. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/page_chunker.py +0 -0
  35. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  36. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  37. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  38. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  39. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/serializer/__init__.py +0 -0
  40. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/serializer/base.py +0 -0
  41. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/serializer/common.py +0 -0
  42. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/serializer/doctags.py +0 -0
  43. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/serializer/html_styles.py +0 -0
  44. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/serializer/markdown.py +0 -0
  45. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/visualizer/__init__.py +0 -0
  46. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/visualizer/base.py +0 -0
  47. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
  48. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
  49. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/__init__.py +0 -0
  50. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/base.py +0 -0
  51. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/doc/__init__.py +0 -0
  52. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/doc/base.py +0 -0
  53. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/doc/document.py +0 -0
  54. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/doc/labels.py +0 -0
  55. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/doc/page.py +0 -0
  56. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/doc/tokens.py +0 -0
  57. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/doc/utils.py +0 -0
  58. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/gen/__init__.py +0 -0
  59. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/gen/generic.py +0 -0
  60. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/io/__init__.py +0 -0
  61. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/legacy_doc/__init__.py +0 -0
  62. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/legacy_doc/base.py +0 -0
  63. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  64. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  65. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  66. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/legacy_doc/document.py +0 -0
  67. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/legacy_doc/tokens.py +0 -0
  68. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/nlp/__init__.py +0 -0
  69. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/nlp/qa.py +0 -0
  70. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/nlp/qa_labels.py +0 -0
  71. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/rec/__init__.py +0 -0
  72. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/rec/attribute.py +0 -0
  73. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/rec/base.py +0 -0
  74. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/rec/predicate.py +0 -0
  75. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/rec/record.py +0 -0
  76. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/rec/statement.py +0 -0
  77. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/types/rec/subject.py +0 -0
  78. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/__init__.py +0 -0
  79. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/alias.py +0 -0
  80. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/file.py +0 -0
  81. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/generate_docs.py +0 -0
  82. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/generate_jsonschema.py +0 -0
  83. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/legacy.py +0 -0
  84. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/validate.py +0 -0
  85. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core/utils/validators.py +0 -0
  86. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core.egg-info/SOURCES.txt +0 -0
  87. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core.egg-info/dependency_links.txt +0 -0
  88. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core.egg-info/entry_points.txt +0 -0
  89. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core.egg-info/requires.txt +0 -0
  90. {docling_core-2.43.0 → docling_core-2.43.1}/docling_core.egg-info/top_level.txt +0 -0
  91. {docling_core-2.43.0 → docling_core-2.43.1}/setup.cfg +0 -0
  92. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_collection.py +0 -0
  93. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_data_gen_flag.py +0 -0
  94. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_doc_base.py +0 -0
  95. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_doc_legacy_convert.py +0 -0
  96. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_doc_schema.py +0 -0
  97. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_doc_schema_extractor.py +0 -0
  98. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_hierarchical_chunker.py +0 -0
  99. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_hybrid_chunker.py +0 -0
  100. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_json_schema_to_search_mapper.py +0 -0
  101. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_nlp_qa.py +0 -0
  102. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_otsl_table_export.py +0 -0
  103. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_page.py +0 -0
  104. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_page_chunker.py +0 -0
  105. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_rec_schema.py +0 -0
  106. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_search_meta.py +0 -0
  107. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_serialization.py +0 -0
  108. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_utils.py +0 -0
  109. {docling_core-2.43.0 → docling_core-2.43.1}/test/test_visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.43.0
3
+ Version: 2.43.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -713,6 +713,23 @@ class HTMLListSerializer(BaseModel, BaseListSerializer):
713
713
  **kwargs,
714
714
  )
715
715
 
716
+ # Append nested list to parent list item:
717
+ i = 0
718
+ while i < len(parts):
719
+ prt = parts[i]
720
+ if prt.text.startswith(("<ul>", "<ol>")):
721
+ for j in range(i - 1, -1, -1):
722
+ if parts[j].text.startswith(("<li>", "<li ")) and parts[
723
+ j
724
+ ].text.endswith("</li>"):
725
+ before, _, _ = parts[j].text.rpartition("</li>")
726
+ parts[j].text = f"{before}\n{prt.text}\n</li>"
727
+ break
728
+ if j > -1:
729
+ parts.pop(i)
730
+ else:
731
+ i += 1
732
+
716
733
  # Add all child parts
717
734
  text_res = "\n".join(
718
735
  [
@@ -148,7 +148,9 @@ class LayoutVisualizer(BaseVisualizer):
148
148
  prev_image = None
149
149
  prev_page_nr = None
150
150
  for idx, (elem, _) in enumerate(
151
- doc.iterate_items(included_content_layers=included_content_layers)
151
+ doc.iterate_items(
152
+ included_content_layers=included_content_layers, traverse_pictures=True
153
+ )
152
154
  ):
153
155
  if not isinstance(elem, DocItem):
154
156
  continue
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.43.0
3
+ Version: 2.43.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling-core"
3
- version = "2.43.0" # DO NOT EDIT, updated automatically
3
+ version = "2.43.1" # DO NOT EDIT, updated automatically
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -36,8 +36,8 @@ def test_identifier():
36
36
  )
37
37
 
38
38
  # schema_json(): no need to set by_alias since it is True by the default
39
- tf = open("test/data/json_schemas/base_identifier.json", encoding="utf-8")
40
- gold_json = json.load(tf)
39
+ with open("test/data/json_schemas/base_identifier.json", encoding="utf-8") as tf:
40
+ gold_json = json.load(tf)
41
41
 
42
42
  assert Identifier.model_json_schema() == gold_json
43
43
 
@@ -1819,9 +1819,10 @@ def test_document_manipulation():
1819
1819
 
1820
1820
  # Test the handling of list items in insert_* methods, both with and without parent groups
1821
1821
 
1822
- li_sibling = doc.insert_list_item(
1823
- sibling=node, text="Inserted List Item, Incorrect Parent", after=False
1824
- )
1822
+ with pytest.warns(DeprecationWarning, match="ListItem parent must be a ListGroup"):
1823
+ li_sibling = doc.insert_list_item(
1824
+ sibling=node, text="Inserted List Item, Incorrect Parent", after=False
1825
+ )
1825
1826
  doc.insert_list_item(
1826
1827
  sibling=li_sibling, text="Inserted List Item, Correct Parent", after=True
1827
1828
  )
@@ -1831,12 +1832,13 @@ def test_document_manipulation():
1831
1832
  text="Inserted Text with LIST_ITEM Label, Correct Parent",
1832
1833
  after=False,
1833
1834
  )
1834
- doc.insert_text(
1835
- sibling=node,
1836
- label=DocItemLabel.LIST_ITEM,
1837
- text="Inserted Text with LIST_ITEM Label, Incorrect Parent",
1838
- after=True,
1839
- )
1835
+ with pytest.warns(DeprecationWarning, match="ListItem parent must be a ListGroup"):
1836
+ doc.insert_text(
1837
+ sibling=node,
1838
+ label=DocItemLabel.LIST_ITEM,
1839
+ text="Inserted Text with LIST_ITEM Label, Incorrect Parent",
1840
+ after=True,
1841
+ )
1840
1842
 
1841
1843
  filename = Path(
1842
1844
  "test/data/doc/constructed_doc.inserted_list_items_with_insert_*.json"
@@ -60,7 +60,8 @@ def test_doctags_load_from_files():
60
60
 
61
61
  def test_doctags_load_from_memory():
62
62
 
63
- doctags = Path("test/data/doc/page_with_pic.dt").open("r").read()
63
+ with Path("test/data/doc/page_with_pic.dt").open() as file:
64
+ doctags = file.read()
64
65
  image = PILImage.open(Path("test/data/doc/page_with_pic.png"))
65
66
 
66
67
  doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
@@ -75,7 +76,8 @@ def test_doctags_load_from_memory():
75
76
 
76
77
 
77
78
  def test_doctags_load_without_image():
78
- doctags = Path("test/data/doc/page_with_pic.dt").open("r").read()
79
+ with Path("test/data/doc/page_with_pic.dt").open() as file:
80
+ doctags = file.read()
79
81
  doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], None)
80
82
  doc = DoclingDocument.load_from_doctags(doctags_doc)
81
83
  exp = "test/data/doc/page_without_pic.dt.json"
@@ -86,7 +88,8 @@ def test_doctags_load_without_image():
86
88
 
87
89
 
88
90
  def test_doctags_load_for_kv_region():
89
- doctags = Path("test/data/doc/doc_with_kv.dt").open("r").read()
91
+ with Path("test/data/doc/doc_with_kv.dt").open() as file:
92
+ doctags = file.read()
90
93
  image = PILImage.open(Path("test/data/doc/doc_with_kv.png"))
91
94
  doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
92
95
  doc = DoclingDocument.load_from_doctags(doctags_doc)
@@ -98,7 +101,8 @@ def test_doctags_load_for_kv_region():
98
101
 
99
102
 
100
103
  def test_multipage_doctags_load():
101
- doctags = Path("test/data/doc/2206.01062.yaml.dt").open("r").read()
104
+ with Path("test/data/doc/2206.01062.yaml.dt").open() as file:
105
+ doctags = file.read()
102
106
  doctags_doc = DocTagsDocument.from_multipage_doctags_and_images(doctags, None)
103
107
  doc = DoclingDocument.load_from_doctags(doctags_doc)
104
108
  exp = "test/data/doc/2206.01062.yaml.dt.json"
File without changes
File without changes
File without changes