docling-core 2.26.3__tar.gz → 2.26.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (71):
  1. {docling_core-2.26.3 → docling_core-2.26.4}/PKG-INFO +1 -1
  2. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/serializer/common.py +28 -3
  3. {docling_core-2.26.3 → docling_core-2.26.4}/pyproject.toml +1 -1
  4. {docling_core-2.26.3 → docling_core-2.26.4}/LICENSE +0 -0
  5. {docling_core-2.26.3 → docling_core-2.26.4}/README.md +0 -0
  6. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/__init__.py +0 -0
  7. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/cli/__init__.py +0 -0
  8. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/cli/view.py +0 -0
  9. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/__init__.py +0 -0
  10. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/serializer/__init__.py +0 -0
  11. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/serializer/base.py +0 -0
  12. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/serializer/doctags.py +0 -0
  13. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/serializer/html.py +0 -0
  14. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/serializer/html_styles.py +0 -0
  15. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/experimental/serializer/markdown.py +0 -0
  16. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/py.typed +0 -0
  17. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/doc/ANN.json +0 -0
  18. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/doc/DOC.json +0 -0
  19. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  20. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/doc/RAW.json +0 -0
  21. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  22. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  23. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  24. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  25. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/search/__init__.py +0 -0
  26. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  27. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/search/mapping.py +0 -0
  28. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/search/meta.py +0 -0
  29. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/search/package.py +0 -0
  30. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/transforms/__init__.py +0 -0
  31. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/transforms/chunker/__init__.py +0 -0
  32. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/transforms/chunker/base.py +0 -0
  33. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  34. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  35. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/__init__.py +0 -0
  36. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/base.py +0 -0
  37. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/doc/__init__.py +0 -0
  38. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/doc/base.py +0 -0
  39. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/doc/document.py +0 -0
  40. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/doc/labels.py +0 -0
  41. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/doc/page.py +0 -0
  42. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/doc/tokens.py +0 -0
  43. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/doc/utils.py +0 -0
  44. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/gen/__init__.py +0 -0
  45. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/gen/generic.py +0 -0
  46. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/io/__init__.py +0 -0
  47. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/legacy_doc/__init__.py +0 -0
  48. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/legacy_doc/base.py +0 -0
  49. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  50. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  51. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  52. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/legacy_doc/document.py +0 -0
  53. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/legacy_doc/tokens.py +0 -0
  54. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/nlp/__init__.py +0 -0
  55. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/nlp/qa.py +0 -0
  56. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/nlp/qa_labels.py +0 -0
  57. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/rec/__init__.py +0 -0
  58. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/rec/attribute.py +0 -0
  59. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/rec/base.py +0 -0
  60. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/rec/predicate.py +0 -0
  61. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/rec/record.py +0 -0
  62. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/rec/statement.py +0 -0
  63. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/types/rec/subject.py +0 -0
  64. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/__init__.py +0 -0
  65. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/alias.py +0 -0
  66. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/file.py +0 -0
  67. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/generate_docs.py +0 -0
  68. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/generate_jsonschema.py +0 -0
  69. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/legacy.py +0 -0
  70. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/validate.py +0 -0
  71. {docling_core-2.26.3 → docling_core-2.26.4}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.26.3
3
+ Version: 2.26.4
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://github.com/docling-project
6
6
  License: MIT
@@ -69,7 +69,9 @@ def _iterate_items(
69
69
  node: Optional[NodeItem] = None,
70
70
  traverse_pictures: bool = False,
71
71
  add_page_breaks: bool = False,
72
+ visited: Optional[set[str]] = None,
72
73
  ):
74
+ my_visited: set[str] = visited if visited is not None else set()
73
75
  prev_page_nr: Optional[int] = None
74
76
  page_break_i = 0
75
77
  for item, _ in doc.iterate_items(
@@ -78,10 +80,33 @@ def _iterate_items(
78
80
  included_content_layers=layers,
79
81
  traverse_pictures=traverse_pictures,
80
82
  ):
81
- if isinstance(item, DocItem):
82
- if item.prov:
83
+ if add_page_breaks:
84
+ if (
85
+ isinstance(item, (UnorderedList, OrderedList, InlineGroup))
86
+ and item.self_ref not in my_visited
87
+ ):
88
+ # if group starts with new page, yield page break before group node
89
+ my_visited.add(item.self_ref)
90
+ for it in _iterate_items(
91
+ doc=doc,
92
+ layers=layers,
93
+ node=item,
94
+ traverse_pictures=traverse_pictures,
95
+ add_page_breaks=add_page_breaks,
96
+ visited=my_visited,
97
+ ):
98
+ if isinstance(it, DocItem) and it.prov:
99
+ page_no = it.prov[0].page_no
100
+ if prev_page_nr is not None and page_no > prev_page_nr:
101
+ yield _PageBreakNode(
102
+ self_ref=f"#/pb/{page_break_i}",
103
+ prev_page=prev_page_nr,
104
+ next_page=page_no,
105
+ )
106
+ break
107
+ elif isinstance(item, DocItem) and item.prov:
83
108
  page_no = item.prov[0].page_no
84
- if add_page_breaks and (prev_page_nr is None or page_no > prev_page_nr):
109
+ if prev_page_nr is None or page_no > prev_page_nr:
85
110
  if prev_page_nr is not None: # close previous range
86
111
  yield _PageBreakNode(
87
112
  self_ref=f"#/pb/{page_break_i}",
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "2.26.3"
3
+ version = "2.26.4"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
File without changes
File without changes