docling-core 2.26.2__py3-none-any.whl → 2.26.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -69,7 +69,9 @@ def _iterate_items(
69
69
  node: Optional[NodeItem] = None,
70
70
  traverse_pictures: bool = False,
71
71
  add_page_breaks: bool = False,
72
+ visited: Optional[set[str]] = None,
72
73
  ):
74
+ my_visited: set[str] = visited if visited is not None else set()
73
75
  prev_page_nr: Optional[int] = None
74
76
  page_break_i = 0
75
77
  for item, _ in doc.iterate_items(
@@ -78,10 +80,33 @@ def _iterate_items(
78
80
  included_content_layers=layers,
79
81
  traverse_pictures=traverse_pictures,
80
82
  ):
81
- if isinstance(item, DocItem):
82
- if item.prov:
83
+ if add_page_breaks:
84
+ if (
85
+ isinstance(item, (UnorderedList, OrderedList, InlineGroup))
86
+ and item.self_ref not in my_visited
87
+ ):
88
+ # if group starts with new page, yield page break before group node
89
+ my_visited.add(item.self_ref)
90
+ for it in _iterate_items(
91
+ doc=doc,
92
+ layers=layers,
93
+ node=item,
94
+ traverse_pictures=traverse_pictures,
95
+ add_page_breaks=add_page_breaks,
96
+ visited=my_visited,
97
+ ):
98
+ if isinstance(it, DocItem) and it.prov:
99
+ page_no = it.prov[0].page_no
100
+ if prev_page_nr is not None and page_no > prev_page_nr:
101
+ yield _PageBreakNode(
102
+ self_ref=f"#/pb/{page_break_i}",
103
+ prev_page=prev_page_nr,
104
+ next_page=page_no,
105
+ )
106
+ break
107
+ elif isinstance(item, DocItem) and item.prov:
83
108
  page_no = item.prov[0].page_no
84
- if add_page_breaks and (prev_page_nr is None or page_no > prev_page_nr):
109
+ if prev_page_nr is None or page_no > prev_page_nr:
85
110
  if prev_page_nr is not None: # close previous range
86
111
  yield _PageBreakNode(
87
112
  self_ref=f"#/pb/{page_break_i}",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.26.2
3
+ Version: 2.26.4
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://github.com/docling-project
6
6
  License: MIT
@@ -36,7 +36,7 @@ Requires-Dist: pyyaml (>=5.1,<7.0.0)
36
36
  Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking"
37
37
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
38
38
  Requires-Dist: transformers (>=4.34.0,<5.0.0) ; extra == "chunking"
39
- Requires-Dist: typer (>=0.12.5,<0.13.0)
39
+ Requires-Dist: typer (>=0.12.5,<0.16.0)
40
40
  Requires-Dist: typing-extensions (>=4.12.2,<5.0.0)
41
41
  Project-URL: Repository, https://github.com/docling-project/docling-core
42
42
  Description-Content-Type: text/markdown
@@ -4,7 +4,7 @@ docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
4
4
  docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
5
5
  docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
6
6
  docling_core/experimental/serializer/base.py,sha256=1sD1v5rWC4MT_Y6BWpMDjUAwuEqC0TR9YjQJZlhPt50,5901
7
- docling_core/experimental/serializer/common.py,sha256=z80B2BzUdDfp_HgZ1KA64vK-oV07jcgxLs1XyCgp7sI,16152
7
+ docling_core/experimental/serializer/common.py,sha256=iQUJPRZUhpGMi_s4makkZcINy5sdtxu2ehS9N8lnoMM,17332
8
8
  docling_core/experimental/serializer/doctags.py,sha256=e97FJHh77x--g2t1O2YprBzF8lkihn_xOr59EjnR7ag,17794
9
9
  docling_core/experimental/serializer/html.py,sha256=C-xf_PH28R6DoISFDoZwBK67rsfFmTemgZ_VW2i0haY,31896
10
10
  docling_core/experimental/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
@@ -65,8 +65,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
65
65
  docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
66
66
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
67
67
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
68
- docling_core-2.26.2.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
69
- docling_core-2.26.2.dist-info/METADATA,sha256=motwJC3ixvU3oYstkDMsu5YmnHryDzuFfn3udzpDGrY,5843
70
- docling_core-2.26.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
71
- docling_core-2.26.2.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
72
- docling_core-2.26.2.dist-info/RECORD,,
68
+ docling_core-2.26.4.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
69
+ docling_core-2.26.4.dist-info/METADATA,sha256=S9qfeO5R35zkBwzIYnd5q3-O3LBl3-WOsQRlL50klqw,5843
70
+ docling_core-2.26.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
71
+ docling_core-2.26.4.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
72
+ docling_core-2.26.4.dist-info/RECORD,,