natural-pdf 0.1.35__py3-none-any.whl → 0.1.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/analyzers/__init__.py +16 -4
- natural_pdf/analyzers/guides.py +1053 -26
- natural_pdf/core/page.py +205 -45
- natural_pdf/core/pdf.py +16 -1
- natural_pdf/elements/collections.py +10 -0
- natural_pdf/elements/region.py +106 -14
- natural_pdf/elements/text.py +36 -2
- natural_pdf/flows/region.py +128 -26
- natural_pdf/selectors/parser.py +24 -0
- natural_pdf/utils/layout.py +26 -0
- natural_pdf/utils/text_extraction.py +76 -1
- {natural_pdf-0.1.35.dist-info → natural_pdf-0.1.37.dist-info}/METADATA +2 -1
- {natural_pdf-0.1.35.dist-info → natural_pdf-0.1.37.dist-info}/RECORD +17 -16
- {natural_pdf-0.1.35.dist-info → natural_pdf-0.1.37.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.35.dist-info → natural_pdf-0.1.37.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.1.35.dist-info → natural_pdf-0.1.37.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.35.dist-info → natural_pdf-0.1.37.dist-info}/top_level.txt +0 -0
natural_pdf/analyzers/guides.py
CHANGED
@@ -8,17 +8,20 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Uni
|
|
8
8
|
import numpy as np
|
9
9
|
from PIL import Image, ImageDraw
|
10
10
|
|
11
|
+
from natural_pdf.utils.layout import merge_bboxes
|
12
|
+
|
11
13
|
if TYPE_CHECKING:
|
12
14
|
from natural_pdf.core.page import Page
|
13
15
|
from natural_pdf.elements.base import Element
|
14
16
|
from natural_pdf.elements.collections import ElementCollection
|
15
17
|
from natural_pdf.elements.region import Region
|
18
|
+
from natural_pdf.flows.region import FlowRegion
|
16
19
|
|
17
20
|
logger = logging.getLogger(__name__)
|
18
21
|
|
19
22
|
|
20
23
|
def _normalize_markers(
|
21
|
-
markers: Union[str, List[str], "ElementCollection", None], obj: Union["Page", "Region"]
|
24
|
+
markers: Union[str, List[str], "ElementCollection", None], obj: Union["Page", "Region", "FlowRegion"]
|
22
25
|
) -> List[str]:
|
23
26
|
"""
|
24
27
|
Normalize markers parameter to a list of text strings for guide creation.
|
@@ -37,6 +40,21 @@ def _normalize_markers(
|
|
37
40
|
if markers is None:
|
38
41
|
return []
|
39
42
|
|
43
|
+
# Handle FlowRegion by collecting markers from all constituent regions
|
44
|
+
if hasattr(obj, "constituent_regions"):
|
45
|
+
all_markers = []
|
46
|
+
for region in obj.constituent_regions:
|
47
|
+
region_markers = _normalize_markers(markers, region)
|
48
|
+
all_markers.extend(region_markers)
|
49
|
+
# Remove duplicates while preserving order
|
50
|
+
seen = set()
|
51
|
+
unique_markers = []
|
52
|
+
for m in all_markers:
|
53
|
+
if m not in seen:
|
54
|
+
seen.add(m)
|
55
|
+
unique_markers.append(m)
|
56
|
+
return unique_markers
|
57
|
+
|
40
58
|
if isinstance(markers, str):
|
41
59
|
# Single selector or text string
|
42
60
|
if markers.startswith(("text", "region", "line", "rect", "blob", "image")):
|
@@ -115,7 +133,7 @@ class GuidesList(UserList):
|
|
115
133
|
def from_content(
|
116
134
|
self,
|
117
135
|
markers: Union[str, List[str], "ElementCollection", None],
|
118
|
-
obj: Optional[Union["Page", "Region"]] = None,
|
136
|
+
obj: Optional[Union["Page", "Region", "FlowRegion"]] = None,
|
119
137
|
align: Literal["left", "right", "center", "between"] = "left",
|
120
138
|
outer: bool = True,
|
121
139
|
tolerance: float = 5,
|
@@ -131,7 +149,7 @@ class GuidesList(UserList):
|
|
131
149
|
- List[str]: list of selectors or literal text strings
|
132
150
|
- ElementCollection: collection of elements to extract text from
|
133
151
|
- None: no markers
|
134
|
-
obj: Page/Region to search (uses parent's context if None)
|
152
|
+
obj: Page/Region/FlowRegion to search (uses parent's context if None)
|
135
153
|
align: How to align guides relative to found elements
|
136
154
|
outer: Whether to add outer boundary guides
|
137
155
|
tolerance: Tolerance for snapping to element edges
|
@@ -143,6 +161,88 @@ class GuidesList(UserList):
|
|
143
161
|
if target_obj is None:
|
144
162
|
raise ValueError("No object provided and no context available")
|
145
163
|
|
164
|
+
# Check if parent is in flow mode
|
165
|
+
if self._parent.is_flow_region:
|
166
|
+
# Create guides across all constituent regions
|
167
|
+
all_guides = []
|
168
|
+
for region in self._parent.context.constituent_regions:
|
169
|
+
# Normalize markers for this region
|
170
|
+
marker_texts = _normalize_markers(markers, region)
|
171
|
+
|
172
|
+
# Create guides for this region
|
173
|
+
region_guides = Guides.from_content(
|
174
|
+
obj=region,
|
175
|
+
axis=self._axis,
|
176
|
+
markers=marker_texts,
|
177
|
+
align=align,
|
178
|
+
outer=outer,
|
179
|
+
tolerance=tolerance,
|
180
|
+
)
|
181
|
+
|
182
|
+
# Collect guides from this region
|
183
|
+
if self._axis == "vertical":
|
184
|
+
all_guides.extend(region_guides.vertical)
|
185
|
+
else:
|
186
|
+
all_guides.extend(region_guides.horizontal)
|
187
|
+
|
188
|
+
# Update parent's flow guides structure
|
189
|
+
if append:
|
190
|
+
# Append to existing
|
191
|
+
existing = [coord for coord, _ in
|
192
|
+
(self._parent._unified_vertical if self._axis == "vertical"
|
193
|
+
else self._parent._unified_horizontal)]
|
194
|
+
all_guides = existing + all_guides
|
195
|
+
|
196
|
+
# Remove duplicates and sort
|
197
|
+
unique_guides = sorted(list(set(all_guides)))
|
198
|
+
|
199
|
+
# Clear and rebuild unified view
|
200
|
+
if self._axis == "vertical":
|
201
|
+
self._parent._unified_vertical = []
|
202
|
+
for coord in unique_guides:
|
203
|
+
# Find which region(s) this guide belongs to
|
204
|
+
for region in self._parent.context.constituent_regions:
|
205
|
+
if hasattr(region, "bbox"):
|
206
|
+
x0, _, x1, _ = region.bbox
|
207
|
+
if x0 <= coord <= x1:
|
208
|
+
self._parent._unified_vertical.append((coord, region))
|
209
|
+
break
|
210
|
+
self._parent._vertical_cache = None
|
211
|
+
self.data = unique_guides
|
212
|
+
else:
|
213
|
+
self._parent._unified_horizontal = []
|
214
|
+
for coord in unique_guides:
|
215
|
+
# Find which region(s) this guide belongs to
|
216
|
+
for region in self._parent.context.constituent_regions:
|
217
|
+
if hasattr(region, "bbox"):
|
218
|
+
_, y0, _, y1 = region.bbox
|
219
|
+
if y0 <= coord <= y1:
|
220
|
+
self._parent._unified_horizontal.append((coord, region))
|
221
|
+
break
|
222
|
+
self._parent._horizontal_cache = None
|
223
|
+
self.data = unique_guides
|
224
|
+
|
225
|
+
# Update per-region guides
|
226
|
+
for region in self._parent.context.constituent_regions:
|
227
|
+
region_verticals = []
|
228
|
+
region_horizontals = []
|
229
|
+
|
230
|
+
for coord, r in self._parent._unified_vertical:
|
231
|
+
if r == region:
|
232
|
+
region_verticals.append(coord)
|
233
|
+
|
234
|
+
for coord, r in self._parent._unified_horizontal:
|
235
|
+
if r == region:
|
236
|
+
region_horizontals.append(coord)
|
237
|
+
|
238
|
+
self._parent._flow_guides[region] = (
|
239
|
+
sorted(region_verticals),
|
240
|
+
sorted(region_horizontals)
|
241
|
+
)
|
242
|
+
|
243
|
+
return self._parent
|
244
|
+
|
245
|
+
# Original single-region logic
|
146
246
|
# Normalize markers to list of text strings
|
147
247
|
marker_texts = _normalize_markers(markers, target_obj)
|
148
248
|
|
@@ -181,7 +281,7 @@ class GuidesList(UserList):
|
|
181
281
|
|
182
282
|
def from_lines(
|
183
283
|
self,
|
184
|
-
obj: Optional[Union["Page", "Region"]] = None,
|
284
|
+
obj: Optional[Union["Page", "Region", "FlowRegion"]] = None,
|
185
285
|
threshold: Union[float, str] = "auto",
|
186
286
|
source_label: Optional[str] = None,
|
187
287
|
max_lines: Optional[int] = None,
|
@@ -198,7 +298,7 @@ class GuidesList(UserList):
|
|
198
298
|
Create guides from detected line elements.
|
199
299
|
|
200
300
|
Args:
|
201
|
-
obj: Page/Region to search (uses parent's context if None)
|
301
|
+
obj: Page/Region/FlowRegion to search (uses parent's context if None)
|
202
302
|
threshold: Line detection threshold ('auto' or float 0.0-1.0)
|
203
303
|
source_label: Filter lines by source label (for vector method)
|
204
304
|
max_lines: Maximum lines to use (alias: n)
|
@@ -236,6 +336,90 @@ class GuidesList(UserList):
|
|
236
336
|
axis_key = "min_gap_h" if self._axis == "horizontal" else "min_gap_v"
|
237
337
|
detect_kwargs.setdefault(axis_key, min_gap)
|
238
338
|
|
339
|
+
# Check if parent is in flow mode
|
340
|
+
if self._parent.is_flow_region:
|
341
|
+
# Create guides across all constituent regions
|
342
|
+
all_guides = []
|
343
|
+
|
344
|
+
for region in self._parent.context.constituent_regions:
|
345
|
+
# Create guides for this specific region
|
346
|
+
region_guides = Guides.from_lines(
|
347
|
+
obj=region,
|
348
|
+
axis=self._axis,
|
349
|
+
threshold=threshold,
|
350
|
+
source_label=source_label,
|
351
|
+
max_lines_h=max_lines_h,
|
352
|
+
max_lines_v=max_lines_v,
|
353
|
+
outer=outer,
|
354
|
+
detection_method=detection_method,
|
355
|
+
resolution=resolution,
|
356
|
+
**detect_kwargs
|
357
|
+
)
|
358
|
+
|
359
|
+
# Collect guides from this region
|
360
|
+
if self._axis == "vertical":
|
361
|
+
all_guides.extend(region_guides.vertical)
|
362
|
+
else:
|
363
|
+
all_guides.extend(region_guides.horizontal)
|
364
|
+
|
365
|
+
# Update parent's flow guides structure
|
366
|
+
if append:
|
367
|
+
# Append to existing
|
368
|
+
existing = [coord for coord, _ in
|
369
|
+
(self._parent._unified_vertical if self._axis == "vertical"
|
370
|
+
else self._parent._unified_horizontal)]
|
371
|
+
all_guides = existing + all_guides
|
372
|
+
|
373
|
+
# Remove duplicates and sort
|
374
|
+
unique_guides = sorted(list(set(all_guides)))
|
375
|
+
|
376
|
+
# Clear and rebuild unified view
|
377
|
+
if self._axis == "vertical":
|
378
|
+
self._parent._unified_vertical = []
|
379
|
+
for coord in unique_guides:
|
380
|
+
# Find which region(s) this guide belongs to
|
381
|
+
for region in self._parent.context.constituent_regions:
|
382
|
+
if hasattr(region, "bbox"):
|
383
|
+
x0, _, x1, _ = region.bbox
|
384
|
+
if x0 <= coord <= x1:
|
385
|
+
self._parent._unified_vertical.append((coord, region))
|
386
|
+
break
|
387
|
+
self._parent._vertical_cache = None
|
388
|
+
self.data = unique_guides
|
389
|
+
else:
|
390
|
+
self._parent._unified_horizontal = []
|
391
|
+
for coord in unique_guides:
|
392
|
+
# Find which region(s) this guide belongs to
|
393
|
+
for region in self._parent.context.constituent_regions:
|
394
|
+
if hasattr(region, "bbox"):
|
395
|
+
_, y0, _, y1 = region.bbox
|
396
|
+
if y0 <= coord <= y1:
|
397
|
+
self._parent._unified_horizontal.append((coord, region))
|
398
|
+
break
|
399
|
+
self._parent._horizontal_cache = None
|
400
|
+
self.data = unique_guides
|
401
|
+
|
402
|
+
# Update per-region guides
|
403
|
+
for region in self._parent.context.constituent_regions:
|
404
|
+
region_verticals = []
|
405
|
+
region_horizontals = []
|
406
|
+
|
407
|
+
for coord, r in self._parent._unified_vertical:
|
408
|
+
if r == region:
|
409
|
+
region_verticals.append(coord)
|
410
|
+
|
411
|
+
for coord, r in self._parent._unified_horizontal:
|
412
|
+
if r == region:
|
413
|
+
region_horizontals.append(coord)
|
414
|
+
|
415
|
+
self._parent._flow_guides[region] = (
|
416
|
+
sorted(region_verticals),
|
417
|
+
sorted(region_horizontals)
|
418
|
+
)
|
419
|
+
|
420
|
+
return self._parent
|
421
|
+
|
422
|
+
# Original single-region logic
|
239
423
|
# Create guides for this axis
|
240
424
|
new_guides = Guides.from_lines(
|
241
425
|
obj=target_obj,
|
@@ -274,14 +458,14 @@ class GuidesList(UserList):
|
|
274
458
|
return self._parent
|
275
459
|
|
276
460
|
def from_whitespace(
|
277
|
-
self, obj: Optional[Union["Page", "Region"]] = None, min_gap: float = 10,
|
461
|
+
self, obj: Optional[Union["Page", "Region", "FlowRegion"]] = None, min_gap: float = 10,
|
278
462
|
*, append: bool = False
|
279
463
|
) -> "Guides":
|
280
464
|
"""
|
281
465
|
Create guides from whitespace gaps.
|
282
466
|
|
283
467
|
Args:
|
284
|
-
obj: Page/Region to analyze (uses parent's context if None)
|
468
|
+
obj: Page/Region/FlowRegion to analyze (uses parent's context if None)
|
285
469
|
min_gap: Minimum gap size to consider
|
286
470
|
|
287
471
|
Returns:
|
@@ -291,6 +475,83 @@ class GuidesList(UserList):
|
|
291
475
|
if target_obj is None:
|
292
476
|
raise ValueError("No object provided and no context available")
|
293
477
|
|
478
|
+
# Check if parent is in flow mode
|
479
|
+
if self._parent.is_flow_region:
|
480
|
+
# Create guides across all constituent regions
|
481
|
+
all_guides = []
|
482
|
+
|
483
|
+
for region in self._parent.context.constituent_regions:
|
484
|
+
# Create guides for this specific region
|
485
|
+
region_guides = Guides.from_whitespace(
|
486
|
+
obj=region,
|
487
|
+
axis=self._axis,
|
488
|
+
min_gap=min_gap
|
489
|
+
)
|
490
|
+
|
491
|
+
# Collect guides from this region
|
492
|
+
if self._axis == "vertical":
|
493
|
+
all_guides.extend(region_guides.vertical)
|
494
|
+
else:
|
495
|
+
all_guides.extend(region_guides.horizontal)
|
496
|
+
|
497
|
+
# Update parent's flow guides structure
|
498
|
+
if append:
|
499
|
+
# Append to existing
|
500
|
+
existing = [coord for coord, _ in
|
501
|
+
(self._parent._unified_vertical if self._axis == "vertical"
|
502
|
+
else self._parent._unified_horizontal)]
|
503
|
+
all_guides = existing + all_guides
|
504
|
+
|
505
|
+
# Remove duplicates and sort
|
506
|
+
unique_guides = sorted(list(set(all_guides)))
|
507
|
+
|
508
|
+
# Clear and rebuild unified view
|
509
|
+
if self._axis == "vertical":
|
510
|
+
self._parent._unified_vertical = []
|
511
|
+
for coord in unique_guides:
|
512
|
+
# Find which region(s) this guide belongs to
|
513
|
+
for region in self._parent.context.constituent_regions:
|
514
|
+
if hasattr(region, "bbox"):
|
515
|
+
x0, _, x1, _ = region.bbox
|
516
|
+
if x0 <= coord <= x1:
|
517
|
+
self._parent._unified_vertical.append((coord, region))
|
518
|
+
break
|
519
|
+
self._parent._vertical_cache = None
|
520
|
+
self.data = unique_guides
|
521
|
+
else:
|
522
|
+
self._parent._unified_horizontal = []
|
523
|
+
for coord in unique_guides:
|
524
|
+
# Find which region(s) this guide belongs to
|
525
|
+
for region in self._parent.context.constituent_regions:
|
526
|
+
if hasattr(region, "bbox"):
|
527
|
+
_, y0, _, y1 = region.bbox
|
528
|
+
if y0 <= coord <= y1:
|
529
|
+
self._parent._unified_horizontal.append((coord, region))
|
530
|
+
break
|
531
|
+
self._parent._horizontal_cache = None
|
532
|
+
self.data = unique_guides
|
533
|
+
|
534
|
+
# Update per-region guides
|
535
|
+
for region in self._parent.context.constituent_regions:
|
536
|
+
region_verticals = []
|
537
|
+
region_horizontals = []
|
538
|
+
|
539
|
+
for coord, r in self._parent._unified_vertical:
|
540
|
+
if r == region:
|
541
|
+
region_verticals.append(coord)
|
542
|
+
|
543
|
+
for coord, r in self._parent._unified_horizontal:
|
544
|
+
if r == region:
|
545
|
+
region_horizontals.append(coord)
|
546
|
+
|
547
|
+
self._parent._flow_guides[region] = (
|
548
|
+
sorted(region_verticals),
|
549
|
+
sorted(region_horizontals)
|
550
|
+
)
|
551
|
+
|
552
|
+
return self._parent
|
553
|
+
|
554
|
+
# Original single-region logic
|
294
555
|
# Create guides for this axis
|
295
556
|
new_guides = Guides.from_whitespace(obj=target_obj, axis=self._axis, min_gap=min_gap)
|
296
557
|
|
@@ -618,9 +879,9 @@ class Guides:
|
|
618
879
|
|
619
880
|
def __init__(
|
620
881
|
self,
|
621
|
-
verticals: Optional[Union[List[float], "Page", "Region"]] = None,
|
882
|
+
verticals: Optional[Union[List[float], "Page", "Region", "FlowRegion"]] = None,
|
622
883
|
horizontals: Optional[List[float]] = None,
|
623
|
-
context: Optional[Union["Page", "Region"]] = None,
|
884
|
+
context: Optional[Union["Page", "Region", "FlowRegion"]] = None,
|
624
885
|
bounds: Optional[Tuple[float, float, float, float]] = None,
|
625
886
|
relative: bool = False,
|
626
887
|
snap_behavior: Literal["raise", "warn", "ignore"] = "warn",
|
@@ -629,21 +890,21 @@ class Guides:
|
|
629
890
|
Initialize a Guides object.
|
630
891
|
|
631
892
|
Args:
|
632
|
-
verticals: List of x-coordinates for vertical guides, or a Page/Region as context
|
893
|
+
verticals: List of x-coordinates for vertical guides, or a Page/Region/FlowRegion as context
|
633
894
|
horizontals: List of y-coordinates for horizontal guides
|
634
|
-
context: Page or Region object these guides were created from
|
895
|
+
context: Page, Region, or FlowRegion object these guides were created from
|
635
896
|
bounds: Bounding box (x0, top, x1, bottom) if context not provided
|
636
897
|
relative: Whether coordinates are relative (0-1) or absolute
|
637
898
|
snap_behavior: How to handle snapping conflicts ('raise', 'warn', or 'ignore')
|
638
899
|
"""
|
639
|
-
# Handle Guides(page) shorthand
|
900
|
+
# Handle Guides(page) or Guides(flow_region) shorthand
|
640
901
|
if (
|
641
902
|
verticals is not None
|
642
903
|
and not isinstance(verticals, (list, tuple))
|
643
904
|
and horizontals is None
|
644
905
|
and context is None
|
645
906
|
):
|
646
|
-
# First argument is a page/region, not coordinates
|
907
|
+
# First argument is a page/region/flow_region, not coordinates
|
647
908
|
context = verticals
|
648
909
|
verticals = None
|
649
910
|
|
@@ -652,6 +913,19 @@ class Guides:
|
|
652
913
|
self.relative = relative
|
653
914
|
self.snap_behavior = snap_behavior
|
654
915
|
|
916
|
+
# Check if we're dealing with a FlowRegion
|
917
|
+
self.is_flow_region = hasattr(context, "constituent_regions")
|
918
|
+
|
919
|
+
# If FlowRegion, we'll store guides per constituent region
|
920
|
+
if self.is_flow_region:
|
921
|
+
self._flow_guides: Dict["Region", Tuple[List[float], List[float]]] = {}
|
922
|
+
# For unified view across all regions
|
923
|
+
self._unified_vertical: List[Tuple[float, "Region"]] = []
|
924
|
+
self._unified_horizontal: List[Tuple[float, "Region"]] = []
|
925
|
+
# Cache for sorted unique coordinates
|
926
|
+
self._vertical_cache: Optional[List[float]] = None
|
927
|
+
self._horizontal_cache: Optional[List[float]] = None
|
928
|
+
|
655
929
|
# Initialize with GuidesList instances
|
656
930
|
self._vertical = GuidesList(self, "vertical", sorted([float(x) for x in (verticals or [])]))
|
657
931
|
self._horizontal = GuidesList(
|
@@ -683,11 +957,26 @@ class Guides:
|
|
683
957
|
@property
|
684
958
|
def vertical(self) -> GuidesList:
|
685
959
|
"""Get vertical guide coordinates."""
|
960
|
+
if self.is_flow_region and self._vertical_cache is not None:
|
961
|
+
# Return cached unified view
|
962
|
+
self._vertical.data = self._vertical_cache
|
963
|
+
elif self.is_flow_region and self._unified_vertical:
|
964
|
+
# Build unified view from flow guides
|
965
|
+
all_verticals = []
|
966
|
+
for coord, region in self._unified_vertical:
|
967
|
+
all_verticals.append(coord)
|
968
|
+
# Remove duplicates and sort
|
969
|
+
self._vertical_cache = sorted(list(set(all_verticals)))
|
970
|
+
self._vertical.data = self._vertical_cache
|
686
971
|
return self._vertical
|
687
972
|
|
688
973
|
@vertical.setter
|
689
974
|
def vertical(self, value: Union[List[float], "Guides", None]):
|
690
975
|
"""Set vertical guides from a list of coordinates or another Guides object."""
|
976
|
+
if self.is_flow_region:
|
977
|
+
# Invalidate cache when setting new values
|
978
|
+
self._vertical_cache = None
|
979
|
+
|
691
980
|
if value is None:
|
692
981
|
self._vertical.data = []
|
693
982
|
elif isinstance(value, Guides):
|
@@ -710,11 +999,26 @@ class Guides:
|
|
710
999
|
@property
|
711
1000
|
def horizontal(self) -> GuidesList:
|
712
1001
|
"""Get horizontal guide coordinates."""
|
1002
|
+
if self.is_flow_region and self._horizontal_cache is not None:
|
1003
|
+
# Return cached unified view
|
1004
|
+
self._horizontal.data = self._horizontal_cache
|
1005
|
+
elif self.is_flow_region and self._unified_horizontal:
|
1006
|
+
# Build unified view from flow guides
|
1007
|
+
all_horizontals = []
|
1008
|
+
for coord, region in self._unified_horizontal:
|
1009
|
+
all_horizontals.append(coord)
|
1010
|
+
# Remove duplicates and sort
|
1011
|
+
self._horizontal_cache = sorted(list(set(all_horizontals)))
|
1012
|
+
self._horizontal.data = self._horizontal_cache
|
713
1013
|
return self._horizontal
|
714
1014
|
|
715
1015
|
@horizontal.setter
|
716
1016
|
def horizontal(self, value: Union[List[float], "Guides", None]):
|
717
1017
|
"""Set horizontal guides from a list of coordinates or another Guides object."""
|
1018
|
+
if self.is_flow_region:
|
1019
|
+
# Invalidate cache when setting new values
|
1020
|
+
self._horizontal_cache = None
|
1021
|
+
|
718
1022
|
if value is None:
|
719
1023
|
self._horizontal.data = []
|
720
1024
|
elif isinstance(value, Guides):
|
@@ -821,7 +1125,7 @@ class Guides:
|
|
821
1125
|
@classmethod
|
822
1126
|
def from_lines(
|
823
1127
|
cls,
|
824
|
-
obj: Union["Page", "Region"],
|
1128
|
+
obj: Union["Page", "Region", "FlowRegion"],
|
825
1129
|
axis: Literal["vertical", "horizontal", "both"] = "both",
|
826
1130
|
threshold: Union[float, str] = "auto",
|
827
1131
|
source_label: Optional[str] = None,
|
@@ -836,7 +1140,7 @@ class Guides:
|
|
836
1140
|
Create guides from detected line elements.
|
837
1141
|
|
838
1142
|
Args:
|
839
|
-
obj: Page or Region to detect lines from
|
1143
|
+
obj: Page, Region, or FlowRegion to detect lines from
|
840
1144
|
axis: Which orientations to detect
|
841
1145
|
threshold: Detection threshold ('auto' or float 0.0-1.0) - used for pixel detection
|
842
1146
|
source_label: Filter for line source (vector method) or label for detected lines (pixel method)
|
@@ -856,6 +1160,45 @@ class Guides:
|
|
856
1160
|
Returns:
|
857
1161
|
New Guides object with detected line positions
|
858
1162
|
"""
|
1163
|
+
# Handle FlowRegion
|
1164
|
+
if hasattr(obj, "constituent_regions"):
|
1165
|
+
guides = cls(context=obj)
|
1166
|
+
|
1167
|
+
# Process each constituent region
|
1168
|
+
for region in obj.constituent_regions:
|
1169
|
+
# Create guides for this specific region
|
1170
|
+
region_guides = cls.from_lines(
|
1171
|
+
region,
|
1172
|
+
axis=axis,
|
1173
|
+
threshold=threshold,
|
1174
|
+
source_label=source_label,
|
1175
|
+
max_lines_h=max_lines_h,
|
1176
|
+
max_lines_v=max_lines_v,
|
1177
|
+
outer=outer,
|
1178
|
+
detection_method=detection_method,
|
1179
|
+
resolution=resolution,
|
1180
|
+
**detect_kwargs
|
1181
|
+
)
|
1182
|
+
|
1183
|
+
# Store in flow guides
|
1184
|
+
guides._flow_guides[region] = (
|
1185
|
+
list(region_guides.vertical),
|
1186
|
+
list(region_guides.horizontal)
|
1187
|
+
)
|
1188
|
+
|
1189
|
+
# Add to unified view
|
1190
|
+
for v in region_guides.vertical:
|
1191
|
+
guides._unified_vertical.append((v, region))
|
1192
|
+
for h in region_guides.horizontal:
|
1193
|
+
guides._unified_horizontal.append((h, region))
|
1194
|
+
|
1195
|
+
# Invalidate caches to force rebuild on next access
|
1196
|
+
guides._vertical_cache = None
|
1197
|
+
guides._horizontal_cache = None
|
1198
|
+
|
1199
|
+
return guides
|
1200
|
+
|
1201
|
+
# Original single-region logic follows...
|
859
1202
|
# Get bounds for potential outer guides
|
860
1203
|
if hasattr(obj, "bbox"):
|
861
1204
|
bounds = obj.bbox
|
@@ -1028,7 +1371,7 @@ class Guides:
|
|
1028
1371
|
@classmethod
|
1029
1372
|
def from_content(
|
1030
1373
|
cls,
|
1031
|
-
obj: Union["Page", "Region"],
|
1374
|
+
obj: Union["Page", "Region", "FlowRegion"],
|
1032
1375
|
axis: Literal["vertical", "horizontal"] = "vertical",
|
1033
1376
|
markers: Union[str, List[str], "ElementCollection", None] = None,
|
1034
1377
|
align: Literal["left", "right", "center", "between"] = "left",
|
@@ -1039,7 +1382,7 @@ class Guides:
|
|
1039
1382
|
Create guides based on text content positions.
|
1040
1383
|
|
1041
1384
|
Args:
|
1042
|
-
obj: Page or Region to search for content
|
1385
|
+
obj: Page, Region, or FlowRegion to search for content
|
1043
1386
|
axis: Whether to create vertical or horizontal guides
|
1044
1387
|
markers: Content to search for. Can be:
|
1045
1388
|
- str: single selector (e.g., 'text:contains("Name")') or literal text
|
@@ -1053,6 +1396,41 @@ class Guides:
|
|
1053
1396
|
Returns:
|
1054
1397
|
New Guides object aligned to text content
|
1055
1398
|
"""
|
1399
|
+
# Handle FlowRegion
|
1400
|
+
if hasattr(obj, "constituent_regions"):
|
1401
|
+
guides = cls(context=obj)
|
1402
|
+
|
1403
|
+
# Process each constituent region
|
1404
|
+
for region in obj.constituent_regions:
|
1405
|
+
# Create guides for this specific region
|
1406
|
+
region_guides = cls.from_content(
|
1407
|
+
region,
|
1408
|
+
axis=axis,
|
1409
|
+
markers=markers,
|
1410
|
+
align=align,
|
1411
|
+
outer=outer,
|
1412
|
+
tolerance=tolerance
|
1413
|
+
)
|
1414
|
+
|
1415
|
+
# Store in flow guides
|
1416
|
+
guides._flow_guides[region] = (
|
1417
|
+
list(region_guides.vertical),
|
1418
|
+
list(region_guides.horizontal)
|
1419
|
+
)
|
1420
|
+
|
1421
|
+
# Add to unified view
|
1422
|
+
for v in region_guides.vertical:
|
1423
|
+
guides._unified_vertical.append((v, region))
|
1424
|
+
for h in region_guides.horizontal:
|
1425
|
+
guides._unified_horizontal.append((h, region))
|
1426
|
+
|
1427
|
+
# Invalidate caches
|
1428
|
+
guides._vertical_cache = None
|
1429
|
+
guides._horizontal_cache = None
|
1430
|
+
|
1431
|
+
return guides
|
1432
|
+
|
1433
|
+
# Original single-region logic follows...
|
1056
1434
|
guides_coords = []
|
1057
1435
|
bounds = None
|
1058
1436
|
|
@@ -1141,7 +1519,7 @@ class Guides:
|
|
1141
1519
|
@classmethod
|
1142
1520
|
def from_whitespace(
|
1143
1521
|
cls,
|
1144
|
-
obj: Union["Page", "Region"],
|
1522
|
+
obj: Union["Page", "Region", "FlowRegion"],
|
1145
1523
|
axis: Literal["vertical", "horizontal", "both"] = "both",
|
1146
1524
|
min_gap: float = 10,
|
1147
1525
|
) -> "Guides":
|
@@ -1212,6 +1590,117 @@ class Guides:
|
|
1212
1590
|
logger.warning("No context available for whitespace detection")
|
1213
1591
|
return self
|
1214
1592
|
|
1593
|
+
# Handle FlowRegion case - collect all text elements across regions
|
1594
|
+
if self.is_flow_region:
|
1595
|
+
all_text_elements = []
|
1596
|
+
region_bounds = {}
|
1597
|
+
|
1598
|
+
for region in self.context.constituent_regions:
|
1599
|
+
# Get text elements from this region
|
1600
|
+
if hasattr(region, "find_all"):
|
1601
|
+
try:
|
1602
|
+
text_elements = region.find_all("text", apply_exclusions=False)
|
1603
|
+
elements = text_elements.elements if hasattr(text_elements, "elements") else text_elements
|
1604
|
+
all_text_elements.extend(elements)
|
1605
|
+
|
1606
|
+
# Store bounds for each region
|
1607
|
+
if hasattr(region, "bbox"):
|
1608
|
+
region_bounds[region] = region.bbox
|
1609
|
+
elif hasattr(region, "x0"):
|
1610
|
+
region_bounds[region] = (region.x0, region.top, region.x1, region.bottom)
|
1611
|
+
except Exception as e:
|
1612
|
+
logger.warning(f"Error getting text elements from region: {e}")
|
1613
|
+
|
1614
|
+
if not all_text_elements:
|
1615
|
+
logger.warning("No text elements found across flow regions for whitespace detection")
|
1616
|
+
return self
|
1617
|
+
|
1618
|
+
# Find whitespace gaps across all regions
|
1619
|
+
if axis == "vertical":
|
1620
|
+
gaps = self._find_vertical_whitespace_gaps(all_text_elements, min_gap, threshold)
|
1621
|
+
# Get all vertical guides across regions
|
1622
|
+
all_guides = []
|
1623
|
+
guide_to_region_map = {} # Map guide coordinate to its original list of regions
|
1624
|
+
for coord, region in self._unified_vertical:
|
1625
|
+
all_guides.append(coord)
|
1626
|
+
guide_to_region_map.setdefault(coord, []).append(region)
|
1627
|
+
|
1628
|
+
if gaps and all_guides:
|
1629
|
+
# Keep a copy of original guides to maintain mapping
|
1630
|
+
original_guides = all_guides.copy()
|
1631
|
+
|
1632
|
+
# Snap guides to gaps
|
1633
|
+
self._snap_guides_to_gaps(all_guides, gaps, axis)
|
1634
|
+
|
1635
|
+
# Update the unified view with snapped positions
|
1636
|
+
self._unified_vertical = []
|
1637
|
+
for i, new_coord in enumerate(all_guides):
|
1638
|
+
# Find the original region for this guide using the original position
|
1639
|
+
original_coord = original_guides[i]
|
1640
|
+
# A guide might be associated with multiple regions, add them all
|
1641
|
+
regions = guide_to_region_map.get(original_coord, [])
|
1642
|
+
for region in regions:
|
1643
|
+
self._unified_vertical.append((new_coord, region))
|
1644
|
+
|
1645
|
+
# Update individual region guides
|
1646
|
+
for region in self._flow_guides:
|
1647
|
+
region_verticals = []
|
1648
|
+
for coord, r in self._unified_vertical:
|
1649
|
+
if r == region:
|
1650
|
+
region_verticals.append(coord)
|
1651
|
+
self._flow_guides[region] = (
|
1652
|
+
sorted(list(set(region_verticals))), # Deduplicate here
|
1653
|
+
self._flow_guides[region][1]
|
1654
|
+
)
|
1655
|
+
|
1656
|
+
# Invalidate cache
|
1657
|
+
self._vertical_cache = None
|
1658
|
+
|
1659
|
+
elif axis == "horizontal":
|
1660
|
+
gaps = self._find_horizontal_whitespace_gaps(all_text_elements, min_gap, threshold)
|
1661
|
+
# Get all horizontal guides across regions
|
1662
|
+
all_guides = []
|
1663
|
+
guide_to_region_map = {} # Map guide coordinate to its original list of regions
|
1664
|
+
for coord, region in self._unified_horizontal:
|
1665
|
+
all_guides.append(coord)
|
1666
|
+
guide_to_region_map.setdefault(coord, []).append(region)
|
1667
|
+
|
1668
|
+
if gaps and all_guides:
|
1669
|
+
# Keep a copy of original guides to maintain mapping
|
1670
|
+
original_guides = all_guides.copy()
|
1671
|
+
|
1672
|
+
# Snap guides to gaps
|
1673
|
+
self._snap_guides_to_gaps(all_guides, gaps, axis)
|
1674
|
+
|
1675
|
+
# Update the unified view with snapped positions
|
1676
|
+
self._unified_horizontal = []
|
1677
|
+
for i, new_coord in enumerate(all_guides):
|
1678
|
+
# Find the original region for this guide using the original position
|
1679
|
+
original_coord = original_guides[i]
|
1680
|
+
regions = guide_to_region_map.get(original_coord, [])
|
1681
|
+
for region in regions:
|
1682
|
+
self._unified_horizontal.append((new_coord, region))
|
1683
|
+
|
1684
|
+
# Update individual region guides
|
1685
|
+
for region in self._flow_guides:
|
1686
|
+
region_horizontals = []
|
1687
|
+
for coord, r in self._unified_horizontal:
|
1688
|
+
if r == region:
|
1689
|
+
region_horizontals.append(coord)
|
1690
|
+
self._flow_guides[region] = (
|
1691
|
+
self._flow_guides[region][0],
|
1692
|
+
sorted(list(set(region_horizontals))) # Deduplicate here
|
1693
|
+
)
|
1694
|
+
|
1695
|
+
# Invalidate cache
|
1696
|
+
self._horizontal_cache = None
|
1697
|
+
|
1698
|
+
else:
|
1699
|
+
raise ValueError("axis must be 'vertical' or 'horizontal'")
|
1700
|
+
|
1701
|
+
return self
|
1702
|
+
|
1703
|
+
# Original single-region logic
|
1215
1704
|
# Get elements for trough detection
|
1216
1705
|
text_elements = self._get_text_elements()
|
1217
1706
|
if not text_elements:
|
@@ -1303,14 +1792,47 @@ class Guides:
|
|
1303
1792
|
combined_verticals = sorted([float(x) for x in set(self.vertical + other.vertical)])
|
1304
1793
|
combined_horizontals = sorted([float(y) for y in set(self.horizontal + other.horizontal)])
|
1305
1794
|
|
1306
|
-
#
|
1307
|
-
|
1795
|
+
# Handle FlowRegion context merging
|
1796
|
+
new_context = self.context or other.context
|
1797
|
+
|
1798
|
+
# If both are flow regions, we might need a more complex merge,
|
1799
|
+
# but for now, just picking one context is sufficient.
|
1800
|
+
|
1801
|
+
# Create the new Guides object
|
1802
|
+
new_guides = Guides(
|
1308
1803
|
verticals=combined_verticals,
|
1309
1804
|
horizontals=combined_horizontals,
|
1310
|
-
context=
|
1805
|
+
context=new_context,
|
1311
1806
|
bounds=self.bounds or other.bounds,
|
1312
1807
|
)
|
1313
1808
|
|
1809
|
+
# If the new context is a FlowRegion, we need to rebuild the flow-related state
|
1810
|
+
if new_guides.is_flow_region:
|
1811
|
+
# Re-initialize flow guides from both sources
|
1812
|
+
# This is a simplification; a true merge would be more complex.
|
1813
|
+
# For now, we combine the flow_guides dictionaries.
|
1814
|
+
if hasattr(self, "_flow_guides"):
|
1815
|
+
new_guides._flow_guides.update(self._flow_guides)
|
1816
|
+
if hasattr(other, "_flow_guides"):
|
1817
|
+
new_guides._flow_guides.update(other._flow_guides)
|
1818
|
+
|
1819
|
+
# Re-initialize unified views
|
1820
|
+
if hasattr(self, "_unified_vertical"):
|
1821
|
+
new_guides._unified_vertical.extend(self._unified_vertical)
|
1822
|
+
if hasattr(other, "_unified_vertical"):
|
1823
|
+
new_guides._unified_vertical.extend(other._unified_vertical)
|
1824
|
+
|
1825
|
+
if hasattr(self, "_unified_horizontal"):
|
1826
|
+
new_guides._unified_horizontal.extend(self._unified_horizontal)
|
1827
|
+
if hasattr(other, "_unified_horizontal"):
|
1828
|
+
new_guides._unified_horizontal.extend(other._unified_horizontal)
|
1829
|
+
|
1830
|
+
# Invalidate caches to force rebuild
|
1831
|
+
new_guides._vertical_cache = None
|
1832
|
+
new_guides._horizontal_cache = None
|
1833
|
+
|
1834
|
+
return new_guides
|
1835
|
+
|
1314
1836
|
def show(self, on=None, **kwargs):
|
1315
1837
|
"""
|
1316
1838
|
Display the guides overlaid on a page or region.
|
@@ -1324,6 +1846,122 @@ class Guides:
|
|
1324
1846
|
Returns:
|
1325
1847
|
PIL Image with guides drawn on it.
|
1326
1848
|
"""
|
1849
|
+
# Handle FlowRegion case
|
1850
|
+
if self.is_flow_region and (on is None or on == self.context):
|
1851
|
+
if not self._flow_guides:
|
1852
|
+
raise ValueError("No guides to show for FlowRegion")
|
1853
|
+
|
1854
|
+
# Get stacking parameters from kwargs or use defaults
|
1855
|
+
stack_direction = kwargs.get('stack_direction', 'vertical')
|
1856
|
+
stack_gap = kwargs.get('stack_gap', 5)
|
1857
|
+
stack_background_color = kwargs.get('stack_background_color', (255, 255, 255))
|
1858
|
+
|
1859
|
+
# First, render all constituent regions without guides to get base images
|
1860
|
+
base_images = []
|
1861
|
+
region_infos = [] # Store region info for guide coordinate mapping
|
1862
|
+
|
1863
|
+
for region in self.context.constituent_regions:
|
1864
|
+
try:
|
1865
|
+
# Render region without guides
|
1866
|
+
img = region.to_image(**kwargs)
|
1867
|
+
if img:
|
1868
|
+
base_images.append(img)
|
1869
|
+
|
1870
|
+
# Calculate scaling factors for this region
|
1871
|
+
scale_x = img.width / region.width
|
1872
|
+
scale_y = img.height / region.height
|
1873
|
+
|
1874
|
+
region_infos.append({
|
1875
|
+
'region': region,
|
1876
|
+
'img_width': img.width,
|
1877
|
+
'img_height': img.height,
|
1878
|
+
'scale_x': scale_x,
|
1879
|
+
'scale_y': scale_y,
|
1880
|
+
'pdf_x0': region.x0,
|
1881
|
+
'pdf_top': region.top,
|
1882
|
+
'pdf_x1': region.x1,
|
1883
|
+
'pdf_bottom': region.bottom
|
1884
|
+
})
|
1885
|
+
except Exception as e:
|
1886
|
+
logger.warning(f"Failed to render region: {e}")
|
1887
|
+
|
1888
|
+
if not base_images:
|
1889
|
+
raise ValueError("Failed to render any images for FlowRegion")
|
1890
|
+
|
1891
|
+
# Calculate final canvas size based on stacking direction
|
1892
|
+
if stack_direction == "vertical":
|
1893
|
+
final_width = max(img.width for img in base_images)
|
1894
|
+
final_height = (
|
1895
|
+
sum(img.height for img in base_images)
|
1896
|
+
+ (len(base_images) - 1) * stack_gap
|
1897
|
+
)
|
1898
|
+
else: # horizontal
|
1899
|
+
final_width = (
|
1900
|
+
sum(img.width for img in base_images)
|
1901
|
+
+ (len(base_images) - 1) * stack_gap
|
1902
|
+
)
|
1903
|
+
final_height = max(img.height for img in base_images)
|
1904
|
+
|
1905
|
+
# Create unified canvas
|
1906
|
+
canvas = Image.new("RGB", (final_width, final_height), stack_background_color)
|
1907
|
+
draw = ImageDraw.Draw(canvas)
|
1908
|
+
|
1909
|
+
# Paste base images and track positions
|
1910
|
+
region_positions = [] # (region_info, paste_x, paste_y)
|
1911
|
+
|
1912
|
+
if stack_direction == "vertical":
|
1913
|
+
current_y = 0
|
1914
|
+
for i, (img, info) in enumerate(zip(base_images, region_infos)):
|
1915
|
+
paste_x = (final_width - img.width) // 2 # Center horizontally
|
1916
|
+
canvas.paste(img, (paste_x, current_y))
|
1917
|
+
region_positions.append((info, paste_x, current_y))
|
1918
|
+
current_y += img.height + stack_gap
|
1919
|
+
else: # horizontal
|
1920
|
+
current_x = 0
|
1921
|
+
for i, (img, info) in enumerate(zip(base_images, region_infos)):
|
1922
|
+
paste_y = (final_height - img.height) // 2 # Center vertically
|
1923
|
+
canvas.paste(img, (current_x, paste_y))
|
1924
|
+
region_positions.append((info, current_x, paste_y))
|
1925
|
+
current_x += img.width + stack_gap
|
1926
|
+
|
1927
|
+
# Now draw guides on the unified canvas
|
1928
|
+
# Draw vertical guides (blue) - these extend through the full canvas height
|
1929
|
+
for v_coord in self.vertical:
|
1930
|
+
# Find which region(s) this guide intersects
|
1931
|
+
for info, paste_x, paste_y in region_positions:
|
1932
|
+
if info['pdf_x0'] <= v_coord <= info['pdf_x1']:
|
1933
|
+
# This guide is within this region's x-bounds
|
1934
|
+
# Convert PDF coordinate to pixel coordinate relative to the region
|
1935
|
+
adjusted_x = v_coord - info['pdf_x0']
|
1936
|
+
pixel_x = adjusted_x * info['scale_x'] + paste_x
|
1937
|
+
|
1938
|
+
# Draw full-height line on canvas (not clipped to region)
|
1939
|
+
if 0 <= pixel_x <= final_width:
|
1940
|
+
x_pixel = int(pixel_x)
|
1941
|
+
draw.line([(x_pixel, 0), (x_pixel, final_height - 1)],
|
1942
|
+
fill=(0, 0, 255, 200), width=2)
|
1943
|
+
break # Only draw once per guide
|
1944
|
+
|
1945
|
+
# Draw horizontal guides (red) - these extend through the full canvas width
|
1946
|
+
for h_coord in self.horizontal:
|
1947
|
+
# Find which region(s) this guide intersects
|
1948
|
+
for info, paste_x, paste_y in region_positions:
|
1949
|
+
if info['pdf_top'] <= h_coord <= info['pdf_bottom']:
|
1950
|
+
# This guide is within this region's y-bounds
|
1951
|
+
# Convert PDF coordinate to pixel coordinate relative to the region
|
1952
|
+
adjusted_y = h_coord - info['pdf_top']
|
1953
|
+
pixel_y = adjusted_y * info['scale_y'] + paste_y
|
1954
|
+
|
1955
|
+
# Draw full-width line on canvas (not clipped to region)
|
1956
|
+
if 0 <= pixel_y <= final_height:
|
1957
|
+
y_pixel = int(pixel_y)
|
1958
|
+
draw.line([(0, y_pixel), (final_width - 1, y_pixel)],
|
1959
|
+
fill=(255, 0, 0, 200), width=2)
|
1960
|
+
break # Only draw once per guide
|
1961
|
+
|
1962
|
+
return canvas
|
1963
|
+
|
1964
|
+
# Original single-region logic follows...
|
1327
1965
|
# Determine what to display guides on
|
1328
1966
|
target = on if on is not None else self.context
|
1329
1967
|
|
@@ -1950,7 +2588,9 @@ class Guides:
|
|
1950
2588
|
source: str = "guides",
|
1951
2589
|
cell_padding: float = 0.5,
|
1952
2590
|
include_outer_boundaries: bool = False,
|
1953
|
-
|
2591
|
+
*,
|
2592
|
+
multi_page: Literal["auto", True, False] = "auto",
|
2593
|
+
) -> Dict[str, Any]:
|
1954
2594
|
"""
|
1955
2595
|
Create table structure (table, rows, columns, cells) from guide coordinates.
|
1956
2596
|
|
@@ -1959,11 +2599,331 @@ class Guides:
|
|
1959
2599
|
source: Source label for created regions (for identification)
|
1960
2600
|
cell_padding: Internal padding for cell regions in points
|
1961
2601
|
include_outer_boundaries: Whether to add boundaries at edges if missing
|
2602
|
+
multi_page: Controls multi-page table creation for FlowRegions.
|
2603
|
+
- "auto": (default) Creates a multi-page grid if guides span pages.
|
2604
|
+
- True: Forces creation of a multi-page grid.
|
2605
|
+
- False: Creates separate grids for each page.
|
1962
2606
|
|
1963
2607
|
Returns:
|
1964
|
-
Dictionary with counts
|
2608
|
+
Dictionary with 'counts' and 'regions' created.
|
2609
|
+
"""
|
2610
|
+
# Dispatch to appropriate implementation based on context and flags
|
2611
|
+
if self.is_flow_region:
|
2612
|
+
spans_pages = self._spans_pages()
|
2613
|
+
if multi_page is True or (multi_page == "auto" and spans_pages):
|
2614
|
+
return self._build_grid_multi_page(
|
2615
|
+
source=source,
|
2616
|
+
cell_padding=cell_padding,
|
2617
|
+
include_outer_boundaries=include_outer_boundaries,
|
2618
|
+
)
|
2619
|
+
else:
|
2620
|
+
# FlowRegion context, but creating separate tables per page
|
2621
|
+
total_counts = {"table": 0, "rows": 0, "columns": 0, "cells": 0}
|
2622
|
+
all_regions = {"table": [], "rows": [], "columns": [], "cells": []}
|
2623
|
+
|
2624
|
+
for region in self.context.constituent_regions:
|
2625
|
+
if region in self._flow_guides:
|
2626
|
+
verticals, horizontals = self._flow_guides[region]
|
2627
|
+
|
2628
|
+
region_guides = Guides(
|
2629
|
+
verticals=verticals,
|
2630
|
+
horizontals=horizontals,
|
2631
|
+
context=region
|
2632
|
+
)
|
2633
|
+
|
2634
|
+
try:
|
2635
|
+
result = region_guides._build_grid_single_page(
|
2636
|
+
target=region,
|
2637
|
+
source=source,
|
2638
|
+
cell_padding=cell_padding,
|
2639
|
+
include_outer_boundaries=include_outer_boundaries
|
2640
|
+
)
|
2641
|
+
|
2642
|
+
for key in total_counts:
|
2643
|
+
total_counts[key] += result["counts"][key]
|
2644
|
+
|
2645
|
+
if result["regions"]["table"]:
|
2646
|
+
all_regions["table"].append(result["regions"]["table"])
|
2647
|
+
all_regions["rows"].extend(result["regions"]["rows"])
|
2648
|
+
all_regions["columns"].extend(result["regions"]["columns"])
|
2649
|
+
all_regions["cells"].extend(result["regions"]["cells"])
|
2650
|
+
|
2651
|
+
except Exception as e:
|
2652
|
+
logger.warning(f"Failed to build grid on region: {e}")
|
2653
|
+
|
2654
|
+
logger.info(
|
2655
|
+
f"Created {total_counts['table']} tables, {total_counts['rows']} rows, "
|
2656
|
+
f"{total_counts['columns']} columns, and {total_counts['cells']} cells "
|
2657
|
+
f"from guides across {len(self._flow_guides)} regions"
|
2658
|
+
)
|
2659
|
+
|
2660
|
+
return {"counts": total_counts, "regions": all_regions}
|
2661
|
+
|
2662
|
+
# Fallback for single page/region
|
2663
|
+
return self._build_grid_single_page(
|
2664
|
+
target=target,
|
2665
|
+
source=source,
|
2666
|
+
cell_padding=cell_padding,
|
2667
|
+
include_outer_boundaries=include_outer_boundaries,
|
2668
|
+
)
|
2669
|
+
|
2670
|
+
def _build_grid_multi_page(
    self,
    source: str,
    cell_padding: float,
    include_outer_boundaries: bool,
) -> Dict[str, Any]:
    """
    Build one logical table across all constituent regions of a FlowRegion.

    Phase 1 builds a physical grid on every constituent region, using the
    unified guides clipped to that region plus the region's own edges.
    Phase 2 stitches the physical pieces into FlowRegions: the table always,
    columns and boundary-straddling rows/cells when the flow is vertical.
    Finally the physical table fragments are replaced in each page's element
    manager by the logical multi-page table, and multi-region rows, columns
    and cells are registered with every page they touch.

    Args:
        source: Source label stamped on every region created here.
        cell_padding: Internal padding for cell regions, forwarded to the
            per-region grid builder.
        include_outer_boundaries: Accepted for signature parity with
            ``build_grid``; it is not forwarded, because each region's own
            bbox edges are always added as outer boundaries.

    Returns:
        ``{"counts": {...}, "regions": {...}}`` with keys ``table``, ``rows``,
        ``columns`` and ``cells``. ``regions["table"]`` is the multi-page
        FlowRegion, or ``None`` when no region produced a grid.

    Raises:
        ValueError: If the context is not a FlowRegion with a usable ``flow``.
    """
    from natural_pdf.flows.region import FlowRegion

    if not self.is_flow_region or not hasattr(self.context, "flow") or not self.context.flow:
        raise ValueError("Multi-page grid building requires a FlowRegion with a valid Flow.")

    flow = self.context.flow
    # Orientation decides how the physical pieces are stitched in phase 2.
    orientation = self._get_flow_orientation()

    def register_parts(parts, label):
        # Best-effort: attach each multi-region part to every page it touches.
        for part in parts:
            if not hasattr(part, "constituent_regions"):
                continue  # plain single-page Region; the per-page builder handled it
            for constituent_region in part.constituent_regions:
                if hasattr(constituent_region, "page") and hasattr(
                    constituent_region.page, "_element_mgr"
                ):
                    try:
                        constituent_region.page._element_mgr.add_element(
                            part, element_type="regions"
                        )
                    except Exception as e:
                        logger.warning(f"Failed to register multi-page {label}: {e}")

    # Phase 1: build a physical grid on each region, clipping guides to its bounds.
    unified_verticals = self.vertical.data
    unified_horizontals = self.horizontal.data

    # Keep each result paired with the region it came from: regions can be
    # skipped below, so positions in this list do not match positions in
    # ``constituent_regions``.
    built = []
    for region in self.context.constituent_regions:
        bounds = region.bbox
        if not bounds:
            continue

        # The region's own edges are always included so cells are closed off
        # at page breaks.
        clipped_verticals = sorted(
            {bounds[0], bounds[2]}
            | {v for v in unified_verticals if bounds[0] <= v <= bounds[2]}
        )
        clipped_horizontals = sorted(
            {bounds[1], bounds[3]}
            | {h for h in unified_horizontals if bounds[1] <= h <= bounds[3]}
        )

        if len(clipped_verticals) < 2 or len(clipped_horizontals) < 2:
            continue  # Not enough guides to form a cell

        region_guides = Guides(
            verticals=clipped_verticals,
            horizontals=clipped_horizontals,
            context=region,
        )
        grid_parts = region_guides._build_grid_single_page(
            target=region,
            source=source,
            cell_padding=cell_padding,
            include_outer_boundaries=False,  # Boundaries are already handled
        )

        if grid_parts["counts"]["table"] > 0:
            built.append((region, grid_parts))

    if not built:
        return {
            "counts": {"table": 0, "rows": 0, "columns": 0, "cells": 0},
            "regions": {"table": None, "rows": [], "columns": [], "cells": []},
        }

    results_by_region = [parts for _, parts in built]

    # Phase 2: stitch physical regions into logical FlowRegions.
    # The overall table is always a FlowRegion.
    physical_tables = [res["regions"]["table"] for res in results_by_region]
    multi_page_table = FlowRegion(
        flow=flow, constituent_regions=physical_tables, source_flow_element=None
    )
    multi_page_table.source = source
    multi_page_table.region_type = "table"
    multi_page_table.metadata.update(
        {"is_multi_page": True, "num_rows": self.n_rows, "num_cols": self.n_cols}
    )

    final_rows = []
    final_cols = []
    final_cells = []

    if orientation == "vertical":
        # Work on shallow copies so the per-region results are left untouched.
        page_rows = [list(res["regions"]["rows"]) for res in results_by_region]
        page_cells = [list(res["regions"]["cells"]) for res in results_by_region]

        # Walk the breaks between consecutive built regions and merge split rows/cells.
        for i in range(len(built) - 1):
            region_A = built[i][0]

            # A guide sitting on the break means the row ends there; nothing to merge.
            is_break_bounded = any(
                abs(h - region_A.bottom) < 0.1 for h in unified_horizontals
            )
            if is_break_bounded or not page_rows[i] or not page_rows[i + 1]:
                continue

            # No guide at break -> merge last row of A with first row of B.
            last_row_A = page_rows[i].pop(-1)
            first_row_B = page_rows[i + 1].pop(0)

            merged_row = FlowRegion(flow, [last_row_A, first_row_B], source_flow_element=None)
            merged_row.source = source
            merged_row.region_type = "table_row"
            merged_row.metadata.update(
                {"row_index": last_row_A.metadata.get("row_index"), "is_multi_page": True}
            )
            page_rows[i].append(merged_row)  # takes the place of A's last row

            # Merge the corresponding cells using explicit row/col indices.
            last_row_idx = last_row_A.metadata.get("row_index")
            first_row_idx = first_row_B.metadata.get("row_index")

            last_cells_A = [
                c for c in page_cells[i] if c.metadata.get("row_index") == last_row_idx
            ]
            first_cells_B = [
                c for c in page_cells[i + 1] if c.metadata.get("row_index") == first_row_idx
            ]

            # Remove them from their page lists.
            page_cells[i] = [
                c for c in page_cells[i] if c.metadata.get("row_index") != last_row_idx
            ]
            page_cells[i + 1] = [
                c for c in page_cells[i + 1] if c.metadata.get("row_index") != first_row_idx
            ]

            # Sort both lists by column index to keep alignment stable.
            last_cells_A.sort(key=lambda c: c.metadata.get("col_index", 0))
            first_cells_B.sort(key=lambda c: c.metadata.get("col_index", 0))

            # Pair-wise merge; zip stops at the shorter list.
            for cell_A, cell_B in zip(last_cells_A, first_cells_B):
                merged_cell = FlowRegion(flow, [cell_A, cell_B], source_flow_element=None)
                merged_cell.source = source
                merged_cell.region_type = "table_cell"
                merged_cell.metadata.update(
                    {
                        "row_index": cell_A.metadata.get("row_index"),
                        "col_index": cell_A.metadata.get("col_index"),
                        "is_multi_page": True,
                    }
                )
                page_cells[i].append(merged_cell)

        # Flatten the potentially modified lists of rows and cells.
        final_rows = [row for rows_list in page_rows for row in rows_list]
        final_cells = [cell for cells_list in page_cells for cell in cells_list]

        # Stitch columns, which always span vertically. zip truncates to the
        # region with the fewest columns.
        physical_cols_by_index = zip(
            *(res["regions"]["columns"] for res in results_by_region)
        )
        for j, physical_cols in enumerate(physical_cols_by_index):
            col_fr = FlowRegion(
                flow=flow, constituent_regions=list(physical_cols), source_flow_element=None
            )
            col_fr.source = source
            col_fr.region_type = "table_column"
            col_fr.metadata.update({"col_index": j, "is_multi_page": True})
            final_cols.append(col_fr)

    else:
        if orientation == "horizontal":
            # The symmetric column merge for horizontal flows is not implemented;
            # the physical pieces are returned unstitched.
            logger.warning("Horizontal table stitching not fully implemented.")
        # Horizontal or unknown orientation: just flatten everything.
        final_rows = [row for res in results_by_region for row in res["regions"]["rows"]]
        final_cols = [col for res in results_by_region for col in res["regions"]["columns"]]
        final_cells = [cell for res in results_by_region for cell in res["regions"]["cells"]]

    # Page-level registry: swap the physical table fragments for the logical
    # multi-page table so page.find('table') returns the latter.
    constituent_pages = set()
    for region in self.context.constituent_regions:
        if hasattr(region, "page") and hasattr(region.page, "_element_mgr"):
            constituent_pages.add(region.page)

    # Only the fragments created during this build are removed, not every
    # table that happens to share the same source label.
    physical_tables_to_remove = set(physical_tables)

    for page in constituent_pages:
        try:
            existing_tables = page.find_all('table')
            tables_to_remove = [
                table
                for table in existing_tables
                if table in physical_tables_to_remove and not isinstance(table, FlowRegion)
            ]

            for table in tables_to_remove:
                page._element_mgr.remove_element(table, element_type="regions")
                logger.debug(f"Removed physical table fragment from page {page.page_number}")

            # Now register the multi-page table.
            page._element_mgr.add_element(multi_page_table, element_type="regions")
            logger.debug(f"Registered multi-page table with page {page.page_number}")

        except Exception as e:
            logger.warning(f"Failed to register multi-page table with page {page.page_number}: {e}")

    # Also register multi-region rows, columns and cells with each page they touch.
    register_parts(final_rows, "row")
    register_parts(final_cols, "column")
    register_parts(final_cells, "cell")

    final_counts = {
        "table": 1,
        "rows": len(final_rows),
        "columns": len(final_cols),
        "cells": len(final_cells),
    }
    final_regions = {
        "table": multi_page_table,
        "rows": final_rows,
        "columns": final_cols,
        "cells": final_cells,
    }

    logger.info(
        f"Created 1 multi-page table, {final_counts['rows']} logical rows, "
        f"{final_counts['columns']} logical columns from guides and registered with all constituent pages"
    )

    return {"counts": final_counts, "regions": final_regions}
|
2911
|
+
|
2912
|
+
def _build_grid_single_page(
|
2913
|
+
self,
|
2914
|
+
target: Optional[Union["Page", "Region"]] = None,
|
2915
|
+
source: str = "guides",
|
2916
|
+
cell_padding: float = 0.5,
|
2917
|
+
include_outer_boundaries: bool = False,
|
2918
|
+
) -> Dict[str, Any]:
|
2919
|
+
"""
|
2920
|
+
Private method to create table structure on a single page or region.
|
2921
|
+
(Refactored from the original public build_grid method).
|
1965
2922
|
"""
|
1966
|
-
#
|
2923
|
+
# This method now only handles a single page/region context.
|
2924
|
+
# Looping for FlowRegions is handled by the public `build_grid` method.
|
2925
|
+
|
2926
|
+
# Original single-region logic follows...
|
1967
2927
|
target_obj = target or self.context
|
1968
2928
|
if not target_obj:
|
1969
2929
|
raise ValueError("No target object available. Provide target parameter or context.")
|
@@ -2055,8 +3015,9 @@ class Guides:
|
|
2055
3015
|
f"Building grid with {len(row_boundaries)} row and {len(col_boundaries)} col boundaries"
|
2056
3016
|
)
|
2057
3017
|
|
2058
|
-
# Track creation counts
|
3018
|
+
# Track creation counts and regions
|
2059
3019
|
counts = {"table": 0, "rows": 0, "columns": 0, "cells": 0}
|
3020
|
+
created_regions = {"table": None, "rows": [], "columns": [], "cells": []}
|
2060
3021
|
|
2061
3022
|
# Create overall table region
|
2062
3023
|
if len(row_boundaries) >= 2 and len(col_boundaries) >= 2:
|
@@ -2076,6 +3037,7 @@ class Guides:
|
|
2076
3037
|
)
|
2077
3038
|
element_manager.add_element(table_region, element_type="regions")
|
2078
3039
|
counts["table"] = 1
|
3040
|
+
created_regions["table"] = table_region
|
2079
3041
|
|
2080
3042
|
# Create row regions
|
2081
3043
|
if len(row_boundaries) >= 2 and len(col_boundaries) >= 2:
|
@@ -2089,6 +3051,7 @@ class Guides:
|
|
2089
3051
|
row_region.metadata.update({"row_index": i, "source_guides": True})
|
2090
3052
|
element_manager.add_element(row_region, element_type="regions")
|
2091
3053
|
counts["rows"] += 1
|
3054
|
+
created_regions["rows"].append(row_region)
|
2092
3055
|
|
2093
3056
|
# Create column regions
|
2094
3057
|
if len(col_boundaries) >= 2 and len(row_boundaries) >= 2:
|
@@ -2102,6 +3065,7 @@ class Guides:
|
|
2102
3065
|
col_region.metadata.update({"col_index": j, "source_guides": True})
|
2103
3066
|
element_manager.add_element(col_region, element_type="regions")
|
2104
3067
|
counts["columns"] += 1
|
3068
|
+
created_regions["columns"].append(col_region)
|
2105
3069
|
|
2106
3070
|
# Create cell regions
|
2107
3071
|
if len(row_boundaries) >= 2 and len(col_boundaries) >= 2:
|
@@ -2136,13 +3100,14 @@ class Guides:
|
|
2136
3100
|
)
|
2137
3101
|
element_manager.add_element(cell_region, element_type="regions")
|
2138
3102
|
counts["cells"] += 1
|
3103
|
+
created_regions["cells"].append(cell_region)
|
2139
3104
|
|
2140
3105
|
logger.info(
|
2141
3106
|
f"Created {counts['table']} table, {counts['rows']} rows, "
|
2142
3107
|
f"{counts['columns']} columns, and {counts['cells']} cells from guides"
|
2143
3108
|
)
|
2144
3109
|
|
2145
|
-
return counts
|
3110
|
+
return {"counts": counts, "regions": created_regions}
|
2146
3111
|
|
2147
3112
|
def __repr__(self) -> str:
|
2148
3113
|
"""String representation of the guides."""
|
@@ -2157,6 +3122,22 @@ class Guides:
|
|
2157
3122
|
if not self.context:
|
2158
3123
|
return []
|
2159
3124
|
|
3125
|
+
# Handle FlowRegion context
|
3126
|
+
if self.is_flow_region:
|
3127
|
+
all_text_elements = []
|
3128
|
+
for region in self.context.constituent_regions:
|
3129
|
+
if hasattr(region, "find_all"):
|
3130
|
+
try:
|
3131
|
+
text_elements = region.find_all("text", apply_exclusions=False)
|
3132
|
+
elements = (
|
3133
|
+
text_elements.elements if hasattr(text_elements, "elements") else text_elements
|
3134
|
+
)
|
3135
|
+
all_text_elements.extend(elements)
|
3136
|
+
except Exception as e:
|
3137
|
+
logger.warning(f"Error getting text elements from region: {e}")
|
3138
|
+
return all_text_elements
|
3139
|
+
|
3140
|
+
# Original single-region logic
|
2160
3141
|
# Get text elements from the context
|
2161
3142
|
if hasattr(self.context, "find_all"):
|
2162
3143
|
try:
|
@@ -2171,6 +3152,31 @@ class Guides:
|
|
2171
3152
|
logger.warning("Context does not support text element search")
|
2172
3153
|
return []
|
2173
3154
|
|
3155
|
+
def _spans_pages(self) -> bool:
    """
    Tell whether some guide coordinate is attached to regions on different pages.

    Only meaningful for a FlowRegion context; any other context yields False.
    The unified vertical guides are examined first, then the horizontal ones.
    A coordinate counts as spanning pages when the regions recorded for it
    report more than one distinct ``page.page_number``.
    """
    if not self.is_flow_region:
        return False

    # Attributes are looked up lazily, one axis at a time, so the horizontal
    # list is only touched when the vertical one yields nothing.
    for attr_name in ("_unified_vertical", "_unified_horizontal"):
        pages_by_coord = {}
        for coord, region in getattr(self, attr_name):
            pages_by_coord.setdefault(coord, set()).add(region.page.page_number)

        if any(len(page_numbers) > 1 for page_numbers in pages_by_coord.values()):
            return True

    return False
|
3179
|
+
|
2174
3180
|
# -------------------------------------------------------------------------
|
2175
3181
|
# Instance methods for fluent chaining (avoid name conflicts with class methods)
|
2176
3182
|
# -------------------------------------------------------------------------
|
@@ -2318,3 +3324,24 @@ class Guides:
|
|
2318
3324
|
self.horizontal = list(set(self.horizontal + new_guides.horizontal))
|
2319
3325
|
|
2320
3326
|
return self
|
3327
|
+
|
3328
|
+
def _get_flow_orientation(self) -> Literal["vertical", "horizontal", "unknown"]:
    """
    Classify how the first two constituent regions of a FlowRegion are arranged.

    The primary signal is the gap between the two regions' bounding boxes:
    a larger vertical gap means "vertical", a larger horizontal gap means
    "horizontal". When the gaps are equal the geometry is ambiguous. The
    usual case is both gaps being zero, e.g. regions that touch, overlap,
    or have the same bbox on different pages. Ties are then resolved by:

    1. the flow's declared ``arrangement``, if it is "vertical"/"horizontal";
    2. the offset between the regions' centres;
    3. otherwise "unknown".

    Returns:
        "vertical", "horizontal", or "unknown". "unknown" is also returned
        when the context is not a FlowRegion, has fewer than two constituent
        regions, or either of the first two regions has no bbox.
    """
    if not self.is_flow_region or len(self.context.constituent_regions) < 2:
        return "unknown"

    # Only the first two regions are compared.
    r1 = self.context.constituent_regions[0]
    r2 = self.context.constituent_regions[1]

    if not r1.bbox or not r2.bbox:
        return "unknown"

    # Non-overlapping distance along each axis (0 when the extents overlap or touch).
    x_dist = max(0, max(r1.x0, r2.x0) - min(r1.x1, r2.x1))
    y_dist = max(0, max(r1.top, r2.top) - min(r1.bottom, r2.bottom))

    if y_dist > x_dist:
        return "vertical"
    if x_dist > y_dist:
        return "horizontal"

    # Tie: the gaps cannot decide. Page-local coordinates say nothing about
    # regions on different pages, so prefer what the flow itself declares.
    # NOTE(review): assumes the Flow object exposes `arrangement` as
    # "vertical"/"horizontal" - confirm against the Flow class.
    declared = getattr(getattr(self.context, "flow", None), "arrangement", None)
    if declared in ("vertical", "horizontal"):
        return declared

    # Fall back to centre offsets (doubled; only their relative size matters).
    dx = abs((r1.x0 + r1.x1) - (r2.x0 + r2.x1))
    dy = abs((r1.top + r1.bottom) - (r2.top + r2.bottom))
    if dy > dx:
        return "vertical"
    if dx > dy:
        return "horizontal"
    return "unknown"
|