natural-pdf 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,382 @@
1
+ import logging
2
+ from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union, cast
3
+
4
+ if TYPE_CHECKING:
5
+ from natural_pdf.elements.base import Element as PhysicalElement
6
+ from natural_pdf.elements.region import Region as PhysicalRegion
7
+ from natural_pdf.core.page import Page as PhysicalPage # For type checking physical_object.page
8
+ from .flow import Flow
9
+ from .region import FlowRegion
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class FlowElement:
    """
    Represents a physical PDF Element or Region that is anchored within a Flow.

    The wrapper delegates geometry and text access to the wrapped physical
    object and adds flow-aware directional navigation (``above``, ``below``,
    ``left``, ``right``) that walks across the segments of the associated Flow.
    """

    def __init__(self, physical_object: Union["PhysicalElement", "PhysicalRegion"], flow: "Flow"):
        """
        Initializes a FlowElement.

        Args:
            physical_object: The actual natural_pdf.elements.base.Element or
                natural_pdf.elements.region.Region object.
            flow: The Flow instance this element is part of.

        Raises:
            TypeError: If ``physical_object`` lacks a ``bbox`` or ``page`` attribute.
        """
        if not (hasattr(physical_object, 'bbox') and hasattr(physical_object, 'page')):
            raise TypeError(
                f"physical_object must be a valid PDF element-like object with 'bbox' and 'page' attributes. Got {type(physical_object)}"
            )
        self.physical_object: Union["PhysicalElement", "PhysicalRegion"] = physical_object
        self.flow: "Flow" = flow

    # --- Properties to delegate to the physical_object ---
    @property
    def bbox(self) -> Tuple[float, float, float, float]:
        """Bounding box of the wrapped physical object."""
        return self.physical_object.bbox

    @property
    def x0(self) -> float:
        """``x0`` of the wrapped physical object."""
        return self.physical_object.x0

    @property
    def top(self) -> float:
        """``top`` of the wrapped physical object."""
        return self.physical_object.top

    @property
    def x1(self) -> float:
        """``x1`` of the wrapped physical object."""
        return self.physical_object.x1

    @property
    def bottom(self) -> float:
        """``bottom`` of the wrapped physical object."""
        return self.physical_object.bottom

    @property
    def width(self) -> float:
        """``width`` of the wrapped physical object."""
        return self.physical_object.width

    @property
    def height(self) -> float:
        """``height`` of the wrapped physical object."""
        return self.physical_object.height

    @property
    def text(self) -> Optional[str]:
        """Text of the wrapped physical object, or None if it has no ``text`` attribute."""
        return getattr(self.physical_object, 'text', None)

    @property
    def page(self) -> Optional["PhysicalPage"]:
        """Returns the physical page of the underlying element."""
        return getattr(self.physical_object, 'page', None)

    # --- Private helpers used by _flow_direction ---
    def _find_start_segment_index(self) -> int:
        """
        Locates the flow segment this element belongs to.

        A segment on the same page whose ``is_point_inside`` accepts the
        element's center wins immediately. Otherwise the first same-page
        segment whose bbox overlaps the element's bbox is used.

        Returns:
            The segment index, or -1 when no segment qualifies.
        """
        fallback_index = -1
        for i, segment_in_flow in enumerate(self.flow.segments):
            if self.physical_object.page != segment_in_flow.page:
                continue

            obj_center_x = (self.physical_object.x0 + self.physical_object.x1) / 2
            obj_center_y = (self.physical_object.top + self.physical_object.bottom) / 2
            if segment_in_flow.is_point_inside(obj_center_x, obj_center_y):
                return i

            obj_bbox = self.physical_object.bbox
            seg_bbox = segment_in_flow.bbox
            disjoint = (
                obj_bbox[2] < seg_bbox[0] or obj_bbox[0] > seg_bbox[2]
                or obj_bbox[3] < seg_bbox[1] or obj_bbox[1] > seg_bbox[3]
            )
            # Remember only the first overlapping segment; keep scanning in
            # case a later segment actually contains the center point.
            if not disjoint and fallback_index == -1:
                fallback_index = i
        return fallback_index

    def _resolve_cross_size(
        self,
        is_primary_vertical: bool,
        cross_size_ratio: Optional[float],
        cross_size_absolute: Optional[float],
    ) -> Union[str, float]:
        """
        Picks the cross-axis size for the directional operation.

        An absolute size takes precedence over a ratio. A ratio is applied to
        the element's width (vertical flow) or height (horizontal flow). With
        neither given the result is the string "full".
        """
        if cross_size_absolute is not None:
            return cross_size_absolute
        if cross_size_ratio is not None:
            base_cross_dim = self.physical_object.width if is_primary_vertical else self.physical_object.height
            return base_cross_dim * cross_size_ratio
        return "full"

    @staticmethod
    def _has_area(region: Any) -> bool:
        """True when ``region`` is truthy and has positive width and height."""
        return bool(region and region.width > 0 and region.height > 0)

    @staticmethod
    def _nearest_match(matches: Any, direction: str) -> Any:
        """
        Returns the match that is encountered first when moving in ``direction``.

        Uses the collection's own ``sort(...).first`` so ordering and
        tie-breaking are whatever that collection implements.
        """
        if direction == "below":
            return matches.sort(key=lambda m: m.top).first
        if direction == "above":
            return matches.sort(key=lambda m: m.bottom, reverse=True).first
        if direction == "right":
            return matches.sort(key=lambda m: m.x0).first
        if direction == "left":
            return matches.sort(key=lambda m: m.x1, reverse=True).first
        return None

    @staticmethod
    def _clip_to_boundary(region: Any, boundary: Any, direction: str, include_endpoint: bool) -> Any:
        """
        Clips ``region`` so it stops at ``boundary`` in the travel direction.

        With ``include_endpoint`` the far edge of the boundary element is used;
        otherwise the cut is placed 1 unit before the boundary's near edge.
        """
        if direction == "below":
            edge = boundary.bottom if include_endpoint else (boundary.top - 1)
            return region.clip(bottom=edge, top=None)
        if direction == "above":
            edge = boundary.top if include_endpoint else (boundary.bottom + 1)
            return region.clip(bottom=None, top=edge)
        if direction == "right":
            edge = boundary.x1 if include_endpoint else (boundary.x0 - 1)
            return region.clip(right=edge, left=None)
        # direction == "left"
        edge = boundary.x0 if include_endpoint else (boundary.x1 + 1)
        return region.clip(right=None, left=edge)

    def _stop_at_until(
        self,
        region: Any,
        until: str,
        direction: str,
        include_endpoint: bool,
        find_kwargs: dict,
    ) -> Tuple[Any, Any]:
        """
        Searches ``region`` for ``until`` and trims the region at the first hit.

        Returns:
            ``(region, boundary)`` where ``boundary`` is the matched element, or
            None (with the region untouched) when nothing matched.
        """
        until_matches = region.find_all(until, **find_kwargs)
        if not until_matches:
            return region, None
        potential_hit = self._nearest_match(until_matches, direction)
        if not potential_hit:
            return region, None
        return self._clip_to_boundary(region, potential_hit, direction, include_endpoint), potential_hit

    @staticmethod
    def _limit_to_size(region: Any, remaining_size: float, is_primary_vertical: bool, is_forward: bool) -> Tuple[Any, float]:
        """
        Trims ``region`` along the flow axis so it consumes at most ``remaining_size``.

        Returns:
            ``(region, consumed)`` where ``consumed`` is the extent along the
            flow axis that this region uses up.
        """
        if is_primary_vertical:
            consumed = region.height
            if consumed > remaining_size:
                if is_forward:
                    region = region.clip(bottom=region.top + remaining_size, top=None)
                else:
                    region = region.clip(bottom=None, top=region.bottom - remaining_size)
                consumed = remaining_size
        else:
            consumed = region.width
            if consumed > remaining_size:
                if is_forward:
                    region = region.clip(right=region.x0 + remaining_size, left=None)
                else:
                    region = region.clip(right=None, left=region.x1 - remaining_size)
                consumed = remaining_size
        return region, consumed

    def _flow_direction(
        self,
        direction: str,  # "above", "below", "left", "right"
        size: Optional[float] = None,
        cross_size_ratio: Optional[float] = None,  # Default to None for full flow width
        cross_size_absolute: Optional[float] = None,
        cross_alignment: str = "center",  # "start", "center", "end"
        until: Optional[str] = None,
        include_endpoint: bool = True,
        **kwargs,
    ) -> "FlowRegion":
        """
        Builds a FlowRegion extending from this element in ``direction``.

        Starting at the segment that holds this element, segments are visited
        in flow order (forward for "below"/"right", backward for
        "above"/"left"). The start segment contributes the result of the
        physical object's ``_direction`` call clipped to that segment; every
        later segment contributes itself. Collection stops when ``size`` is
        used up, when an ``until`` match is found, or when segments run out.

        Args:
            direction: One of "above", "below", "left", "right".
            size: Maximum extent along the flow axis; None means unlimited.
            cross_size_ratio: Cross-axis size as a ratio of the element's size.
            cross_size_absolute: Cross-axis size as an absolute value; wins
                over ``cross_size_ratio``.
            cross_alignment: Forwarded to the physical ``_direction`` call.
            until: Selector for a boundary element that ends the region.
            include_endpoint: Whether the boundary element is included.
            **kwargs: Forwarded to ``find_all`` for the ``until`` search; only
                'strict_type' and 'first_match_only' are forwarded to
                ``_direction``.

        Returns:
            A FlowRegion holding the collected regions. It is empty when the
            element lies in no flow segment.

        Raises:
            NotImplementedError: If ``direction`` does not match the flow's arrangement.
            ValueError: If ``direction`` is not a recognized value.
        """
        # Runtime imports: these names are only imported under TYPE_CHECKING at module level.
        from .region import FlowRegion
        from natural_pdf.elements.region import Region as PhysicalRegion_Class

        collected_constituent_regions: List[PhysicalRegion_Class] = []
        boundary_element_hit: Optional["PhysicalElement"] = None
        size_limited = size is not None
        # Ensure remaining_size is float, even if size is int.
        remaining_size = float(size) if size_limited else float('inf')

        # 1. Identify the starting segment.
        start_segment_index = self._find_start_segment_index()
        if start_segment_index == -1:
            page_num_str = str(self.physical_object.page.page_number) if self.physical_object.page else 'N/A'
            logger.warning(
                f"FlowElement's physical object {self.physical_object.bbox} on page {page_num_str} "
                f"not found within any flow segment. Cannot perform directional operation '{direction}'."
            )
            return FlowRegion(
                flow=self.flow,
                constituent_regions=[],
                source_flow_element=self,
                boundary_element_found=None,
            )

        # 2. Validate the direction against the flow arrangement.
        is_primary_vertical = self.flow.arrangement == "vertical"
        if direction in ("below", "above"):
            if not is_primary_vertical:
                raise NotImplementedError(f"'{direction}' is for vertical flows.")
        elif direction in ("right", "left"):
            if is_primary_vertical:
                raise NotImplementedError(f"'{direction}' is for horizontal flows.")
        else:
            raise ValueError(f"Internal error: Invalid direction '{direction}' for _flow_direction.")

        is_forward = direction in ("below", "right")
        segment_count = len(self.flow.segments)
        if is_forward:
            segment_iterator = range(start_segment_index, segment_count)
            last_segment_index = segment_count - 1
        else:
            segment_iterator = range(start_segment_index, -1, -1)
            last_segment_index = 0

        # 3. Walk the segments, collecting one contribution per segment.
        for current_segment_idx in segment_iterator:
            if size_limited and remaining_size <= 0:
                break

            current_segment: PhysicalRegion_Class = self.flow.segments[current_segment_idx]

            if current_segment_idx == start_segment_index:
                source_for_op_call = self.physical_object
                if not isinstance(source_for_op_call, PhysicalRegion_Class):
                    if not hasattr(source_for_op_call, 'to_region'):
                        logger.error(f"FlowElement: Cannot convert op_source {type(self.physical_object)} to region.")
                        continue
                    source_for_op_call = source_for_op_call.to_region()

                # Basic shape first, *without* 'until'; the boundary search
                # happens afterwards inside the clipped area.
                initial_region_from_op = source_for_op_call._direction(
                    direction=direction,
                    size=remaining_size if size_limited else None,
                    cross_size=self._resolve_cross_size(
                        is_primary_vertical, cross_size_ratio, cross_size_absolute
                    ),
                    cross_alignment=cross_alignment,
                    include_element=False,
                    **{k: v for k, v in kwargs.items() if k in ['strict_type', 'first_match_only']},
                )
                segment_contribution = current_segment.clip(initial_region_from_op)
                should_search = bool(until) and self._has_area(segment_contribution)
            else:
                # Later segments contribute their whole area by default.
                segment_contribution = current_segment
                should_search = bool(until)

            if should_search:
                segment_contribution, boundary_element_hit = self._stop_at_until(
                    segment_contribution, until, direction, include_endpoint, kwargs
                )

            if size_limited and self._has_area(segment_contribution):
                segment_contribution, consumed = self._limit_to_size(
                    segment_contribution, remaining_size, is_primary_vertical, is_forward
                )
                remaining_size -= consumed

            if self._has_area(segment_contribution):
                collected_constituent_regions.append(segment_contribution)

            # Once a boundary is found no further segment may contribute.
            if boundary_element_hit:
                break

            # Account for the virtual gap before moving on to the next segment.
            if (
                size_limited
                and current_segment_idx != last_segment_index
                and self.flow.segment_gap > 0
                and remaining_size > 0
            ):
                remaining_size -= self.flow.segment_gap

        return FlowRegion(
            flow=self.flow,
            constituent_regions=collected_constituent_regions,
            source_flow_element=self,
            boundary_element_found=boundary_element_hit,
        )

    # --- Public Directional Methods ---
    # These largely mirror DirectionalMixin but call _flow_direction.

    def above(
        self,
        height: Optional[float] = None,
        width_ratio: Optional[float] = None,
        width_absolute: Optional[float] = None,
        width_alignment: str = "center",
        until: Optional[str] = None,
        include_endpoint: bool = True,
        **kwargs,
    ) -> "FlowRegion":
        """
        Returns the flow-aware region above this element (vertical flows only).

        Raises:
            NotImplementedError: If the flow arrangement is not "vertical".
        """
        if self.flow.arrangement != "vertical":
            raise NotImplementedError("'above' in a horizontal flow is ambiguous with current 1D flow logic and not yet implemented.")
        return self._flow_direction(
            direction="above", size=height, cross_size_ratio=width_ratio,
            cross_size_absolute=width_absolute, cross_alignment=width_alignment,
            until=until, include_endpoint=include_endpoint, **kwargs,
        )

    def below(
        self,
        height: Optional[float] = None,
        width_ratio: Optional[float] = None,
        width_absolute: Optional[float] = None,
        width_alignment: str = "center",
        until: Optional[str] = None,
        include_endpoint: bool = True,
        **kwargs,
    ) -> "FlowRegion":
        """
        Returns the flow-aware region below this element (vertical flows only).

        Raises:
            NotImplementedError: If the flow arrangement is not "vertical".
        """
        if self.flow.arrangement != "vertical":
            raise NotImplementedError("'below' in a horizontal flow is ambiguous with current 1D flow logic and not yet implemented.")
        return self._flow_direction(
            direction="below", size=height, cross_size_ratio=width_ratio,
            cross_size_absolute=width_absolute, cross_alignment=width_alignment,
            until=until, include_endpoint=include_endpoint, **kwargs,
        )

    def left(
        self,
        width: Optional[float] = None,
        height_ratio: Optional[float] = None,
        height_absolute: Optional[float] = None,
        height_alignment: str = "center",
        until: Optional[str] = None,
        include_endpoint: bool = True,
        **kwargs,
    ) -> "FlowRegion":
        """
        Returns the flow-aware region left of this element (horizontal flows only).

        Raises:
            NotImplementedError: If the flow arrangement is not "horizontal".
        """
        if self.flow.arrangement != "horizontal":
            raise NotImplementedError("'left' in a vertical flow is ambiguous with current 1D flow logic and not yet implemented.")
        return self._flow_direction(
            direction="left", size=width, cross_size_ratio=height_ratio,
            cross_size_absolute=height_absolute, cross_alignment=height_alignment,
            until=until, include_endpoint=include_endpoint, **kwargs,
        )

    def right(
        self,
        width: Optional[float] = None,
        height_ratio: Optional[float] = None,
        height_absolute: Optional[float] = None,
        height_alignment: str = "center",
        until: Optional[str] = None,
        include_endpoint: bool = True,
        **kwargs,
    ) -> "FlowRegion":
        """
        Returns the flow-aware region right of this element (horizontal flows only).

        Raises:
            NotImplementedError: If the flow arrangement is not "horizontal".
        """
        if self.flow.arrangement != "horizontal":
            raise NotImplementedError("'right' in a vertical flow is ambiguous with current 1D flow logic and not yet implemented.")
        return self._flow_direction(
            direction="right", size=width, cross_size_ratio=height_ratio,
            cross_size_absolute=height_absolute, cross_alignment=height_alignment,
            until=until, include_endpoint=include_endpoint, **kwargs,
        )

    def __repr__(self) -> str:
        return f"<FlowElement for {self.physical_object.__class__.__name__} {self.bbox} in {self.flow}>"
@@ -0,0 +1,216 @@
1
+ import logging
2
+ from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
3
+
4
+ if TYPE_CHECKING:
5
+ from natural_pdf.core.page import Page
6
+ from natural_pdf.elements.region import Region as PhysicalRegion
7
+ from natural_pdf.elements.base import Element as PhysicalElement
8
+ from natural_pdf.elements.collections import ElementCollection as PhysicalElementCollection
9
+ from .element import FlowElement
10
+ from .collections import FlowElementCollection
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class Flow:
    """
    Defines a logical flow or sequence of physical Page or Region objects,
    specifying their arrangement and alignment to enable operations that
    span across these segments as if they were a continuous area.
    """

    def __init__(
        self,
        segments: List[Union["Page", "PhysicalRegion"]],
        arrangement: Literal["vertical", "horizontal"],
        alignment: Literal["start", "center", "end", "top", "left", "bottom", "right"] = "start",
        segment_gap: float = 0.0,
    ):
        """
        Initializes a Flow object.

        Args:
            segments: An ordered list of natural_pdf.core.page.Page or
                      natural_pdf.elements.region.Region objects that constitute the flow.
            arrangement: The primary direction of the flow.
                         - "vertical": Segments are stacked top-to-bottom.
                         - "horizontal": Segments are arranged left-to-right.
            alignment: How segments are aligned on their cross-axis if they have
                       differing dimensions. For a "vertical" arrangement:
                       - "left" (or "start"): Align left edges.
                       - "center": Align centers.
                       - "right" (or "end"): Align right edges.
                       For a "horizontal" arrangement:
                       - "top" (or "start"): Align top edges.
                       - "center": Align centers.
                       - "bottom" (or "end"): Align bottom edges.
            segment_gap: The virtual gap (in PDF points) between segments.

        Raises:
            ValueError: If ``segments`` is empty, ``arrangement`` is unknown, or
                ``alignment`` is not valid for the arrangement.
            TypeError: If a segment is neither a Page nor a Region.
        """
        if not segments:
            raise ValueError("Flow segments cannot be empty.")
        if arrangement not in ["vertical", "horizontal"]:
            raise ValueError("Arrangement must be 'vertical' or 'horizontal'.")

        self.segments: List["PhysicalRegion"] = self._normalize_segments(segments)
        self.arrangement: Literal["vertical", "horizontal"] = arrangement
        self.alignment: Literal["start", "center", "end", "top", "left", "bottom", "right"] = alignment
        self.segment_gap: float = segment_gap

        self._validate_alignment()

        # TODO: Pre-calculate segment offsets for faster lookups if needed

    def _normalize_segments(self, segments: List[Union["Page", "PhysicalRegion"]]) -> List["PhysicalRegion"]:
        """
        Converts all Page segments to full-page Region objects for uniform processing.

        Region segments are kept as they are. Anything else is rejected; objects
        that merely advertise ``object_type`` "page" or "region" without being
        real Page/Region instances get a more specific error message.

        Raises:
            TypeError: If a segment is not a Page or Region instance.
        """
        # Runtime imports: these names are only imported under TYPE_CHECKING at module level.
        from natural_pdf.core.page import Page as CorePage
        from natural_pdf.elements.region import Region as ElementsRegion

        normalized = []
        for i, segment in enumerate(segments):
            if isinstance(segment, CorePage):
                normalized.append(segment.region(0, 0, segment.width, segment.height))
                continue
            if isinstance(segment, ElementsRegion):
                normalized.append(segment)
                continue

            # Not a real Page/Region from here on: only the error message varies.
            declared_type = getattr(segment, 'object_type', None)
            if declared_type == "page":
                raise TypeError(f"Segment {i} has object_type 'page' but is not an instance of natural_pdf.core.page.Page. Got {type(segment)}")
            if declared_type == "region":
                raise TypeError(f"Segment {i} has object_type 'region' but is not an instance of natural_pdf.elements.region.Region. Got {type(segment)}")
            raise TypeError(
                f"Segment {i} is not a valid Page or Region object. Got {type(segment)}."
            )
        return normalized

    def _validate_alignment(self) -> None:
        """
        Validates the alignment based on the arrangement.

        Raises:
            ValueError: If ``self.alignment`` is not allowed for ``self.arrangement``.
        """
        valid_alignments = {
            "vertical": ["start", "center", "end", "left", "right"],
            "horizontal": ["start", "center", "end", "top", "bottom"],
        }
        allowed = valid_alignments[self.arrangement]
        if self.alignment not in allowed:
            raise ValueError(
                f"Invalid alignment '{self.alignment}' for '{self.arrangement}' arrangement. "
                f"Valid options are: {allowed}"
            )

    def find(
        self,
        selector: Optional[str] = None,
        *,
        text: Optional[str] = None,
        apply_exclusions: bool = True,
        regex: bool = False,
        case: bool = True,
        **kwargs,
    ) -> Optional["FlowElement"]:
        """
        Finds the first element within the flow that matches the given selector or text criteria.

        Elements found are wrapped as FlowElement objects, anchored to this Flow.

        Args:
            selector: CSS-like selector string.
            text: Text content to search for.
            apply_exclusions: Whether to respect exclusion zones on the original pages/regions.
            regex: Whether the text search uses regex.
            case: Whether the text search is case-sensitive.
            **kwargs: Additional filter parameters for the underlying find operation.

        Returns:
            A FlowElement if a match is found, otherwise None.
        """
        results = self.find_all(
            selector=selector,
            text=text,
            apply_exclusions=apply_exclusions,
            regex=regex,
            case=case,
            **kwargs,
        )
        return results.first if results else None

    def find_all(
        self,
        selector: Optional[str] = None,
        *,
        text: Optional[str] = None,
        apply_exclusions: bool = True,
        regex: bool = False,
        case: bool = True,
        **kwargs,
    ) -> "FlowElementCollection":
        """
        Finds all elements within the flow that match the given selector or text criteria.
        Elements are collected segment by segment, preserving the flow order.

        Elements found are wrapped as FlowElement objects, anchored to this Flow,
        and returned in a FlowElementCollection.

        Args:
            selector: CSS-like selector string.
            text: Text content to search for.
            apply_exclusions: Forwarded to each segment's ``find_all``.
            regex: Forwarded to each segment's ``find_all``.
            case: Forwarded to each segment's ``find_all``.
            **kwargs: Additional parameters forwarded to each segment's ``find_all``.

        Returns:
            A FlowElementCollection; the order is segment sequence first, then
            the order each segment's ``find_all`` returned.
        """
        from .collections import FlowElementCollection
        from .element import FlowElement

        all_flow_elements: List["FlowElement"] = []

        # Iterate through segments in their defined flow order; no global sort is applied.
        for physical_segment in self.segments:
            matches_in_segment: "PhysicalElementCollection" = physical_segment.find_all(
                selector=selector,
                text=text,
                apply_exclusions=apply_exclusions,
                regex=regex,
                case=case,
                **kwargs,
            )
            if matches_in_segment:
                # Wrap each physical element, preserving the order of matches_in_segment.elements.
                all_flow_elements.extend(
                    FlowElement(physical_object=phys_elem, flow=self)
                    for phys_elem in matches_in_segment.elements
                )

        return FlowElementCollection(all_flow_elements)

    def __repr__(self) -> str:
        return (
            f"<Flow segments={len(self.segments)}, "
            f"arrangement='{self.arrangement}', alignment='{self.alignment}', gap={self.segment_gap}>"
        )

    # --- Helper methods for coordinate transformations and segment iteration ---
    # These are placeholders; FlowElement's directional logic currently works in physical coordinates.

    def get_segment_bounding_box_in_flow(self, segment_index: int) -> Optional[tuple[float, float, float, float]]:
        """
        Calculates the conceptual bounding box of a segment within the flow's coordinate system.

        Placeholder: only the index range check is implemented.

        Returns:
            None when ``segment_index`` is out of range.

        Raises:
            NotImplementedError: For every in-range index, because the virtual
                coordinate system has not been implemented.
        """
        if segment_index < 0 or segment_index >= len(self.segments):
            return None

        # A full implementation would calculate offsets from arrangement,
        # alignment and segment_gap.
        raise NotImplementedError("Calculating a segment's bbox *within the flow's virtual coordinate system* is not yet fully implemented.")

    def get_element_flow_coordinates(self, physical_element: "PhysicalElement") -> Optional[tuple[float, float, float, float]]:
        """
        Translates a physical element's coordinates into the flow's virtual coordinate system.

        Placeholder: elements currently keep their own physical coordinates.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Translating element coordinates to a unified flow coordinate system is not yet implemented.")