natural-pdf 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -207,22 +207,82 @@ class ElementCollection(
207
207
  if not self._elements:
208
208
  return []
209
209
 
210
- # Group elements by page
211
- elements_by_page = {}
210
+ # Check for FlowRegions which need special handling
211
+ from natural_pdf.flows.region import FlowRegion
212
+
213
+ flow_regions = []
214
+ regular_elements = []
215
+
212
216
  for elem in self._elements:
217
+ if isinstance(elem, FlowRegion):
218
+ flow_regions.append(elem)
219
+ else:
220
+ regular_elements.append(elem)
221
+
222
+ # Start with specs from FlowRegions (they handle their own multi-page rendering)
223
+ all_specs = []
224
+ specs_by_page = {} # Track specs by page for merging
225
+
226
+ for flow_region in flow_regions:
227
+ # FlowRegions have their own _get_render_specs method
228
+ flow_specs = flow_region._get_render_specs(
229
+ mode=mode,
230
+ color=color,
231
+ highlights=highlights,
232
+ crop=crop,
233
+ crop_bbox=crop_bbox,
234
+ **kwargs,
235
+ )
236
+ for spec in flow_specs:
237
+ # Check if we already have a spec for this page
238
+ if spec.page in specs_by_page:
239
+ # Merge highlights into existing spec
240
+ existing_spec = specs_by_page[spec.page]
241
+ # Add all highlights from this spec to the existing one
242
+ existing_spec.highlights.extend(spec.highlights)
243
+ # Merge crop bbox if needed
244
+ if spec.crop_bbox and not existing_spec.crop_bbox:
245
+ existing_spec.crop_bbox = spec.crop_bbox
246
+ elif spec.crop_bbox and existing_spec.crop_bbox:
247
+ # Expand crop bbox to include both
248
+ x0 = min(spec.crop_bbox[0], existing_spec.crop_bbox[0])
249
+ y0 = min(spec.crop_bbox[1], existing_spec.crop_bbox[1])
250
+ x1 = max(spec.crop_bbox[2], existing_spec.crop_bbox[2])
251
+ y1 = max(spec.crop_bbox[3], existing_spec.crop_bbox[3])
252
+ existing_spec.crop_bbox = (x0, y0, x1, y1)
253
+ else:
254
+ # First spec for this page
255
+ all_specs.append(spec)
256
+ specs_by_page[spec.page] = spec
257
+
258
+ # Group regular elements by page
259
+ elements_by_page = {}
260
+ for elem in regular_elements:
213
261
  if hasattr(elem, "page"):
214
262
  page = elem.page
215
263
  if page not in elements_by_page:
216
264
  elements_by_page[page] = []
217
265
  elements_by_page[page].append(elem)
218
266
 
219
- if not elements_by_page:
267
+ if not elements_by_page and not flow_regions:
220
268
  return []
221
269
 
222
- # Create RenderSpec for each page
223
- specs = []
270
+ # Create or update RenderSpec for each page with regular elements
224
271
  for page, page_elements in elements_by_page.items():
225
- spec = RenderSpec(page=page)
272
+ # Check if we already have a spec for this page from FlowRegions
273
+ existing_spec = None
274
+ for spec in all_specs:
275
+ if spec.page == page:
276
+ existing_spec = spec
277
+ break
278
+
279
+ if existing_spec:
280
+ # We'll add to the existing spec
281
+ spec = existing_spec
282
+ else:
283
+ # Create new spec for this page
284
+ spec = RenderSpec(page=page)
285
+ all_specs.append(spec)
226
286
 
227
287
  # Handle cropping
228
288
  if crop_bbox:
@@ -390,9 +450,7 @@ class ElementCollection(
390
450
  element=elem, color=group_color, label=group_label
391
451
  )
392
452
 
393
- specs.append(spec)
394
-
395
- return specs
453
+ return all_specs
396
454
 
397
455
  def _get_highlighter(self):
398
456
  """Get the highlighting service for rendering.
@@ -889,7 +947,22 @@ class ElementCollection(
889
947
  return self
890
948
 
891
949
  def exclude(self):
892
- self.page.add_exclusion(self)
950
+ """
951
+ Excludes all elements in the collection from their respective pages.
952
+
953
+ Since a collection can span multiple pages, this method iterates through
954
+ all elements and calls exclude() on each one individually.
955
+
956
+ Each element type is handled appropriately:
957
+ - Region elements exclude everything within their bounds
958
+ - Text/other elements exclude only the specific element, not the area
959
+
960
+ Returns:
961
+ Self for method chaining
962
+ """
963
+ for element in self._elements:
964
+ element.exclude()
965
+ return self
893
966
 
894
967
  def highlight(
895
968
  self,