natural-pdf 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +45 -0
- natural_pdf/analyzers/guides.py +359 -0
- natural_pdf/core/element_manager.py +4 -0
- natural_pdf/core/page.py +88 -22
- natural_pdf/core/page_collection.py +75 -0
- natural_pdf/core/pdf.py +33 -0
- natural_pdf/describe/base.py +48 -7
- natural_pdf/elements/base.py +408 -43
- natural_pdf/elements/element_collection.py +83 -10
- natural_pdf/elements/region.py +217 -178
- natural_pdf/elements/text.py +5 -3
- natural_pdf/flows/element.py +48 -46
- natural_pdf/flows/flow.py +175 -480
- natural_pdf/flows/region.py +76 -0
- natural_pdf/selectors/parser.py +180 -9
- natural_pdf/utils/pdfminer_patches.py +136 -0
- natural_pdf/utils/sections.py +346 -0
- natural_pdf/utils/spatial.py +169 -0
- {natural_pdf-0.2.15.dist-info → natural_pdf-0.2.17.dist-info}/METADATA +1 -1
- {natural_pdf-0.2.15.dist-info → natural_pdf-0.2.17.dist-info}/RECORD +24 -21
- {natural_pdf-0.2.15.dist-info → natural_pdf-0.2.17.dist-info}/WHEEL +0 -0
- {natural_pdf-0.2.15.dist-info → natural_pdf-0.2.17.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.2.15.dist-info → natural_pdf-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.2.15.dist-info → natural_pdf-0.2.17.dist-info}/top_level.txt +0 -0
natural_pdf/flows/element.py
CHANGED
@@ -106,6 +106,7 @@ class FlowElement:
|
|
106
106
|
cross_size_absolute: Optional[float] = None,
|
107
107
|
cross_alignment: str = "center", # "start", "center", "end"
|
108
108
|
until: Optional[str] = None,
|
109
|
+
include_source: bool = False,
|
109
110
|
include_endpoint: bool = True,
|
110
111
|
**kwargs,
|
111
112
|
) -> "FlowRegion":
|
@@ -178,13 +179,9 @@ class FlowElement:
|
|
178
179
|
is_forward = False
|
179
180
|
segment_iterator = range(start_segment_index, -1, -1)
|
180
181
|
elif direction == "right":
|
181
|
-
if is_primary_vertical:
|
182
|
-
raise NotImplementedError("'right' is for horizontal flows.")
|
183
182
|
is_forward = True
|
184
183
|
segment_iterator = range(start_segment_index, len(self.flow.segments))
|
185
184
|
elif direction == "left":
|
186
|
-
if is_primary_vertical:
|
187
|
-
raise NotImplementedError("'left' is for horizontal flows.")
|
188
185
|
is_forward = False
|
189
186
|
segment_iterator = range(start_segment_index, -1, -1)
|
190
187
|
else:
|
@@ -206,28 +203,34 @@ class FlowElement:
|
|
206
203
|
"direction": direction,
|
207
204
|
"until": until,
|
208
205
|
"include_endpoint": include_endpoint,
|
206
|
+
"include_source": include_source,
|
209
207
|
**kwargs,
|
210
208
|
}
|
211
209
|
|
212
|
-
# --- Cross-size logic: Default
|
210
|
+
# --- Cross-size logic: Default based on direction ---
|
213
211
|
cross_size_for_op: Union[str, float]
|
214
212
|
if cross_size_absolute is not None:
|
215
213
|
cross_size_for_op = cross_size_absolute
|
216
214
|
elif cross_size_ratio is not None: # User explicitly provided a ratio
|
215
|
+
# Cross dimension depends on direction, not flow arrangement
|
217
216
|
base_cross_dim = (
|
218
217
|
self.physical_object.width
|
219
|
-
if
|
218
|
+
if direction in ["above", "below"]
|
220
219
|
else self.physical_object.height
|
221
220
|
)
|
222
221
|
cross_size_for_op = base_cross_dim * cross_size_ratio
|
223
|
-
else: # Default case: neither absolute nor ratio provided
|
224
|
-
|
222
|
+
else: # Default case: neither absolute nor ratio provided
|
223
|
+
# Default to element size for left/right, full for above/below
|
224
|
+
if direction in ["left", "right"]:
|
225
|
+
cross_size_for_op = self.physical_object.height
|
226
|
+
else:
|
227
|
+
cross_size_for_op = "full"
|
225
228
|
op_direction_params["cross_size"] = cross_size_for_op
|
226
229
|
|
227
230
|
if current_segment_idx == start_segment_index:
|
228
231
|
op_source = self.physical_object
|
229
232
|
op_direction_params["size"] = remaining_size if size is not None else None
|
230
|
-
op_direction_params["include_source"] =
|
233
|
+
op_direction_params["include_source"] = include_source
|
231
234
|
|
232
235
|
source_for_op_call = op_source
|
233
236
|
if not isinstance(source_for_op_call, PhysicalRegion_Class):
|
@@ -245,7 +248,8 @@ class FlowElement:
|
|
245
248
|
"size": remaining_size if size is not None else None,
|
246
249
|
"cross_size": cross_size_for_op,
|
247
250
|
"cross_alignment": cross_alignment, # Pass alignment
|
248
|
-
"include_source":
|
251
|
+
"include_source": include_source,
|
252
|
+
"_from_flow": True, # Prevent multipage recursion
|
249
253
|
# Pass other relevant kwargs if Region._direction uses them (e.g. strict_type)
|
250
254
|
**{k: v for k, v in kwargs.items() if k in ["strict_type", "first_match_only"]},
|
251
255
|
}
|
@@ -283,7 +287,7 @@ class FlowElement:
|
|
283
287
|
if potential_hit:
|
284
288
|
boundary_element_hit = potential_hit # Set the overall boundary flag
|
285
289
|
# Adjust segment_contribution to stop at this boundary_element_hit.
|
286
|
-
if
|
290
|
+
if direction in ["below", "above"]:
|
287
291
|
if direction == "below":
|
288
292
|
edge = (
|
289
293
|
boundary_element_hit.bottom
|
@@ -300,7 +304,7 @@ class FlowElement:
|
|
300
304
|
bottom=edge if direction == "below" else None,
|
301
305
|
top=edge if direction == "above" else None,
|
302
306
|
)
|
303
|
-
else:
|
307
|
+
else: # direction in ["right", "left"]
|
304
308
|
if direction == "right":
|
305
309
|
edge = (
|
306
310
|
boundary_element_hit.x1
|
@@ -338,7 +342,7 @@ class FlowElement:
|
|
338
342
|
|
339
343
|
if potential_hit:
|
340
344
|
boundary_element_hit = potential_hit
|
341
|
-
if
|
345
|
+
if direction in ["below", "above"]:
|
342
346
|
if direction == "below":
|
343
347
|
edge = (
|
344
348
|
boundary_element_hit.bottom
|
@@ -355,7 +359,7 @@ class FlowElement:
|
|
355
359
|
bottom=edge if direction == "below" else None,
|
356
360
|
top=edge if direction == "above" else None,
|
357
361
|
)
|
358
|
-
else:
|
362
|
+
else: # direction in ["right", "left"]
|
359
363
|
if direction == "right":
|
360
364
|
edge = (
|
361
365
|
boundary_element_hit.x1
|
@@ -381,7 +385,7 @@ class FlowElement:
|
|
381
385
|
and size is not None
|
382
386
|
):
|
383
387
|
current_part_consumed_size = 0.0
|
384
|
-
if
|
388
|
+
if direction in ["below", "above"]:
|
385
389
|
current_part_consumed_size = segment_contribution.height
|
386
390
|
if current_part_consumed_size > remaining_size:
|
387
391
|
new_edge = (
|
@@ -394,7 +398,7 @@ class FlowElement:
|
|
394
398
|
top=new_edge if not is_forward else None,
|
395
399
|
)
|
396
400
|
current_part_consumed_size = remaining_size
|
397
|
-
else:
|
401
|
+
else: # direction in ["left", "right"]
|
398
402
|
current_part_consumed_size = segment_contribution.width
|
399
403
|
if current_part_consumed_size > remaining_size:
|
400
404
|
new_edge = (
|
@@ -451,6 +455,7 @@ class FlowElement:
|
|
451
455
|
width_absolute: Optional[float] = None,
|
452
456
|
width_alignment: str = "center",
|
453
457
|
until: Optional[str] = None,
|
458
|
+
include_source: bool = False,
|
454
459
|
include_endpoint: bool = True,
|
455
460
|
**kwargs,
|
456
461
|
) -> "FlowRegion": # Stringized
|
@@ -462,6 +467,7 @@ class FlowElement:
|
|
462
467
|
cross_size_absolute=width_absolute,
|
463
468
|
cross_alignment=width_alignment,
|
464
469
|
until=until,
|
470
|
+
include_source=include_source,
|
465
471
|
include_endpoint=include_endpoint,
|
466
472
|
**kwargs,
|
467
473
|
)
|
@@ -477,6 +483,7 @@ class FlowElement:
|
|
477
483
|
width_absolute: Optional[float] = None,
|
478
484
|
width_alignment: str = "center",
|
479
485
|
until: Optional[str] = None,
|
486
|
+
include_source: bool = False,
|
480
487
|
include_endpoint: bool = True,
|
481
488
|
**kwargs,
|
482
489
|
) -> "FlowRegion": # Stringized
|
@@ -488,6 +495,7 @@ class FlowElement:
|
|
488
495
|
cross_size_absolute=width_absolute,
|
489
496
|
cross_alignment=width_alignment,
|
490
497
|
until=until,
|
498
|
+
include_source=include_source,
|
491
499
|
include_endpoint=include_endpoint,
|
492
500
|
**kwargs,
|
493
501
|
)
|
@@ -503,24 +511,21 @@ class FlowElement:
|
|
503
511
|
height_absolute: Optional[float] = None,
|
504
512
|
height_alignment: str = "center",
|
505
513
|
until: Optional[str] = None,
|
514
|
+
include_source: bool = False,
|
506
515
|
include_endpoint: bool = True,
|
507
516
|
**kwargs,
|
508
517
|
) -> "FlowRegion": # Stringized
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
else:
|
521
|
-
raise NotImplementedError(
|
522
|
-
"'left' in a vertical flow is ambiguous with current 1D flow logic and not yet implemented."
|
523
|
-
)
|
518
|
+
return self._flow_direction(
|
519
|
+
direction="left",
|
520
|
+
size=width,
|
521
|
+
cross_size_ratio=height_ratio,
|
522
|
+
cross_size_absolute=height_absolute,
|
523
|
+
cross_alignment=height_alignment,
|
524
|
+
until=until,
|
525
|
+
include_source=include_source,
|
526
|
+
include_endpoint=include_endpoint,
|
527
|
+
**kwargs,
|
528
|
+
)
|
524
529
|
|
525
530
|
def right(
|
526
531
|
self,
|
@@ -529,24 +534,21 @@ class FlowElement:
|
|
529
534
|
height_absolute: Optional[float] = None,
|
530
535
|
height_alignment: str = "center",
|
531
536
|
until: Optional[str] = None,
|
537
|
+
include_source: bool = False,
|
532
538
|
include_endpoint: bool = True,
|
533
539
|
**kwargs,
|
534
540
|
) -> "FlowRegion": # Stringized
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
else:
|
547
|
-
raise NotImplementedError(
|
548
|
-
"'right' in a vertical flow is ambiguous with current 1D flow logic and not yet implemented."
|
549
|
-
)
|
541
|
+
return self._flow_direction(
|
542
|
+
direction="right",
|
543
|
+
size=width,
|
544
|
+
cross_size_ratio=height_ratio,
|
545
|
+
cross_size_absolute=height_absolute,
|
546
|
+
cross_alignment=height_alignment,
|
547
|
+
until=until,
|
548
|
+
include_source=include_source,
|
549
|
+
include_endpoint=include_endpoint,
|
550
|
+
**kwargs,
|
551
|
+
)
|
550
552
|
|
551
553
|
def __repr__(self) -> str:
|
552
554
|
return f"<FlowElement for {self.physical_object.__class__.__name__} {self.bbox} in {self.flow}>"
|