natural-pdf: natural_pdf-0.2.5-py3-none-any.whl → natural_pdf-0.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/analyzers/guides.py +94 -42
- natural_pdf/core/page.py +110 -44
- natural_pdf/core/page_collection.py +223 -34
- natural_pdf/core/page_groupby.py +20 -2
- natural_pdf/core/pdf.py +3 -0
- natural_pdf/core/render_spec.py +20 -5
- natural_pdf/describe/base.py +1 -1
- natural_pdf/describe/elements.py +1 -1
- natural_pdf/elements/base.py +84 -8
- natural_pdf/elements/element_collection.py +730 -12
- natural_pdf/elements/region.py +181 -48
- natural_pdf/flows/flow.py +3 -0
- natural_pdf/selectors/parser.py +2 -2
- natural_pdf/utils/color_utils.py +100 -0
- {natural_pdf-0.2.5.dist-info → natural_pdf-0.2.6.dist-info}/METADATA +1 -1
- {natural_pdf-0.2.5.dist-info → natural_pdf-0.2.6.dist-info}/RECORD +20 -19
- {natural_pdf-0.2.5.dist-info → natural_pdf-0.2.6.dist-info}/WHEEL +0 -0
- {natural_pdf-0.2.5.dist-info → natural_pdf-0.2.6.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.2.5.dist-info → natural_pdf-0.2.6.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.2.5.dist-info → natural_pdf-0.2.6.dist-info}/top_level.txt +0 -0
natural_pdf/elements/base.py
CHANGED
@@ -180,7 +180,14 @@ class DirectionalMixin:
|
|
180
180
|
# 3. Handle 'until' selector if provided
|
181
181
|
target = None
|
182
182
|
if until:
|
183
|
-
|
183
|
+
from natural_pdf.elements.element_collection import ElementCollection
|
184
|
+
|
185
|
+
# If until is an elementcollection, just use it
|
186
|
+
if isinstance(until, ElementCollection):
|
187
|
+
# Only take ones on the same page
|
188
|
+
all_matches = [m for m in until if m.page == self.page]
|
189
|
+
else:
|
190
|
+
all_matches = self.page.find_all(until, **kwargs)
|
184
191
|
matches_in_direction = []
|
185
192
|
|
186
193
|
# Filter and sort matches based on direction
|
@@ -1193,7 +1200,7 @@ class Element(
|
|
1193
1200
|
mode: Literal["show", "render"] = "show",
|
1194
1201
|
color: Optional[Union[str, Tuple[int, int, int]]] = None,
|
1195
1202
|
highlights: Optional[Union[List[Dict[str, Any]], bool]] = None,
|
1196
|
-
crop: Union[bool, Literal["
|
1203
|
+
crop: Union[bool, int, str, "Region", Literal["wide"]] = False,
|
1197
1204
|
crop_bbox: Optional[Tuple[float, float, float, float]] = None,
|
1198
1205
|
label: Optional[str] = None,
|
1199
1206
|
**kwargs,
|
@@ -1204,7 +1211,12 @@ class Element(
|
|
1204
1211
|
mode: Rendering mode - 'show' includes highlights, 'render' is clean
|
1205
1212
|
color: Color for highlighting this element in show mode
|
1206
1213
|
highlights: Additional highlight groups to show, or False to disable all highlights
|
1207
|
-
crop:
|
1214
|
+
crop: Cropping mode:
|
1215
|
+
- False: No cropping (default)
|
1216
|
+
- True: Tight crop to element bounds
|
1217
|
+
- int: Padding in pixels around element
|
1218
|
+
- 'wide': Full page width, cropped vertically to element
|
1219
|
+
- Region: Crop to the bounds of another region
|
1208
1220
|
crop_bbox: Explicit crop bounds
|
1209
1221
|
label: Optional label for this element
|
1210
1222
|
**kwargs: Additional parameters
|
@@ -1220,17 +1232,37 @@ class Element(
|
|
1220
1232
|
# Handle cropping
|
1221
1233
|
if crop_bbox:
|
1222
1234
|
spec.crop_bbox = crop_bbox
|
1223
|
-
elif crop
|
1224
|
-
#
|
1235
|
+
elif crop:
|
1236
|
+
# Get element bounds as starting point
|
1225
1237
|
if hasattr(self, "bbox") and self.bbox:
|
1226
|
-
|
1238
|
+
x0, y0, x1, y1 = self.bbox
|
1239
|
+
|
1240
|
+
if crop is True:
|
1241
|
+
# Tight crop to element bounds
|
1242
|
+
spec.crop_bbox = self.bbox
|
1243
|
+
elif isinstance(crop, (int, float)):
|
1244
|
+
# Add padding around element
|
1245
|
+
padding = float(crop)
|
1246
|
+
spec.crop_bbox = (
|
1247
|
+
max(0, x0 - padding),
|
1248
|
+
max(0, y0 - padding),
|
1249
|
+
min(self.page.width, x1 + padding),
|
1250
|
+
min(self.page.height, y1 + padding),
|
1251
|
+
)
|
1252
|
+
elif crop == "wide":
|
1253
|
+
# Full page width, cropped vertically to element
|
1254
|
+
spec.crop_bbox = (0, y0, self.page.width, y1)
|
1255
|
+
elif hasattr(crop, "bbox"):
|
1256
|
+
# Crop to another region's bounds
|
1257
|
+
spec.crop_bbox = crop.bbox
|
1227
1258
|
|
1228
1259
|
# Add highlight in show mode (unless explicitly disabled with highlights=False)
|
1229
1260
|
if mode == "show" and highlights is not False:
|
1230
1261
|
# Only highlight this element if:
|
1231
1262
|
# 1. We're not cropping, OR
|
1232
|
-
# 2. We're cropping but color was explicitly specified
|
1233
|
-
|
1263
|
+
# 2. We're cropping but color was explicitly specified, OR
|
1264
|
+
# 3. We're cropping to another region (not tight crop)
|
1265
|
+
if not crop or color is not None or (crop and not isinstance(crop, bool)):
|
1234
1266
|
# Use provided label or generate one
|
1235
1267
|
element_label = label if label is not None else self.__class__.__name__
|
1236
1268
|
|
@@ -1289,6 +1321,50 @@ class Element(
|
|
1289
1321
|
|
1290
1322
|
# Note: save_image method removed in favor of save()
|
1291
1323
|
|
1324
|
+
def __add__(self, other: Union["Element", "ElementCollection"]) -> "ElementCollection":
|
1325
|
+
"""Add elements together to create an ElementCollection.
|
1326
|
+
|
1327
|
+
This allows intuitive combination of elements using the + operator:
|
1328
|
+
```python
|
1329
|
+
complainant = section.find("text:contains(Complainant)").right(until='text')
|
1330
|
+
dob = section.find("text:contains(DOB)").right(until='text')
|
1331
|
+
combined = complainant + dob # Creates ElementCollection with both regions
|
1332
|
+
```
|
1333
|
+
|
1334
|
+
Args:
|
1335
|
+
other: Another Element or ElementCollection to combine with this element
|
1336
|
+
|
1337
|
+
Returns:
|
1338
|
+
ElementCollection containing all elements
|
1339
|
+
"""
|
1340
|
+
from natural_pdf.elements.element_collection import ElementCollection
|
1341
|
+
from natural_pdf.elements.region import Region
|
1342
|
+
|
1343
|
+
# Create a list starting with self
|
1344
|
+
elements = [self]
|
1345
|
+
|
1346
|
+
# Add the other element(s)
|
1347
|
+
if isinstance(other, (Element, Region)):
|
1348
|
+
elements.append(other)
|
1349
|
+
elif isinstance(other, ElementCollection):
|
1350
|
+
elements.extend(other)
|
1351
|
+
elif hasattr(other, "__iter__") and not isinstance(other, (str, bytes)):
|
1352
|
+
# Handle other iterables but exclude strings
|
1353
|
+
elements.extend(other)
|
1354
|
+
else:
|
1355
|
+
raise TypeError(f"Cannot add Element with {type(other)}")
|
1356
|
+
|
1357
|
+
return ElementCollection(elements)
|
1358
|
+
|
1359
|
+
def __radd__(self, other: Union["Element", "ElementCollection"]) -> "ElementCollection":
|
1360
|
+
"""Right-hand addition to support ElementCollection + Element."""
|
1361
|
+
if other == 0:
|
1362
|
+
# This handles sum() which starts with 0
|
1363
|
+
from natural_pdf.elements.element_collection import ElementCollection
|
1364
|
+
|
1365
|
+
return ElementCollection([self])
|
1366
|
+
return self.__add__(other)
|
1367
|
+
|
1292
1368
|
def __repr__(self) -> str:
|
1293
1369
|
"""String representation of the element."""
|
1294
1370
|
return f"<{self.__class__.__name__} bbox={self.bbox}>"
|