natural-pdf 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +31 -0
- natural_pdf/analyzers/layout/gemini.py +137 -162
- natural_pdf/analyzers/layout/layout_manager.py +9 -5
- natural_pdf/analyzers/layout/layout_options.py +77 -7
- natural_pdf/analyzers/layout/paddle.py +318 -165
- natural_pdf/analyzers/layout/table_structure_utils.py +78 -0
- natural_pdf/analyzers/shape_detection_mixin.py +770 -405
- natural_pdf/classification/mixin.py +2 -8
- natural_pdf/collections/pdf_collection.py +25 -30
- natural_pdf/core/highlighting_service.py +47 -32
- natural_pdf/core/page.py +119 -76
- natural_pdf/core/pdf.py +19 -22
- natural_pdf/describe/__init__.py +21 -0
- natural_pdf/describe/base.py +457 -0
- natural_pdf/describe/elements.py +411 -0
- natural_pdf/describe/mixin.py +84 -0
- natural_pdf/describe/summary.py +186 -0
- natural_pdf/elements/base.py +11 -10
- natural_pdf/elements/collections.py +116 -51
- natural_pdf/elements/region.py +204 -127
- natural_pdf/exporters/paddleocr.py +38 -13
- natural_pdf/flows/__init__.py +3 -3
- natural_pdf/flows/collections.py +303 -132
- natural_pdf/flows/element.py +277 -132
- natural_pdf/flows/flow.py +33 -16
- natural_pdf/flows/region.py +142 -79
- natural_pdf/ocr/engine_doctr.py +37 -4
- natural_pdf/ocr/engine_easyocr.py +23 -3
- natural_pdf/ocr/engine_paddle.py +281 -30
- natural_pdf/ocr/engine_surya.py +8 -3
- natural_pdf/ocr/ocr_manager.py +75 -76
- natural_pdf/ocr/ocr_options.py +52 -87
- natural_pdf/search/__init__.py +25 -12
- natural_pdf/search/lancedb_search_service.py +91 -54
- natural_pdf/search/numpy_search_service.py +86 -65
- natural_pdf/search/searchable_mixin.py +2 -2
- natural_pdf/selectors/parser.py +125 -81
- natural_pdf/widgets/__init__.py +1 -1
- natural_pdf/widgets/viewer.py +205 -449
- {natural_pdf-0.1.15.dist-info → natural_pdf-0.1.17.dist-info}/METADATA +27 -45
- {natural_pdf-0.1.15.dist-info → natural_pdf-0.1.17.dist-info}/RECORD +44 -38
- {natural_pdf-0.1.15.dist-info → natural_pdf-0.1.17.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.15.dist-info → natural_pdf-0.1.17.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.15.dist-info → natural_pdf-0.1.17.dist-info}/top_level.txt +0 -0
natural_pdf/analyzers/shape_detection_mixin.py

@@ -3,14 +3,15 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 from PIL import Image, ImageDraw
+from scipy.ndimage import binary_closing, binary_opening, gaussian_filter1d
 from scipy.signal import find_peaks
-from scipy.ndimage import gaussian_filter1d, binary_opening, binary_closing
 
 if TYPE_CHECKING:
     from natural_pdf.core.page import Page
     from natural_pdf.core.pdf import PDF
     from natural_pdf.elements.collections import ElementCollection, PageCollection
     from natural_pdf.elements.line import LineElement
+
     # from natural_pdf.elements.rect import RectangleElement # Removed
     from natural_pdf.elements.region import Region
 
@@ -30,13 +31,16 @@ LINE_DETECTION_PARAM_DEFAULTS = {
     "peak_width_rel_height": 0.5,
 }
 
+
 class ShapeDetectionMixin:
     """
     Mixin class to provide shape detection capabilities (lines)
     for Page, Region, PDFCollection, and PageCollection objects.
     """
 
-    def _get_image_for_detection(
+    def _get_image_for_detection(
+        self, resolution: int
+    ) -> Tuple[Optional[np.ndarray], float, Tuple[float, float], Optional["Page"]]:
         """
         Gets the image for detection, scale factor, PDF origin offset, and the relevant page object.
 
@@ -52,19 +56,27 @@ class ShapeDetectionMixin:
         origin_offset_pdf = (0.0, 0.0)
 
         # Determine the type of self and get the appropriate image and page context
-        if
-
+        if (
+            hasattr(self, "to_image") and hasattr(self, "width") and hasattr(self, "height")
+        ):  # Page or Region
+            if hasattr(self, "x0") and hasattr(self, "top") and hasattr(self, "_page"):  # Region
                 logger.debug(f"Shape detection on Region: {self}")
                 page_obj = self._page
-                pil_image = self.to_image(
-
+                pil_image = self.to_image(
+                    resolution=resolution, crop_only=True, include_highlights=False
+                )
+                if pil_image:  # Ensure pil_image is not None before accessing attributes
                     origin_offset_pdf = (self.x0, self.top)
-                    logger.debug(
-
+                    logger.debug(
+                        f"Region image rendered successfully: {pil_image.width}x{pil_image.height}, origin_offset: {origin_offset_pdf}"
+                    )
+            else:  # Page
                 logger.debug(f"Shape detection on Page: {self}")
                 page_obj = self
                 pil_image = self.to_image(resolution=resolution, include_highlights=False)
-                logger.debug(
+                logger.debug(
+                    f"Page image rendered successfully: {pil_image.width}x{pil_image.height}"
+                )
         else:
             logger.error(f"Instance of type {type(self)} does not support to_image for detection.")
             return None, 1.0, (0.0, 0.0), None

@@ -76,12 +88,12 @@ class ShapeDetectionMixin:
         if pil_image.mode != "RGB":
             pil_image = pil_image.convert("RGB")
         cv_image = np.array(pil_image)
-
+
         # Calculate scale_factor: points_per_pixel
         # For a Page, self.width/height are PDF points. pil_image.width/height are pixels.
         # For a Region, self.width/height are PDF points of the region. pil_image.width/height are pixels of the cropped image.
         # The scale factor should always relate the dimensions of the *processed image* to the *PDF dimensions* of that same area.
-
+
         if page_obj and pil_image.width > 0 and pil_image.height > 0:
             # If it's a region, its self.width/height are its dimensions in PDF points.
             # pil_image.width/height are the pixel dimensions of the cropped image of that region.

@@ -90,30 +102,34 @@ class ShapeDetectionMixin:
             # If 100 PDF points span 200 pixels, then 1 pixel = 0.5 PDF points. scale_factor = points/pixels
             # Example: Page width 500pt, image width 1000px. Scale = 500/1000 = 0.5 pt/px
             # Region width 50pt, cropped image width 100px. Scale = 50/100 = 0.5 pt/px
-
+
             # Use self.width/height for scale factor calculation because these correspond to the PDF dimensions of the area imaged.
             # This ensures that if self is a Region, its specific dimensions are used for scaling its own cropped image.
-
+
            # We need two scale factors if aspect ratio is not preserved by to_image,
            # but to_image generally aims to preserve it when only resolution is changed.
            # Assuming uniform scaling for now.
            # A robust way: scale_x = self.width / pil_image.width; scale_y = self.height / pil_image.height
            # For simplicity, let's assume uniform scaling or average it.
            # Average scale factor:
-            scale_factor = (
-            logger.debug(
+            scale_factor = ((self.width / pil_image.width) + (self.height / pil_image.height)) / 2.0
+            logger.debug(
+                f"Calculated scale_factor: {scale_factor:.4f} (PDF dimensions: {self.width:.1f}x{self.height:.1f}, Image: {pil_image.width}x{pil_image.height})"
+            )
 
         else:
             logger.warning("Could not determine page object or image dimensions for scaling.")
-            scale_factor = 1.0
+            scale_factor = 1.0  # Default to no scaling if info is missing
 
         return cv_image, scale_factor, origin_offset_pdf, page_obj
 
-
-
-
     def _convert_line_to_element_data(
-        self,
+        self,
+        line_data_img: Dict,
+        scale_factor: float,
+        origin_offset_pdf: Tuple[float, float],
+        page_obj: "Page",
+        source_label: str,
     ) -> Dict:
         """Converts line data from image coordinates to PDF element data."""
         # Ensure scale_factor is not zero to prevent division by zero or incorrect scaling
@@ -121,41 +137,50 @@ class ShapeDetectionMixin:
             logger.warning("Scale factor is zero, cannot convert line coordinates correctly.")
             # Return something or raise error, for now, try to proceed with unscaled if possible (won't be right)
             # This situation ideally shouldn't happen if _get_image_for_detection is robust.
-            effective_scale = 1.0
+            effective_scale = 1.0
         else:
             effective_scale = scale_factor
 
-        x0 = origin_offset_pdf[0] + line_data_img[
-        top = origin_offset_pdf[1] + line_data_img[
-        x1 = origin_offset_pdf[0] + line_data_img[
-        bottom =
-
+        x0 = origin_offset_pdf[0] + line_data_img["x1"] * effective_scale
+        top = origin_offset_pdf[1] + line_data_img["y1"] * effective_scale
+        x1 = origin_offset_pdf[0] + line_data_img["x2"] * effective_scale
+        bottom = (
+            origin_offset_pdf[1] + line_data_img["y2"] * effective_scale
+        )  # y2 is the second y-coord
+
         # For lines, width attribute in PDF points
-        line_width_pdf = line_data_img[
+        line_width_pdf = line_data_img["width"] * effective_scale
 
         # initial_doctop might not be loaded if page object is minimal
-        initial_doctop =
+        initial_doctop = (
+            getattr(page_obj._page, "initial_doctop", 0) if hasattr(page_obj, "_page") else 0
+        )
 
         return {
-            "x0": x0,
-            "
-            "
-            "
+            "x0": x0,
+            "top": top,
+            "x1": x1,
+            "bottom": bottom,  # bottom here is y2_pdf
+            "width": abs(x1 - x0),  # This is bounding box width
+            "height": abs(bottom - top),  # This is bounding box height
+            "linewidth": line_width_pdf,  # Actual stroke width of the line
             "object_type": "line",
             "page_number": page_obj.page_number,
             "doctop": top + initial_doctop,
             "source": source_label,
-            "stroking_color": (0,0,0),
-            "non_stroking_color": (0,0,0),
+            "stroking_color": (0, 0, 0),  # Default, can be enhanced
+            "non_stroking_color": (0, 0, 0),  # Default
             # Add other raw data if useful
-            "raw_line_thickness_px": line_data_img.get(
-
+            "raw_line_thickness_px": line_data_img.get(
+                "line_thickness_px"
+            ),  # Renamed from raw_nfa_score
+            "raw_line_position_px": line_data_img.get("line_position_px"),  # Added for clarity
         }
 
     def _find_lines_on_image_data(
         self,
         cv_image: np.ndarray,
-        pil_image_rgb: Image.Image,
+        pil_image_rgb: Image.Image,  # For original dimensions
         horizontal: bool = True,
         vertical: bool = True,
         peak_threshold_h: float = 0.5,
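For readers following the coordinate math in the two hunks above: the new code computes scale_factor as PDF points per rendered pixel (averaging the width and height ratios) and maps image-space line endpoints back into page space by scaling and adding the region's origin offset. A minimal standalone sketch of that conversion; the helper name and the example numbers are illustrative and not part of the package:

```python
from typing import Dict, Tuple


def image_line_to_pdf_coords(
    line_img: Dict[str, float],     # {"x1", "y1", "x2", "y2"} in image pixels
    pdf_size: Tuple[float, float],  # (width, height) of the imaged area in PDF points
    img_size: Tuple[int, int],      # (width, height) of the rendered image in pixels
    origin_offset_pdf: Tuple[float, float] = (0.0, 0.0),  # (x0, top) for a Region, (0, 0) for a Page
) -> Tuple[float, float, float, float]:
    # Average the two ratios, mirroring the diff's uniform-scaling assumption.
    scale = ((pdf_size[0] / img_size[0]) + (pdf_size[1] / img_size[1])) / 2.0
    x0 = origin_offset_pdf[0] + line_img["x1"] * scale
    top = origin_offset_pdf[1] + line_img["y1"] * scale
    x1 = origin_offset_pdf[0] + line_img["x2"] * scale
    bottom = origin_offset_pdf[1] + line_img["y2"] * scale
    return x0, top, x1, bottom


# Example: a 500 pt wide page rendered at 1000 px gives 0.5 pt/px,
# so a line endpoint at pixel x=200 lands at 100 pt.
print(image_line_to_pdf_coords({"x1": 200, "y1": 50, "x2": 800, "y2": 50}, (500, 700), (1000, 1400)))
```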
@@ -165,7 +190,9 @@ class ShapeDetectionMixin:
         max_lines_h: Optional[int] = None,
         max_lines_v: Optional[int] = None,
         binarization_method: str = LINE_DETECTION_PARAM_DEFAULTS["binarization_method"],
-        adaptive_thresh_block_size: int = LINE_DETECTION_PARAM_DEFAULTS[
+        adaptive_thresh_block_size: int = LINE_DETECTION_PARAM_DEFAULTS[
+            "adaptive_thresh_block_size"
+        ],
         adaptive_thresh_C_val: int = LINE_DETECTION_PARAM_DEFAULTS["adaptive_thresh_C_val"],
         morph_op_h: str = LINE_DETECTION_PARAM_DEFAULTS["morph_op_h"],
         morph_kernel_h: Tuple[int, int] = LINE_DETECTION_PARAM_DEFAULTS["morph_kernel_h"],

@@ -185,10 +212,10 @@ class ShapeDetectionMixin:
         # Convert RGB to grayscale using numpy (faster than PIL)
         # Using standard luminance weights: 0.299*R + 0.587*G + 0.114*B
         if len(cv_image.shape) == 3:
-            gray_image = np.dot(cv_image[
+            gray_image = np.dot(cv_image[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)
         else:
             gray_image = cv_image  # Already grayscale
-
+
         img_height, img_width = gray_image.shape
         logger.debug(f"Line detection - Image dimensions: {img_width}x{img_height}")
 
@@ -197,7 +224,7 @@ class ShapeDetectionMixin:
             # Calculate histogram
             hist, _ = np.histogram(image.flatten(), bins=256, range=(0, 256))
             hist = hist.astype(float)
-
+
             # Calculate probabilities
             total_pixels = image.size
             current_max = 0

@@ -205,55 +232,65 @@ class ShapeDetectionMixin:
             sum_total = np.sum(np.arange(256) * hist)
             sum_background = 0
             weight_background = 0
-
+
             for i in range(256):
                 weight_background += hist[i]
                 if weight_background == 0:
                     continue
-
+
                 weight_foreground = total_pixels - weight_background
                 if weight_foreground == 0:
                     break
-
+
                 sum_background += i * hist[i]
                 mean_background = sum_background / weight_background
                 mean_foreground = (sum_total - sum_background) / weight_foreground
-
+
                 # Calculate between-class variance
-                variance_between =
-
+                variance_between = (
+                    weight_background * weight_foreground * (mean_background - mean_foreground) ** 2
+                )
+
                 if variance_between > current_max:
                     current_max = variance_between
                     threshold = i
-
+
             return threshold
 
         def adaptive_threshold(image, block_size, C):
             """Simple adaptive thresholding implementation."""
             # Use scipy for gaussian filtering
             from scipy.ndimage import gaussian_filter
-
+
             # Calculate local means using gaussian filter
             sigma = block_size / 6.0  # Approximate relationship
             local_mean = gaussian_filter(image.astype(float), sigma=sigma)
-
+
             # Apply threshold
             binary = (image > (local_mean - C)).astype(np.uint8) * 255
             return 255 - binary  # Invert to match binary inverse thresholding
 
         if binarization_method == "adaptive":
-            binarized_image = adaptive_threshold(
+            binarized_image = adaptive_threshold(
+                gray_image, adaptive_thresh_block_size, adaptive_thresh_C_val
+            )
         elif binarization_method == "otsu":
             otsu_thresh_val = otsu_threshold(gray_image)
-            binarized_image = (gray_image <= otsu_thresh_val).astype(
+            binarized_image = (gray_image <= otsu_thresh_val).astype(
+                np.uint8
+            ) * 255  # Inverted binary
             logger.debug(f"Otsu's threshold applied. Value: {otsu_thresh_val}")
         else:
-            logger.error(
+            logger.error(
+                f"Invalid binarization_method: {binarization_method}. Supported: 'otsu', 'adaptive'. Defaulting to 'otsu'."
+            )
             otsu_thresh_val = otsu_threshold(gray_image)
-            binarized_image = (gray_image <= otsu_thresh_val).astype(
-
+            binarized_image = (gray_image <= otsu_thresh_val).astype(
+                np.uint8
+            ) * 255  # Inverted binary
+
         binarized_norm = binarized_image.astype(float) / 255.0
-
+
         detected_lines_data = []
         profile_h_smoothed_for_viz: Optional[np.ndarray] = None
         profile_v_smoothed_for_viz: Optional[np.ndarray] = None

@@ -262,8 +299,8 @@ class ShapeDetectionMixin:
             profile_data: np.ndarray,
             max_dimension_for_ratio: int,
             params_key_suffix: str,
-            is_horizontal_detection: bool
-        ) -> Tuple[List[Dict], np.ndarray]:
+            is_horizontal_detection: bool,
+        ) -> Tuple[List[Dict], np.ndarray]:  # Ensure it always returns profile_smoothed
             lines_info = []
             sigma = smoothing_sigma_h if is_horizontal_detection else smoothing_sigma_v
             profile_smoothed = gaussian_filter1d(profile_data.astype(float), sigma=sigma)

@@ -271,27 +308,31 @@ class ShapeDetectionMixin:
             peak_threshold = peak_threshold_h if is_horizontal_detection else peak_threshold_v
             min_gap = min_gap_h if is_horizontal_detection else min_gap_v
             max_lines = max_lines_h if is_horizontal_detection else max_lines_v
-
+
             current_peak_height_threshold = peak_threshold * max_dimension_for_ratio
             find_peaks_distance = min_gap
 
             if max_lines is not None:
-                current_peak_height_threshold = 1.0
-                find_peaks_distance = 1
-
+                current_peak_height_threshold = 1.0
+                find_peaks_distance = 1
+
             candidate_peaks_indices, candidate_properties = find_peaks(
-                profile_smoothed,
-
+                profile_smoothed,
+                height=current_peak_height_threshold,
+                distance=find_peaks_distance,
+                width=1,
+                prominence=1,
+                rel_height=peak_width_rel_height,
             )
-
+
             final_peaks_indices = candidate_peaks_indices
             final_properties = candidate_properties
 
             if max_lines is not None:
-                if len(candidate_peaks_indices) > 0 and
+                if len(candidate_peaks_indices) > 0 and "prominences" in candidate_properties:
                     prominences = candidate_properties["prominences"]
                     sorted_candidate_indices_by_prominence = np.argsort(prominences)[::-1]
-                    selected_peaks_original_indices = []
+                    selected_peaks_original_indices = []
                     suppressed_profile_indices = np.zeros(len(profile_smoothed), dtype=bool)
                     num_selected = 0
                     for original_idx_in_candidate_list in sorted_candidate_indices_by_prominence:

@@ -300,76 +341,101 @@ class ShapeDetectionMixin:
                         selected_peaks_original_indices.append(original_idx_in_candidate_list)
                         num_selected += 1
                         lower_bound = max(0, actual_profile_idx - min_gap)
-                        upper_bound = min(
+                        upper_bound = min(
+                            len(profile_smoothed), actual_profile_idx + min_gap + 1
+                        )
                         suppressed_profile_indices[lower_bound:upper_bound] = True
-                        if num_selected >= max_lines:
+                        if num_selected >= max_lines:
+                            break
                     final_peaks_indices = candidate_peaks_indices[selected_peaks_original_indices]
-                    final_properties = {
-
+                    final_properties = {
+                        key: val_array[selected_peaks_original_indices]
+                        for key, val_array in candidate_properties.items()
+                    }
+                    logger.debug(
+                        f"Selected {len(final_peaks_indices)} {params_key_suffix.upper()}-lines for max_lines={max_lines}."
+                    )
                 else:
                     final_peaks_indices = np.array([])
                     final_properties = {}
                     logger.debug(f"No {params_key_suffix.upper()}-peaks for max_lines selection.")
             elif not final_peaks_indices.size:
-
-
+                final_properties = {}
+                logger.debug(f"No {params_key_suffix.upper()}-lines found using threshold.")
             else:
-
+                logger.debug(
+                    f"Found {len(final_peaks_indices)} {params_key_suffix.upper()}-lines using threshold."
+                )
 
             if final_peaks_indices.size > 0:
                 sort_order = np.argsort(final_peaks_indices)
                 final_peaks_indices = final_peaks_indices[sort_order]
-                for key in final_properties:
+                for key in final_properties:
+                    final_properties[key] = final_properties[key][sort_order]
 
             for i, peak_idx in enumerate(final_peaks_indices):
                 center_coord = int(peak_idx)
-                profile_thickness =
+                profile_thickness = (
+                    final_properties.get("widths", [])[i]
+                    if "widths" in final_properties and i < len(final_properties["widths"])
+                    else 1.0
+                )
                 profile_thickness = max(1, int(round(profile_thickness)))
-
-                current_img_width = pil_image_rgb.width
+
+                current_img_width = pil_image_rgb.width  # Use actual passed image dimensions
                 current_img_height = pil_image_rgb.height
 
-                if is_horizontal_detection:
-                    lines_info.append(
 ...
+                if is_horizontal_detection:
+                    lines_info.append(
+                        {
+                            "x1": 0,
+                            "y1": center_coord,
+                            "x2": current_img_width - 1,
+                            "y2": center_coord,
+                            "width": profile_thickness,
+                            "length": current_img_width,
+                            "line_thickness_px": profile_thickness,
+                            "line_position_px": center_coord,
+                        }
+                    )
+                else:
+                    lines_info.append(
+                        {
+                            "x1": center_coord,
+                            "y1": 0,
+                            "x2": center_coord,
+                            "y2": current_img_height - 1,
+                            "width": profile_thickness,
+                            "length": current_img_height,
+                            "line_thickness_px": profile_thickness,
+                            "line_position_px": center_coord,
+                        }
+                    )
             return lines_info, profile_smoothed
 
         def apply_morphology(image, operation, kernel_size):
             """Apply morphological operations using scipy.ndimage."""
             if operation == "none":
                 return image
-
+
             # Create rectangular structuring element
             # kernel_size is (width, height) = (cols, rows)
             cols, rows = kernel_size
             structure = np.ones((rows, cols))  # Note: numpy uses (rows, cols) order
-
+
             # Convert to binary for morphological operations
             binary_img = (image > 0.5).astype(bool)
-
+
             if operation == "open":
                 result = binary_opening(binary_img, structure=structure)
             elif operation == "close":
                 result = binary_closing(binary_img, structure=structure)
             else:
-                logger.warning(
+                logger.warning(
+                    f"Unknown morphological operation: {operation}. Supported: 'open', 'close', 'none'."
+                )
                 result = binary_img
-
+
             # Convert back to float
             return result.astype(float)
 
@@ -378,7 +444,9 @@ class ShapeDetectionMixin:
         if morph_op_h != "none":
             processed_image_h = apply_morphology(processed_image_h, morph_op_h, morph_kernel_h)
         profile_h_raw = np.sum(processed_image_h, axis=1)
-        horizontal_lines, smoothed_h = get_lines_from_profile(
+        horizontal_lines, smoothed_h = get_lines_from_profile(
+            profile_h_raw, pil_image_rgb.width, "h", True
+        )
         profile_h_smoothed_for_viz = smoothed_h
         detected_lines_data.extend(horizontal_lines)
         logger.info(f"Detected {len(horizontal_lines)} horizontal lines.")

@@ -388,11 +456,13 @@ class ShapeDetectionMixin:
         if morph_op_v != "none":
             processed_image_v = apply_morphology(processed_image_v, morph_op_v, morph_kernel_v)
         profile_v_raw = np.sum(processed_image_v, axis=0)
-        vertical_lines, smoothed_v = get_lines_from_profile(
+        vertical_lines, smoothed_v = get_lines_from_profile(
+            profile_v_raw, pil_image_rgb.height, "v", False
+        )
         profile_v_smoothed_for_viz = smoothed_v
         detected_lines_data.extend(vertical_lines)
         logger.info(f"Detected {len(vertical_lines)} vertical lines.")
-
+
         return detected_lines_data, profile_h_smoothed_for_viz, profile_v_smoothed_for_viz
 
     def detect_lines(
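The hunks above are the core of the "projection" detector: binarize the rendered page, sum the ink pixels per row (or column), smooth the profile, and treat peaks above a width-relative threshold as lines via scipy.signal.find_peaks. A minimal, self-contained sketch of the same idea; the function name, fixed binarization threshold, and test image are illustrative and differ from the package's defaults:

```python
import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks


def horizontal_line_rows(gray: np.ndarray, peak_threshold: float = 0.5,
                         min_gap: int = 5, sigma: float = 1.0) -> np.ndarray:
    """Return row indices that look like horizontal rules in a grayscale image."""
    # Dark pixels -> 1.0, light pixels -> 0.0 (a fixed cutoff instead of Otsu/adaptive).
    binarized = (gray < 128).astype(float)
    # Row profile: how many "ink" pixels each row contains, lightly smoothed.
    profile = gaussian_filter1d(binarized.sum(axis=1), sigma=sigma)
    # A row qualifies as a line when its ink count exceeds a fraction of the image width.
    peaks, _ = find_peaks(profile, height=peak_threshold * gray.shape[1], distance=min_gap)
    return peaks


# Synthetic test: a white 100x200 image with two dark 3-pixel-thick horizontal lines.
img = np.full((100, 200), 255, dtype=np.uint8)
img[19:22, :] = 0
img[69:72, :] = 0
print(horizontal_line_rows(img))  # approximately [20, 70]
```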
@@ -410,7 +480,9 @@ class ShapeDetectionMixin:
|
|
410
480
|
max_lines_v: Optional[int] = None,
|
411
481
|
replace: bool = True,
|
412
482
|
binarization_method: str = LINE_DETECTION_PARAM_DEFAULTS["binarization_method"],
|
413
|
-
adaptive_thresh_block_size: int = LINE_DETECTION_PARAM_DEFAULTS[
|
483
|
+
adaptive_thresh_block_size: int = LINE_DETECTION_PARAM_DEFAULTS[
|
484
|
+
"adaptive_thresh_block_size"
|
485
|
+
],
|
414
486
|
adaptive_thresh_C_val: int = LINE_DETECTION_PARAM_DEFAULTS["adaptive_thresh_C_val"],
|
415
487
|
morph_op_h: str = LINE_DETECTION_PARAM_DEFAULTS["morph_op_h"],
|
416
488
|
morph_kernel_h: Tuple[int, int] = LINE_DETECTION_PARAM_DEFAULTS["morph_kernel_h"],
|
@@ -428,7 +500,7 @@ class ShapeDetectionMixin:
|
|
428
500
|
initial_min_line_length: int = 10,
|
429
501
|
min_nfa_score_horizontal: float = -10.0,
|
430
502
|
min_nfa_score_vertical: float = -10.0,
|
431
|
-
) -> "ShapeDetectionMixin":
|
503
|
+
) -> "ShapeDetectionMixin": # Return type changed back to self
|
432
504
|
"""
|
433
505
|
Detects lines on the Page or Region, or on all pages within a Collection.
|
434
506
|
Adds detected lines as LineElement objects to the ElementManager.
|
@@ -439,7 +511,7 @@ class ShapeDetectionMixin:
|
|
439
511
|
method: Detection method - "projection" (default, no cv2 required) or "lsd" (requires opencv-python).
|
440
512
|
horizontal: If True, detect horizontal lines.
|
441
513
|
vertical: If True, detect vertical lines.
|
442
|
-
|
514
|
+
|
443
515
|
# Projection profiling parameters:
|
444
516
|
peak_threshold_h: Threshold for peak detection in horizontal profile (ratio of image width).
|
445
517
|
min_gap_h: Minimum gap between horizontal lines (pixels).
|
@@ -458,7 +530,7 @@ class ShapeDetectionMixin:
|
|
458
530
|
smoothing_sigma_h: Gaussian smoothing sigma for horizontal profile.
|
459
531
|
smoothing_sigma_v: Gaussian smoothing sigma for vertical profile.
|
460
532
|
peak_width_rel_height: Relative height for `scipy.find_peaks` 'width' parameter.
|
461
|
-
|
533
|
+
|
462
534
|
# LSD-specific parameters (only used when method="lsd"):
|
463
535
|
off_angle: Maximum angle deviation from horizontal/vertical for line classification.
|
464
536
|
min_line_length: Minimum length for final detected lines.
|
@@ -471,7 +543,7 @@ class ShapeDetectionMixin:
|
|
471
543
|
|
472
544
|
Returns:
|
473
545
|
Self for method chaining.
|
474
|
-
|
546
|
+
|
475
547
|
Raises:
|
476
548
|
ImportError: If method="lsd" but opencv-python is not installed.
|
477
549
|
ValueError: If method is not "projection" or "lsd".
|
@@ -479,38 +551,51 @@ class ShapeDetectionMixin:
|
|
479
551
|
if not horizontal and not vertical:
|
480
552
|
logger.info("Line detection skipped as both horizontal and vertical are False.")
|
481
553
|
return self
|
482
|
-
|
554
|
+
|
483
555
|
# Validate method parameter
|
484
556
|
if method not in ["projection", "lsd"]:
|
485
557
|
raise ValueError(f"Invalid method '{method}'. Supported methods: 'projection', 'lsd'")
|
486
|
-
|
558
|
+
|
487
559
|
collection_params = {
|
488
|
-
"resolution": resolution,
|
489
|
-
"
|
490
|
-
"
|
491
|
-
"
|
492
|
-
"
|
560
|
+
"resolution": resolution,
|
561
|
+
"source_label": source_label,
|
562
|
+
"method": method,
|
563
|
+
"horizontal": horizontal,
|
564
|
+
"vertical": vertical,
|
565
|
+
"peak_threshold_h": peak_threshold_h,
|
566
|
+
"min_gap_h": min_gap_h,
|
567
|
+
"peak_threshold_v": peak_threshold_v,
|
568
|
+
"min_gap_v": min_gap_v,
|
569
|
+
"max_lines_h": max_lines_h,
|
570
|
+
"max_lines_v": max_lines_v,
|
493
571
|
"replace": replace,
|
494
572
|
"binarization_method": binarization_method,
|
495
573
|
"adaptive_thresh_block_size": adaptive_thresh_block_size,
|
496
574
|
"adaptive_thresh_C_val": adaptive_thresh_C_val,
|
497
|
-
"morph_op_h": morph_op_h,
|
498
|
-
"
|
499
|
-
"
|
575
|
+
"morph_op_h": morph_op_h,
|
576
|
+
"morph_kernel_h": morph_kernel_h,
|
577
|
+
"morph_op_v": morph_op_v,
|
578
|
+
"morph_kernel_v": morph_kernel_v,
|
579
|
+
"smoothing_sigma_h": smoothing_sigma_h,
|
580
|
+
"smoothing_sigma_v": smoothing_sigma_v,
|
500
581
|
"peak_width_rel_height": peak_width_rel_height,
|
501
582
|
# LSD parameters
|
502
|
-
"off_angle": off_angle,
|
503
|
-
"
|
504
|
-
"
|
505
|
-
"
|
583
|
+
"off_angle": off_angle,
|
584
|
+
"min_line_length": min_line_length,
|
585
|
+
"merge_angle_tolerance": merge_angle_tolerance,
|
586
|
+
"merge_distance_tolerance": merge_distance_tolerance,
|
587
|
+
"merge_endpoint_tolerance": merge_endpoint_tolerance,
|
588
|
+
"initial_min_line_length": initial_min_line_length,
|
589
|
+
"min_nfa_score_horizontal": min_nfa_score_horizontal,
|
590
|
+
"min_nfa_score_vertical": min_nfa_score_vertical,
|
506
591
|
}
|
507
592
|
|
508
|
-
if hasattr(self,
|
593
|
+
if hasattr(self, "pdfs"):
|
509
594
|
for pdf_doc in self.pdfs:
|
510
595
|
for page_obj in pdf_doc.pages:
|
511
596
|
page_obj.detect_lines(**collection_params)
|
512
597
|
return self
|
513
|
-
elif hasattr(self,
|
598
|
+
elif hasattr(self, "pages") and not hasattr(self, "_page"):
|
514
599
|
for page_obj in self.pages:
|
515
600
|
page_obj.detect_lines(**collection_params)
|
516
601
|
return self
|
@@ -518,25 +603,47 @@ class ShapeDetectionMixin:
|
|
518
603
|
# Dispatch to appropriate detection method
|
519
604
|
if method == "projection":
|
520
605
|
return self._detect_lines_projection(
|
521
|
-
resolution=resolution,
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
606
|
+
resolution=resolution,
|
607
|
+
source_label=source_label,
|
608
|
+
horizontal=horizontal,
|
609
|
+
vertical=vertical,
|
610
|
+
peak_threshold_h=peak_threshold_h,
|
611
|
+
min_gap_h=min_gap_h,
|
612
|
+
peak_threshold_v=peak_threshold_v,
|
613
|
+
min_gap_v=min_gap_v,
|
614
|
+
max_lines_h=max_lines_h,
|
615
|
+
max_lines_v=max_lines_v,
|
616
|
+
replace=replace,
|
617
|
+
binarization_method=binarization_method,
|
618
|
+
adaptive_thresh_block_size=adaptive_thresh_block_size,
|
619
|
+
adaptive_thresh_C_val=adaptive_thresh_C_val,
|
620
|
+
morph_op_h=morph_op_h,
|
621
|
+
morph_kernel_h=morph_kernel_h,
|
622
|
+
morph_op_v=morph_op_v,
|
623
|
+
morph_kernel_v=morph_kernel_v,
|
624
|
+
smoothing_sigma_h=smoothing_sigma_h,
|
625
|
+
smoothing_sigma_v=smoothing_sigma_v,
|
626
|
+
peak_width_rel_height=peak_width_rel_height,
|
528
627
|
)
|
529
628
|
elif method == "lsd":
|
530
629
|
return self._detect_lines_lsd(
|
531
|
-
resolution=resolution,
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
630
|
+
resolution=resolution,
|
631
|
+
source_label=source_label,
|
632
|
+
horizontal=horizontal,
|
633
|
+
vertical=vertical,
|
634
|
+
off_angle=off_angle,
|
635
|
+
min_line_length=min_line_length,
|
636
|
+
merge_angle_tolerance=merge_angle_tolerance,
|
637
|
+
merge_distance_tolerance=merge_distance_tolerance,
|
638
|
+
merge_endpoint_tolerance=merge_endpoint_tolerance,
|
639
|
+
initial_min_line_length=initial_min_line_length,
|
640
|
+
min_nfa_score_horizontal=min_nfa_score_horizontal,
|
641
|
+
min_nfa_score_vertical=min_nfa_score_vertical,
|
642
|
+
replace=replace,
|
536
643
|
)
|
537
644
|
else:
|
538
|
-
|
539
|
-
|
645
|
+
# This should never happen due to validation above, but just in case
|
646
|
+
raise ValueError(f"Unsupported method: {method}")
|
540
647
|
|
541
648
|
def _detect_lines_projection(
|
542
649
|
self,
|
@@ -563,36 +670,44 @@ class ShapeDetectionMixin:
|
|
563
670
|
peak_width_rel_height: float,
|
564
671
|
) -> "ShapeDetectionMixin":
|
565
672
|
"""Internal method for projection profiling line detection."""
|
566
|
-
cv_image, scale_factor, origin_offset_pdf, page_object_ctx = self._get_image_for_detection(
|
673
|
+
cv_image, scale_factor, origin_offset_pdf, page_object_ctx = self._get_image_for_detection(
|
674
|
+
resolution
|
675
|
+
)
|
567
676
|
if cv_image is None or page_object_ctx is None:
|
568
677
|
logger.warning(f"Skipping line detection for {self} due to image error.")
|
569
678
|
return self
|
570
|
-
|
679
|
+
|
571
680
|
pil_image_for_dims = None
|
572
|
-
if hasattr(self,
|
573
|
-
if hasattr(self,
|
574
|
-
pil_image_for_dims = self.to_image(
|
681
|
+
if hasattr(self, "to_image") and hasattr(self, "width") and hasattr(self, "height"):
|
682
|
+
if hasattr(self, "x0") and hasattr(self, "top") and hasattr(self, "_page"):
|
683
|
+
pil_image_for_dims = self.to_image(
|
684
|
+
resolution=resolution, crop_only=True, include_highlights=False
|
685
|
+
)
|
575
686
|
else:
|
576
687
|
pil_image_for_dims = self.to_image(resolution=resolution, include_highlights=False)
|
577
688
|
if pil_image_for_dims is None:
|
578
689
|
logger.warning(f"Could not re-render PIL image for dimensions for {self}.")
|
579
|
-
pil_image_for_dims = Image.fromarray(cv_image)
|
690
|
+
pil_image_for_dims = Image.fromarray(cv_image) # Ensure it's not None
|
580
691
|
|
581
692
|
if pil_image_for_dims.mode != "RGB":
|
582
693
|
pil_image_for_dims = pil_image_for_dims.convert("RGB")
|
583
694
|
|
584
695
|
if replace:
|
585
696
|
from natural_pdf.elements.line import LineElement
|
697
|
+
|
586
698
|
element_manager = page_object_ctx._element_mgr
|
587
|
-
if hasattr(element_manager,
|
588
|
-
original_count = len(element_manager._elements[
|
589
|
-
element_manager._elements[
|
590
|
-
line
|
591
|
-
|
699
|
+
if hasattr(element_manager, "_elements") and "lines" in element_manager._elements:
|
700
|
+
original_count = len(element_manager._elements["lines"])
|
701
|
+
element_manager._elements["lines"] = [
|
702
|
+
line
|
703
|
+
for line in element_manager._elements["lines"]
|
704
|
+
if getattr(line, "source", None) != source_label
|
592
705
|
]
|
593
|
-
removed_count = original_count - len(element_manager._elements[
|
706
|
+
removed_count = original_count - len(element_manager._elements["lines"])
|
594
707
|
if removed_count > 0:
|
595
|
-
logger.info(
|
708
|
+
logger.info(
|
709
|
+
f"Removed {removed_count} existing lines with source '{source_label}' from {page_object_ctx}"
|
710
|
+
)
|
596
711
|
|
597
712
|
lines_data_img, profile_h_smoothed, profile_v_smoothed = self._find_lines_on_image_data(
|
598
713
|
cv_image=cv_image,
|
@@ -608,13 +723,17 @@ class ShapeDetectionMixin:
|
|
608
723
|
binarization_method=binarization_method,
|
609
724
|
adaptive_thresh_block_size=adaptive_thresh_block_size,
|
610
725
|
adaptive_thresh_C_val=adaptive_thresh_C_val,
|
611
|
-
morph_op_h=morph_op_h,
|
612
|
-
|
613
|
-
|
726
|
+
morph_op_h=morph_op_h,
|
727
|
+
morph_kernel_h=morph_kernel_h,
|
728
|
+
morph_op_v=morph_op_v,
|
729
|
+
morph_kernel_v=morph_kernel_v,
|
730
|
+
smoothing_sigma_h=smoothing_sigma_h,
|
731
|
+
smoothing_sigma_v=smoothing_sigma_v,
|
614
732
|
peak_width_rel_height=peak_width_rel_height,
|
615
733
|
)
|
616
734
|
|
617
735
|
from natural_pdf.elements.line import LineElement
|
736
|
+
|
618
737
|
element_manager = page_object_ctx._element_mgr
|
619
738
|
|
620
739
|
for line_data_item_img in lines_data_img:
|
@@ -625,9 +744,14 @@ class ShapeDetectionMixin:
|
|
625
744
|
line_element = LineElement(element_constructor_data, page_object_ctx)
|
626
745
|
element_manager.add_element(line_element, element_type="lines")
|
627
746
|
except Exception as e:
|
628
|
-
logger.error(
|
629
|
-
|
630
|
-
|
747
|
+
logger.error(
|
748
|
+
f"Failed to create or add LineElement: {e}. Data: {element_constructor_data}",
|
749
|
+
exc_info=True,
|
750
|
+
)
|
751
|
+
|
752
|
+
logger.info(
|
753
|
+
f"Detected and added {len(lines_data_img)} lines to {page_object_ctx} with source '{source_label}' using projection profiling."
|
754
|
+
)
|
631
755
|
return self
|
632
756
|
|
633
757
|
def _detect_lines_lsd(
|
@@ -655,32 +779,45 @@ class ShapeDetectionMixin:
|
|
655
779
|
"Install it with: pip install opencv-python\n"
|
656
780
|
"Alternatively, use method='projection' which requires no additional dependencies."
|
657
781
|
)
|
658
|
-
|
659
|
-
cv_image, scale_factor, origin_offset_pdf, page_object_ctx = self._get_image_for_detection(
|
782
|
+
|
783
|
+
cv_image, scale_factor, origin_offset_pdf, page_object_ctx = self._get_image_for_detection(
|
784
|
+
resolution
|
785
|
+
)
|
660
786
|
if cv_image is None or page_object_ctx is None:
|
661
787
|
logger.warning(f"Skipping LSD line detection for {self} due to image error.")
|
662
788
|
return self
|
663
789
|
|
664
790
|
if replace:
|
665
791
|
from natural_pdf.elements.line import LineElement
|
792
|
+
|
666
793
|
element_manager = page_object_ctx._element_mgr
|
667
|
-
if hasattr(element_manager,
|
668
|
-
original_count = len(element_manager._elements[
|
669
|
-
element_manager._elements[
|
670
|
-
line
|
671
|
-
|
794
|
+
if hasattr(element_manager, "_elements") and "lines" in element_manager._elements:
|
795
|
+
original_count = len(element_manager._elements["lines"])
|
796
|
+
element_manager._elements["lines"] = [
|
797
|
+
line
|
798
|
+
for line in element_manager._elements["lines"]
|
799
|
+
if getattr(line, "source", None) != source_label
|
672
800
|
]
|
673
|
-
removed_count = original_count - len(element_manager._elements[
|
801
|
+
removed_count = original_count - len(element_manager._elements["lines"])
|
674
802
|
if removed_count > 0:
|
675
|
-
logger.info(
|
803
|
+
logger.info(
|
804
|
+
f"Removed {removed_count} existing lines with source '{source_label}' from {page_object_ctx}"
|
805
|
+
)
|
676
806
|
|
677
807
|
lines_data_img = self._process_image_for_lines_lsd(
|
678
|
-
cv_image,
|
679
|
-
|
680
|
-
|
808
|
+
cv_image,
|
809
|
+
off_angle,
|
810
|
+
min_line_length,
|
811
|
+
merge_angle_tolerance,
|
812
|
+
merge_distance_tolerance,
|
813
|
+
merge_endpoint_tolerance,
|
814
|
+
initial_min_line_length,
|
815
|
+
min_nfa_score_horizontal,
|
816
|
+
min_nfa_score_vertical,
|
681
817
|
)
|
682
818
|
|
683
819
|
from natural_pdf.elements.line import LineElement
|
820
|
+
|
684
821
|
element_manager = page_object_ctx._element_mgr
|
685
822
|
|
686
823
|
for line_data_item_img in lines_data_img:
|
@@ -691,9 +828,14 @@ class ShapeDetectionMixin:
|
|
691
828
|
line_element = LineElement(element_constructor_data, page_object_ctx)
|
692
829
|
element_manager.add_element(line_element, element_type="lines")
|
693
830
|
except Exception as e:
|
694
|
-
logger.error(
|
695
|
-
|
696
|
-
|
831
|
+
logger.error(
|
832
|
+
f"Failed to create or add LineElement: {e}. Data: {element_constructor_data}",
|
833
|
+
exc_info=True,
|
834
|
+
)
|
835
|
+
|
836
|
+
logger.info(
|
837
|
+
f"Detected and added {len(lines_data_img)} lines to {page_object_ctx} with source '{source_label}' using LSD."
|
838
|
+
)
|
697
839
|
return self
|
698
840
|
|
699
841
|
def _process_image_for_lines_lsd(
|
@@ -710,28 +852,34 @@ class ShapeDetectionMixin:
|
|
710
852
|
) -> List[Dict]:
|
711
853
|
"""Processes an image to detect lines using OpenCV LSD and merging logic."""
|
712
854
|
import cv2 # Import is already validated in calling method
|
713
|
-
|
855
|
+
|
714
856
|
if cv_image is None:
|
715
857
|
return []
|
716
|
-
|
858
|
+
|
717
859
|
gray_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2GRAY)
|
718
860
|
lsd = cv2.createLineSegmentDetector(cv2.LSD_REFINE_ADV)
|
719
861
|
coords_arr, widths_arr, precs_arr, nfa_scores_arr = lsd.detect(gray_image)
|
720
862
|
|
721
863
|
lines_raw = []
|
722
|
-
if coords_arr is not None:
|
723
|
-
nfa_scores_list =
|
724
|
-
|
864
|
+
if coords_arr is not None: # nfa_scores_arr can be None if no lines are found
|
865
|
+
nfa_scores_list = (
|
866
|
+
nfa_scores_arr.flatten() if nfa_scores_arr is not None else [0.0] * len(coords_arr)
|
867
|
+
)
|
868
|
+
widths_list = (
|
869
|
+
widths_arr.flatten() if widths_arr is not None else [1.0] * len(coords_arr)
|
870
|
+
)
|
725
871
|
precs_list = precs_arr.flatten() if precs_arr is not None else [0.0] * len(coords_arr)
|
726
872
|
|
727
873
|
for i in range(len(coords_arr)):
|
728
|
-
lines_raw.append(
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
874
|
+
lines_raw.append(
|
875
|
+
(
|
876
|
+
coords_arr[i][0],
|
877
|
+
widths_list[i] if i < len(widths_list) else 1.0,
|
878
|
+
precs_list[i] if i < len(precs_list) else 0.0,
|
879
|
+
nfa_scores_list[i] if i < len(nfa_scores_list) else 0.0,
|
880
|
+
)
|
881
|
+
)
|
882
|
+
|
735
883
|
def get_line_properties(line_data_item):
|
736
884
|
l_coords, l_width, l_prec, l_nfa_score = line_data_item
|
737
885
|
x1, y1, x2, y2 = l_coords
|
@@ -740,154 +888,248 @@ class ShapeDetectionMixin:
|
|
740
888
|
normalized_angle_deg = angle_deg % 180
|
741
889
|
if normalized_angle_deg < 0:
|
742
890
|
normalized_angle_deg += 180
|
743
|
-
|
744
|
-
is_h =
|
891
|
+
|
892
|
+
is_h = (
|
893
|
+
abs(normalized_angle_deg) <= off_angle
|
894
|
+
or abs(normalized_angle_deg - 180) <= off_angle
|
895
|
+
)
|
745
896
|
is_v = abs(normalized_angle_deg - 90) <= off_angle
|
746
897
|
|
747
|
-
if is_h and x1 > x2:
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
898
|
+
if is_h and x1 > x2:
|
899
|
+
x1, x2, y1, y2 = x2, x1, y2, y1
|
900
|
+
elif is_v and y1 > y2:
|
901
|
+
y1, y2, x1, x2 = y2, y1, x2, x1
|
902
|
+
|
903
|
+
length = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
|
904
|
+
return {
|
905
|
+
"coords": (x1, y1, x2, y2),
|
906
|
+
"width": l_width,
|
907
|
+
"prec": l_prec,
|
908
|
+
"angle_deg": normalized_angle_deg,
|
909
|
+
"is_horizontal": is_h,
|
910
|
+
"is_vertical": is_v,
|
911
|
+
"length": length,
|
912
|
+
"nfa_score": l_nfa_score,
|
913
|
+
}
|
754
914
|
|
755
915
|
processed_lines = [get_line_properties(ld) for ld in lines_raw]
|
756
|
-
|
916
|
+
|
757
917
|
filtered_lines = []
|
758
918
|
for p in processed_lines:
|
759
|
-
if p[
|
760
|
-
|
919
|
+
if p["length"] <= initial_min_line_length:
|
920
|
+
continue
|
921
|
+
if p["is_horizontal"] and p["nfa_score"] >= min_nfa_score_horizontal:
|
761
922
|
filtered_lines.append(p)
|
762
|
-
elif p[
|
923
|
+
elif p["is_vertical"] and p["nfa_score"] >= min_nfa_score_vertical:
|
763
924
|
filtered_lines.append(p)
|
764
|
-
|
765
|
-
horizontal_lines = [p for p in filtered_lines if p[
|
766
|
-
vertical_lines = [p for p in filtered_lines if p[
|
925
|
+
|
926
|
+
horizontal_lines = [p for p in filtered_lines if p["is_horizontal"]]
|
927
|
+
vertical_lines = [p for p in filtered_lines if p["is_vertical"]]
|
767
928
|
|
768
929
|
def merge_lines_list(lines_list, is_horizontal_merge):
|
769
|
-
if not lines_list:
|
770
|
-
|
930
|
+
if not lines_list:
|
931
|
+
return []
|
932
|
+
key_sort = (
|
933
|
+
(lambda p: (p["coords"][1], p["coords"][0]))
|
934
|
+
if is_horizontal_merge
|
935
|
+
else (lambda p: (p["coords"][0], p["coords"][1]))
|
936
|
+
)
|
771
937
|
lines_list.sort(key=key_sort)
|
772
|
-
|
938
|
+
|
773
939
|
merged_results = []
|
774
940
|
merged_flags = [False] * len(lines_list)
|
775
941
|
|
776
942
|
for i, current_line_props in enumerate(lines_list):
|
777
|
-
if merged_flags[i]:
|
778
|
-
|
779
|
-
|
943
|
+
if merged_flags[i]:
|
944
|
+
continue
|
945
|
+
group = [current_line_props]
|
946
|
+
merged_flags[i] = True
|
947
|
+
|
780
948
|
# Keep trying to expand the group until no more lines can be added
|
781
949
|
# Use multiple passes to ensure transitive merging works properly
|
782
950
|
for merge_pass in range(10): # Up to 10 passes to catch complex merging scenarios
|
783
951
|
group_changed = False
|
784
|
-
|
952
|
+
|
785
953
|
# Calculate current group boundaries
|
786
|
-
group_x1, group_y1 = min(p[
|
787
|
-
|
788
|
-
|
789
|
-
|
954
|
+
group_x1, group_y1 = min(p["coords"][0] for p in group), min(
|
955
|
+
p["coords"][1] for p in group
|
956
|
+
)
|
957
|
+
group_x2, group_y2 = max(p["coords"][2] for p in group), max(
|
958
|
+
p["coords"][3] for p in group
|
959
|
+
)
|
960
|
+
total_len_in_group = sum(p["length"] for p in group)
|
961
|
+
if total_len_in_group == 0:
|
962
|
+
continue # Should not happen
|
790
963
|
|
791
964
|
# Calculate weighted averages for the group
|
792
|
-
group_avg_angle =
|
793
|
-
|
965
|
+
group_avg_angle = (
|
966
|
+
sum(p["angle_deg"] * p["length"] for p in group) / total_len_in_group
|
967
|
+
)
|
968
|
+
|
794
969
|
if is_horizontal_merge:
|
795
|
-
group_avg_perp_coord =
|
970
|
+
group_avg_perp_coord = (
|
971
|
+
sum(
|
972
|
+
((p["coords"][1] + p["coords"][3]) / 2) * p["length"] for p in group
|
973
|
+
)
|
974
|
+
/ total_len_in_group
|
975
|
+
)
|
796
976
|
else:
|
797
|
-
group_avg_perp_coord =
|
977
|
+
group_avg_perp_coord = (
|
978
|
+
sum(
|
979
|
+
((p["coords"][0] + p["coords"][2]) / 2) * p["length"] for p in group
|
980
|
+
)
|
981
|
+
/ total_len_in_group
|
982
|
+
)
|
798
983
|
|
799
984
|
# Check all unmerged lines for potential merging
|
800
985
|
for j, candidate_props in enumerate(lines_list):
|
801
|
-
if merged_flags[j]:
|
802
|
-
|
986
|
+
if merged_flags[j]:
|
987
|
+
continue
|
988
|
+
|
803
989
|
# 1. Check for parallelism (angle similarity)
|
804
|
-
angle_diff = abs(group_avg_angle - candidate_props[
|
990
|
+
angle_diff = abs(group_avg_angle - candidate_props["angle_deg"])
|
805
991
|
# Handle wraparound for angles near 0/180
|
806
992
|
if angle_diff > 90:
|
807
993
|
angle_diff = 180 - angle_diff
|
808
|
-
if angle_diff > merge_angle_tolerance:
|
809
|
-
|
994
|
+
if angle_diff > merge_angle_tolerance:
|
995
|
+
continue
|
996
|
+
|
810
997
|
# 2. Check for closeness (perpendicular distance)
|
811
998
|
if is_horizontal_merge:
|
812
|
-
cand_perp_coord = (
|
999
|
+
cand_perp_coord = (
|
1000
|
+
candidate_props["coords"][1] + candidate_props["coords"][3]
|
1001
|
+
) / 2
|
813
1002
|
else:
|
814
|
-
cand_perp_coord = (
|
815
|
-
|
1003
|
+
cand_perp_coord = (
|
1004
|
+
candidate_props["coords"][0] + candidate_props["coords"][2]
|
1005
|
+
) / 2
|
1006
|
+
|
816
1007
|
perp_distance = abs(group_avg_perp_coord - cand_perp_coord)
|
817
|
-
if perp_distance > merge_distance_tolerance:
|
818
|
-
|
1008
|
+
if perp_distance > merge_distance_tolerance:
|
1009
|
+
continue
|
1010
|
+
|
819
1011
|
# 3. Check for reasonable proximity along the primary axis
|
820
1012
|
if is_horizontal_merge:
|
821
1013
|
# For horizontal lines, check x-axis relationship
|
822
|
-
cand_x1, cand_x2 =
|
1014
|
+
cand_x1, cand_x2 = (
|
1015
|
+
candidate_props["coords"][0],
|
1016
|
+
candidate_props["coords"][2],
|
1017
|
+
)
|
823
1018
|
# Check if there's overlap OR if the gap is reasonable
|
824
1019
|
overlap = max(0, min(group_x2, cand_x2) - max(group_x1, cand_x1))
|
825
1020
|
gap_to_group = min(abs(group_x1 - cand_x2), abs(group_x2 - cand_x1))
|
826
|
-
|
1021
|
+
|
827
1022
|
# Accept if there's overlap OR the gap is reasonable OR the candidate is contained within group span
|
828
|
-
if not (
|
1023
|
+
if not (
|
1024
|
+
overlap > 0
|
1025
|
+
or gap_to_group <= merge_endpoint_tolerance
|
1026
|
+
or (cand_x1 >= group_x1 and cand_x2 <= group_x2)
|
1027
|
+
):
|
829
1028
|
continue
|
830
1029
|
else:
|
831
|
-
# For vertical lines, check y-axis relationship
|
832
|
-
cand_y1, cand_y2 =
|
1030
|
+
# For vertical lines, check y-axis relationship
|
1031
|
+
cand_y1, cand_y2 = (
|
1032
|
+
candidate_props["coords"][1],
|
1033
|
+
candidate_props["coords"][3],
|
1034
|
+
)
|
833
1035
|
overlap = max(0, min(group_y2, cand_y2) - max(group_y1, cand_y1))
|
834
1036
|
gap_to_group = min(abs(group_y1 - cand_y2), abs(group_y2 - cand_y1))
|
835
|
-
|
836
|
-
if not (
|
1037
|
+
|
1038
|
+
if not (
|
1039
|
+
overlap > 0
|
1040
|
+
or gap_to_group <= merge_endpoint_tolerance
|
1041
|
+
or (cand_y1 >= group_y1 and cand_y2 <= group_y2)
|
1042
|
+
):
|
837
1043
|
continue
|
838
|
-
|
1044
|
+
|
839
1045
|
# If we reach here, lines should be merged
|
840
1046
|
group.append(candidate_props)
|
841
1047
|
merged_flags[j] = True
|
842
1048
|
group_changed = True
|
843
|
-
|
1049
|
+
|
844
1050
|
if not group_changed:
|
845
1051
|
break # No more lines added in this pass, stop trying
|
846
|
-
|
1052
|
+
|
847
1053
|
# Create final merged line from the group
|
848
|
-
final_x1, final_y1 = min(p[
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
1054
|
+
final_x1, final_y1 = min(p["coords"][0] for p in group), min(
|
1055
|
+
p["coords"][1] for p in group
|
1056
|
+
)
|
1057
|
+
final_x2, final_y2 = max(p["coords"][2] for p in group), max(
|
1058
|
+
p["coords"][3] for p in group
|
1059
|
+
)
|
1060
|
+
final_total_len = sum(p["length"] for p in group)
|
1061
|
+
if final_total_len == 0:
|
1062
|
+
continue
|
1063
|
+
|
1064
|
+
final_width = sum(p["width"] * p["length"] for p in group) / final_total_len
|
1065
|
+
final_nfa = sum(p["nfa_score"] * p["length"] for p in group) / final_total_len
|
1066
|
+
|
856
1067
|
if is_horizontal_merge:
|
857
|
-
final_y =
|
858
|
-
|
1068
|
+
final_y = (
|
1069
|
+
sum(((p["coords"][1] + p["coords"][3]) / 2) * p["length"] for p in group)
|
1070
|
+
/ final_total_len
|
1071
|
+
)
|
1072
|
+
merged_line_data = (
|
1073
|
+
final_x1,
|
1074
|
+
final_y,
|
1075
|
+
final_x2,
|
1076
|
+
final_y,
|
1077
|
+
final_width,
|
1078
|
+
final_nfa,
|
1079
|
+
)
|
859
1080
|
else:
|
860
|
-
final_x =
|
861
|
-
|
1081
|
+
final_x = (
|
1082
|
+
sum(((p["coords"][0] + p["coords"][2]) / 2) * p["length"] for p in group)
|
1083
|
+
/ final_total_len
|
1084
|
+
)
|
1085
|
+
merged_line_data = (
|
1086
|
+
final_x,
|
1087
|
+
final_y1,
|
1088
|
+
final_x,
|
1089
|
+
final_y2,
|
1090
|
+
final_width,
|
1091
|
+
final_nfa,
|
1092
|
+
)
|
862
1093
|
merged_results.append(merged_line_data)
|
863
1094
|
return merged_results
|
864
1095
|
|
865
1096
|
merged_h_lines = merge_lines_list(horizontal_lines, True)
|
866
1097
|
merged_v_lines = merge_lines_list(vertical_lines, False)
|
867
1098
|
all_merged = merged_h_lines + merged_v_lines
|
868
|
-
|
1099
|
+
|
869
1100
|
final_lines_data = []
|
870
1101
|
for line_data_item in all_merged:
|
871
1102
|
x1, y1, x2, y2, width, nfa = line_data_item
|
872
|
-
length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
|
1103
|
+
length = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
|
873
1104
|
if length > min_line_length:
|
874
1105
|
# Ensure x1 <= x2 for horizontal, y1 <= y2 for vertical
|
875
|
-
if abs(y2 - y1) < abs(x2-x1):
|
876
|
-
if x1 > x2:
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
1106
|
+
if abs(y2 - y1) < abs(x2 - x1): # Horizontal-ish
|
1107
|
+
if x1 > x2:
|
1108
|
+
x1_out, y1_out, x2_out, y2_out = x2, y2, x1, y1
|
1109
|
+
else:
|
1110
|
+
x1_out, y1_out, x2_out, y2_out = x1, y1, x2, y2
|
1111
|
+
else: # Vertical-ish
|
1112
|
+
if y1 > y2:
|
1113
|
+
x1_out, y1_out, x2_out, y2_out = x2, y2, x1, y1
|
1114
|
+
else:
|
1115
|
+
x1_out, y1_out, x2_out, y2_out = x1, y1, x2, y2
|
1116
|
+
|
1117
|
+
final_lines_data.append(
|
1118
|
+
{
|
1119
|
+
"x1": x1_out,
|
1120
|
+
"y1": y1_out,
|
1121
|
+
"x2": x2_out,
|
1122
|
+
"y2": y2_out,
|
1123
|
+
"width": width,
|
1124
|
+
"nfa_score": nfa,
|
1125
|
+
"length": length,
|
1126
|
+
}
|
1127
|
+
)
|
886
1128
|
return final_lines_data
|
887
1129
|
|
888
1130
|
def detect_lines_preview(
|
889
1131
|
self,
|
890
|
-
resolution: int = 72,
|
1132
|
+
resolution: int = 72, # Preview typically uses lower resolution
|
891
1133
|
method: str = "projection",
|
892
1134
|
horizontal: bool = True,
|
893
1135
|
vertical: bool = True,
|
@@ -898,7 +1140,9 @@ class ShapeDetectionMixin:
|
|
898
1140
|
max_lines_h: Optional[int] = None,
|
899
1141
|
max_lines_v: Optional[int] = None,
|
900
1142
|
binarization_method: str = LINE_DETECTION_PARAM_DEFAULTS["binarization_method"],
|
901
|
-
adaptive_thresh_block_size: int = LINE_DETECTION_PARAM_DEFAULTS[
|
1143
|
+
adaptive_thresh_block_size: int = LINE_DETECTION_PARAM_DEFAULTS[
|
1144
|
+
"adaptive_thresh_block_size"
|
1145
|
+
],
|
902
1146
|
adaptive_thresh_C_val: int = LINE_DETECTION_PARAM_DEFAULTS["adaptive_thresh_C_val"],
|
903
1147
|
morph_op_h: str = LINE_DETECTION_PARAM_DEFAULTS["morph_op_h"],
|
904
1148
|
morph_kernel_h: Tuple[int, int] = LINE_DETECTION_PARAM_DEFAULTS["morph_kernel_h"],
|
@@ -921,23 +1165,25 @@ class ShapeDetectionMixin:
|
|
921
1165
|
Previews detected lines on a Page or Region without adding them to the PDF elements.
|
922
1166
|
Generates and returns a debug visualization image.
|
923
1167
|
This method is intended for Page or Region objects.
|
924
|
-
|
1168
|
+
|
925
1169
|
Args:
|
926
1170
|
method: Detection method - "projection" (default) or "lsd" (requires opencv-python).
|
927
1171
|
See `detect_lines` for other parameter descriptions. The main difference is a lower default `resolution`.
|
928
|
-
|
1172
|
+
|
929
1173
|
Returns:
|
930
1174
|
PIL Image with line detection visualization, or None if preview failed.
|
931
|
-
|
1175
|
+
|
932
1176
|
Note:
|
933
|
-
Only projection profiling method supports histogram visualization.
|
1177
|
+
Only projection profiling method supports histogram visualization.
|
934
1178
|
LSD method will show detected lines overlaid on the original image.
|
935
1179
|
"""
|
936
|
-
if hasattr(self,
|
937
|
-
logger.warning(
|
1180
|
+
if hasattr(self, "pdfs") or (hasattr(self, "pages") and not hasattr(self, "_page")):
|
1181
|
+
logger.warning(
|
1182
|
+
"preview_detected_lines is intended for single Page/Region objects. For collections, process pages individually."
|
1183
|
+
)
|
938
1184
|
return None
|
939
1185
|
|
940
|
-
if not horizontal and not vertical:
|
1186
|
+
if not horizontal and not vertical: # Check this early
|
941
1187
|
logger.info("Line preview skipped as both horizontal and vertical are False.")
|
942
1188
|
return None
|
943
1189
|
|
@@ -945,22 +1191,30 @@ class ShapeDetectionMixin:
|
|
945
1191
|
if method not in ["projection", "lsd"]:
|
946
1192
|
raise ValueError(f"Invalid method '{method}'. Supported methods: 'projection', 'lsd'")
|
947
1193
|
|
948
|
-
cv_image, _, _, page_object_ctx = self._get_image_for_detection(
|
949
|
-
|
1194
|
+
cv_image, _, _, page_object_ctx = self._get_image_for_detection(
|
1195
|
+
resolution
|
1196
|
+
) # scale_factor and origin_offset not needed for preview
|
1197
|
+
if (
|
1198
|
+
cv_image is None or page_object_ctx is None
|
1199
|
+
): # page_object_ctx for logging context mostly
|
950
1200
|
logger.warning(f"Skipping line preview for {self} due to image error.")
|
951
1201
|
return None
|
952
1202
|
|
953
1203
|
pil_image_for_dims = None
|
954
|
-
if hasattr(self,
|
955
|
-
if hasattr(self,
|
956
|
-
pil_image_for_dims = self.to_image(
|
1204
|
+
if hasattr(self, "to_image") and hasattr(self, "width") and hasattr(self, "height"):
|
1205
|
+
if hasattr(self, "x0") and hasattr(self, "top") and hasattr(self, "_page"):
|
1206
|
+
pil_image_for_dims = self.to_image(
|
1207
|
+
resolution=resolution, crop_only=True, include_highlights=False
|
1208
|
+
)
|
957
1209
|
else:
|
958
1210
|
pil_image_for_dims = self.to_image(resolution=resolution, include_highlights=False)
|
959
|
-
|
1211
|
+
|
960
1212
|
if pil_image_for_dims is None:
|
961
|
-
logger.warning(
|
1213
|
+
logger.warning(
|
1214
|
+
f"Could not render PIL image for preview for {self}. Using cv_image to create one."
|
1215
|
+
)
|
962
1216
|
pil_image_for_dims = Image.fromarray(cv_image)
|
963
|
-
|
1217
|
+
|
964
1218
|
if pil_image_for_dims.mode != "RGB":
|
965
1219
|
pil_image_for_dims = pil_image_for_dims.convert("RGB")
|
966
1220
|
|
@@ -980,9 +1234,12 @@ class ShapeDetectionMixin:
|
|
980
1234
|
binarization_method=binarization_method,
|
981
1235
|
adaptive_thresh_block_size=adaptive_thresh_block_size,
|
982
1236
|
adaptive_thresh_C_val=adaptive_thresh_C_val,
|
983
|
-
morph_op_h=morph_op_h,
|
984
|
-
|
985
|
-
|
1237
|
+
morph_op_h=morph_op_h,
|
1238
|
+
morph_kernel_h=morph_kernel_h,
|
1239
|
+
morph_op_v=morph_op_v,
|
1240
|
+
morph_kernel_v=morph_kernel_v,
|
1241
|
+
smoothing_sigma_h=smoothing_sigma_h,
|
1242
|
+
smoothing_sigma_v=smoothing_sigma_v,
|
986
1243
|
peak_width_rel_height=peak_width_rel_height,
|
987
1244
|
)
|
988
1245
|
elif method == "lsd":
|
@@ -995,16 +1252,22 @@ class ShapeDetectionMixin:
                 "Alternatively, use method='projection' for preview."
             )
             lines_data_img = self._process_image_for_lines_lsd(
-                cv_image,
-
-
+                cv_image,
+                off_angle,
+                min_line_length,
+                merge_angle_tolerance,
+                merge_distance_tolerance,
+                merge_endpoint_tolerance,
+                initial_min_line_length,
+                min_nfa_score_horizontal,
+                min_nfa_score_vertical,
             )
             profile_h_smoothed, profile_v_smoothed = None, None  # LSD doesn't use profiles
-
-        if not lines_data_img:
-
-
-
+
+        if not lines_data_img:  # Check if any lines were detected before visualization
+            logger.info(f"No lines detected for preview on {page_object_ctx or self}")
+            # Optionally return the base image if no lines, or None
+            return pil_image_for_dims.convert("RGBA")  # Return base image so something is shown

         # --- Visualization Logic ---
         final_viz_image: Optional[Image.Image] = None
@@ -1015,69 +1278,130 @@ class ShapeDetectionMixin:
         viz_params = {
             "draw_line_thickness_viz": 2,  # Slightly thicker for better visibility
             "debug_histogram_size": 100,
-            "line_color_h": (255, 0, 0, 200),
-            "
-            "
-            "
-            "
-            "
-            "
+            "line_color_h": (255, 0, 0, 200),
+            "line_color_v": (0, 0, 255, 200),
+            "histogram_bar_color_h": (200, 0, 0, 200),
+            "histogram_bar_color_v": (0, 0, 200, 200),
+            "histogram_bg_color": (240, 240, 240, 255),
+            "padding_between_viz": 10,
+            "peak_threshold_h": peak_threshold_h,
+            "peak_threshold_v": peak_threshold_v,
+            "max_lines_h": max_lines_h,
+            "max_lines_v": max_lines_v,
         }

         # Draw detected lines on the image
         for line_info in lines_data_img:
-            is_h_line = abs(line_info[
+            is_h_line = abs(line_info["y1"] - line_info["y2"]) < abs(
+                line_info["x1"] - line_info["x2"]
+            )
             line_color = viz_params["line_color_h"] if is_h_line else viz_params["line_color_v"]
-            draw.line(
-                (line_info[
-
-
+            draw.line(
+                [(line_info["x1"], line_info["y1"]), (line_info["x2"], line_info["y2"])],
+                fill=line_color,
+                width=viz_params["draw_line_thickness_viz"],
+            )

         # For projection method, add histogram visualization
-        if method == "projection" and (
+        if method == "projection" and (
+            profile_h_smoothed is not None or profile_v_smoothed is not None
+        ):
             hist_size = viz_params["debug_histogram_size"]
-            hist_h_img = Image.new(
+            hist_h_img = Image.new(
+                "RGBA", (hist_size, img_height), viz_params["histogram_bg_color"]
+            )
             hist_h_draw = ImageDraw.Draw(hist_h_img)
-
+
             if profile_h_smoothed is not None and profile_h_smoothed.size > 0:
                 actual_max_h_profile = profile_h_smoothed.max()
                 display_threshold_val_h = peak_threshold_h * img_width
                 # Use the maximum of either the profile max or threshold for scaling, so both are always visible
-                max_h_profile_val_for_scaling =
+                max_h_profile_val_for_scaling = (
+                    max(actual_max_h_profile, display_threshold_val_h)
+                    if actual_max_h_profile > 0
+                    else img_width
+                )
                 for y_coord, val in enumerate(profile_h_smoothed):
-                    bar_len = 0
+                    bar_len = 0
+                    thresh_bar_len = 0
                     if max_h_profile_val_for_scaling > 0:
                         bar_len = int((val / max_h_profile_val_for_scaling) * hist_size)
                         if display_threshold_val_h >= 0:
-                            thresh_bar_len = int(
+                            thresh_bar_len = int(
+                                (display_threshold_val_h / max_h_profile_val_for_scaling)
+                                * hist_size
+                            )
                     bar_len = min(max(0, bar_len), hist_size)
-                    if bar_len > 0:
-
-
+                    if bar_len > 0:
+                        hist_h_draw.line(
+                            [(0, y_coord), (bar_len - 1, y_coord)],
+                            fill=viz_params["histogram_bar_color_h"],
+                            width=1,
+                        )
+                    if (
+                        viz_params["max_lines_h"] is None
+                        and display_threshold_val_h >= 0
+                        and thresh_bar_len > 0
+                        and thresh_bar_len <= hist_size
+                    ):
                         # Ensure threshold line is within bounds
                         thresh_x = min(thresh_bar_len, hist_size - 1)
-                        hist_h_draw.line(
-
+                        hist_h_draw.line(
+                            [
+                                (thresh_x, y_coord),
+                                (thresh_x, y_coord + 1 if y_coord + 1 < img_height else y_coord),
+                            ],
+                            fill=(0, 255, 0, 100),
+                            width=1,
+                        )
+
             hist_v_img = Image.new("RGBA", (img_width, hist_size), viz_params["histogram_bg_color"])
             hist_v_draw = ImageDraw.Draw(hist_v_img)
             if profile_v_smoothed is not None and profile_v_smoothed.size > 0:
                 actual_max_v_profile = profile_v_smoothed.max()
                 display_threshold_val_v = peak_threshold_v * img_height
                 # Use the maximum of either the profile max or threshold for scaling, so both are always visible
-                max_v_profile_val_for_scaling =
+                max_v_profile_val_for_scaling = (
+                    max(actual_max_v_profile, display_threshold_val_v)
+                    if actual_max_v_profile > 0
+                    else img_height
+                )
                 for x_coord, val in enumerate(profile_v_smoothed):
-                    bar_height = 0
+                    bar_height = 0
+                    thresh_bar_h = 0
                     if max_v_profile_val_for_scaling > 0:
                         bar_height = int((val / max_v_profile_val_for_scaling) * hist_size)
-                        if display_threshold_val_v >=0:
-                            thresh_bar_h = int(
+                        if display_threshold_val_v >= 0:
+                            thresh_bar_h = int(
+                                (display_threshold_val_v / max_v_profile_val_for_scaling)
+                                * hist_size
+                            )
                     bar_height = min(max(0, bar_height), hist_size)
-                    if bar_height > 0:
-
-
+                    if bar_height > 0:
+                        hist_v_draw.line(
+                            [(x_coord, hist_size - 1), (x_coord, hist_size - bar_height)],
+                            fill=viz_params["histogram_bar_color_v"],
+                            width=1,
+                        )
+                    if (
+                        viz_params["max_lines_v"] is None
+                        and display_threshold_val_v >= 0
+                        and thresh_bar_h > 0
+                        and thresh_bar_h <= hist_size
+                    ):
                         # Ensure threshold line is within bounds
                         thresh_y = min(thresh_bar_h, hist_size - 1)
-                        hist_v_draw.line(
+                        hist_v_draw.line(
+                            [
+                                (x_coord, hist_size - thresh_y),
+                                (
+                                    x_coord + 1 if x_coord + 1 < img_width else x_coord,
+                                    hist_size - thresh_y,
+                                ),
+                            ],
+                            fill=(0, 255, 0, 100),
+                            width=1,
+                        )

             padding = viz_params["padding_between_viz"]
             total_width = img_width + padding + hist_size
@@ -1091,7 +1415,7 @@ class ShapeDetectionMixin:
             final_viz_image = viz_image_base

         logger.info(f"Generated line preview visualization for {page_object_ctx or self}")
-        return final_viz_image
+        return final_viz_image

     def detect_table_structure_from_lines(
         self,
@@ -1101,25 +1425,25 @@ class ShapeDetectionMixin:
     ) -> "ShapeDetectionMixin":
         """
         Create table structure (rows, columns, cells) from previously detected lines.
-
+
         This method analyzes horizontal and vertical lines to create a grid structure,
         then generates Region objects for:
         - An overall table region that encompasses the entire table structure
         - Individual row regions spanning the width of the table
-        - Individual column regions spanning the height of the table
+        - Individual column regions spanning the height of the table
         - Individual cell regions at each row/column intersection
-
+
         Args:
             source_label: Filter lines by this source label (from detect_lines)
             ignore_outer_regions: If True, don't create regions outside the grid defined by the lines.
                                   If False, include regions from page/object edges to the first/last lines.
             cell_padding: Internal padding for cell regions
-
+
         Returns:
             Self for method chaining
         """
         # Handle collections
-        if hasattr(self,
+        if hasattr(self, "pdfs"):
             for pdf_doc in self.pdfs:
                 for page_obj in pdf_doc.pages:
                     page_obj.detect_table_structure_from_lines(
@@ -1128,7 +1452,7 @@ class ShapeDetectionMixin:
                     cell_padding=cell_padding,
                 )
             return self
-        elif hasattr(self,
+        elif hasattr(self, "pages") and not hasattr(self, "_page"):  # PageCollection
             for page_obj in self.pages:
                 page_obj.detect_table_structure_from_lines(
                     source_label=source_label,
@@ -1142,71 +1466,82 @@ class ShapeDetectionMixin:
         origin_x, origin_y = 0.0, 0.0
         context_width, context_height = 0.0, 0.0

-        if
+        if (
+            hasattr(self, "_element_mgr") and hasattr(self, "width") and hasattr(self, "height")
+        ):  # Likely a Page
             page_object_for_elements = self
             context_width = self.width
             context_height = self.height
             logger.debug(f"Operating on Page context: {self}")
-        elif
+        elif (
+            hasattr(self, "_page") and hasattr(self, "x0") and hasattr(self, "width")
+        ):  # Likely a Region
             page_object_for_elements = self._page
             origin_x = self.x0
             origin_y = self.top
-            context_width = self.width
+            context_width = self.width  # Region's own width/height for its boundary calculations
             context_height = self.height
             logger.debug(f"Operating on Region context: {self}, origin: ({origin_x}, {origin_y})")
         else:
-            logger.warning(
+            logger.warning(
+                f"Could not determine valid page/region context for {self}. Aborting table structure detection."
+            )
             return self
-
+
         element_manager = page_object_for_elements._element_mgr

         # Get lines with the specified source
-        all_lines = element_manager.lines
-        filtered_lines = [
-
+        all_lines = element_manager.lines  # Access lines from the correct element manager
+        filtered_lines = [
+            line for line in all_lines if getattr(line, "source", None) == source_label
+        ]
+
         if not filtered_lines:
-            logger.info(
+            logger.info(
+                f"No lines found with source '{source_label}' for table structure detection on {self}."
+            )
             return self

         # Separate horizontal and vertical lines
         # For regions, line coordinates are already absolute to the page.
         horizontal_lines = [line for line in filtered_lines if line.is_horizontal]
         vertical_lines = [line for line in filtered_lines if line.is_vertical]
-
-        logger.info(
+
+        logger.info(
+            f"Found {len(horizontal_lines)} horizontal and {len(vertical_lines)} vertical lines for {self} with source '{source_label}'."
+        )

         # Define boundaries based on line positions (mid-points for sorting, actual edges for boundaries)
         # These coordinates are relative to the page_object_for_elements (which is always a Page)
-
+
         # Horizontal line Y-coordinates (use average y, effectively the line's y-position)
         h_line_ys = sorted(list(set([(line.top + line.bottom) / 2 for line in horizontal_lines])))
-
+
         # Vertical line X-coordinates (use average x, effectively the line's x-position)
         v_line_xs = sorted(list(set([(line.x0 + line.x1) / 2 for line in vertical_lines])))

         row_boundaries = []
         if horizontal_lines:
             if not ignore_outer_regions:
-                row_boundaries.append(origin_y)
+                row_boundaries.append(origin_y)  # Region's top or Page's 0
             row_boundaries.extend(h_line_ys)
             if not ignore_outer_regions:
-                row_boundaries.append(origin_y + context_height)
-        elif not ignore_outer_regions
-
+                row_boundaries.append(origin_y + context_height)  # Region's bottom or Page's height
+        elif not ignore_outer_regions:  # No horizontal lines, but we might want full height cells
+            row_boundaries.extend([origin_y, origin_y + context_height])
         row_boundaries = sorted(list(set(row_boundaries)))

-
         col_boundaries = []
         if vertical_lines:
             if not ignore_outer_regions:
-                col_boundaries.append(origin_x)
+                col_boundaries.append(origin_x)  # Region's left or Page's 0
             col_boundaries.extend(v_line_xs)
             if not ignore_outer_regions:
-                col_boundaries.append(origin_x + context_width)
-        elif not ignore_outer_regions:
+                col_boundaries.append(origin_x + context_width)  # Region's right or Page's width
+        elif not ignore_outer_regions:  # No vertical lines, but we might want full width cells
             col_boundaries.extend([origin_x, origin_x + context_width])
         col_boundaries = sorted(list(set(col_boundaries)))
-
+
         logger.debug(f"Row boundaries for {self}: {row_boundaries}")
         logger.debug(f"Col boundaries for {self}: {col_boundaries}")

@@ -1217,7 +1552,7 @@ class ShapeDetectionMixin:
             table_top = row_boundaries[0]
             table_right = col_boundaries[-1]
             table_bottom = row_boundaries[-1]
-
+
             if table_right > table_left and table_bottom > table_top:
                 try:
                     table_region = page_object_for_elements.create_region(
@@ -1225,34 +1560,40 @@ class ShapeDetectionMixin:
                     )
                     table_region.source = source_label
                     table_region.region_type = "table"
-                    table_region.normalized_type =
-
-
-
-
-
-                        "
-                        "
+                    table_region.normalized_type = (
+                        "table"  # Add normalized_type for selector compatibility
+                    )
+                    table_region.metadata.update(
+                        {
+                            "source_lines_label": source_label,
+                            "num_rows": len(row_boundaries) - 1,
+                            "num_cols": len(col_boundaries) - 1,
+                            "boundaries": {"rows": row_boundaries, "cols": col_boundaries},
                         }
-
+                    )
                     element_manager.add_element(table_region, element_type="regions")
                     tables_created += 1
-                    logger.debug(
+                    logger.debug(
+                        f"Created table region: L{table_left:.1f} T{table_top:.1f} R{table_right:.1f} B{table_bottom:.1f}"
+                    )
                 except Exception as e:
-                    logger.error(
+                    logger.error(
+                        f"Failed to create or add table Region: {e}. Table abs coords: L{table_left} T{table_top} R{table_right} B{table_bottom}",
+                        exc_info=True,
+                    )

         # Create cell regions
         cells_created = 0
         rows_created = 0
         cols_created = 0
-
+
         # Create Row Regions
         if len(row_boundaries) >= 2:
             # Determine horizontal extent for rows
             row_extent_x0 = origin_x
             row_extent_x1 = origin_x + context_width
-            if col_boundaries:
-                if len(col_boundaries) >=2:
+            if col_boundaries:  # If columns are defined, rows should span only across them
+                if len(col_boundaries) >= 2:
                     row_extent_x0 = col_boundaries[0]
                     row_extent_x1 = col_boundaries[-1]
                     # If only one col_boundary (e.g. from ignore_outer_regions=False and one line), use context width
@@ -1260,82 +1601,94 @@ class ShapeDetectionMixin:

             for i in range(len(row_boundaries) - 1):
                 top_abs = row_boundaries[i]
-                bottom_abs = row_boundaries[i+1]
-
+                bottom_abs = row_boundaries[i + 1]
+
                 # Use calculated row_extent_x0 and row_extent_x1
-                if bottom_abs > top_abs and row_extent_x1 > row_extent_x0:
+                if bottom_abs > top_abs and row_extent_x1 > row_extent_x0:  # Ensure valid region
                     try:
                         row_region = page_object_for_elements.create_region(
                             row_extent_x0, top_abs, row_extent_x1, bottom_abs
                         )
                         row_region.source = source_label
                         row_region.region_type = "table_row"
-                        row_region.normalized_type =
-
-
-
-
+                        row_region.normalized_type = (
+                            "table_row"  # Add normalized_type for selector compatibility
+                        )
+                        row_region.metadata.update(
+                            {"row_index": i, "source_lines_label": source_label}
+                        )
                         element_manager.add_element(row_region, element_type="regions")
                         rows_created += 1
                     except Exception as e:
-                        logger.error(
+                        logger.error(
+                            f"Failed to create or add table_row Region: {e}. Row abs coords: L{row_extent_x0} T{top_abs} R{row_extent_x1} B{bottom_abs}",
+                            exc_info=True,
+                        )

         # Create Column Regions
         if len(col_boundaries) >= 2:
             # Determine vertical extent for columns
             col_extent_y0 = origin_y
             col_extent_y1 = origin_y + context_height
-            if row_boundaries:
-                if len(row_boundaries) >=2:
+            if row_boundaries:  # If rows are defined, columns should span only across them
+                if len(row_boundaries) >= 2:
                     col_extent_y0 = row_boundaries[0]
                     col_extent_y1 = row_boundaries[-1]
                     # If only one row_boundary, use context height - similar logic to rows

             for j in range(len(col_boundaries) - 1):
                 left_abs = col_boundaries[j]
-                right_abs = col_boundaries[j+1]
-
+                right_abs = col_boundaries[j + 1]
+
                 # Use calculated col_extent_y0 and col_extent_y1
-                if right_abs > left_abs and col_extent_y1 > col_extent_y0:
+                if right_abs > left_abs and col_extent_y1 > col_extent_y0:  # Ensure valid region
                     try:
                         col_region = page_object_for_elements.create_region(
                             left_abs, col_extent_y0, right_abs, col_extent_y1
                         )
                         col_region.source = source_label
                         col_region.region_type = "table_column"
-                        col_region.normalized_type =
-
-
-
-
+                        col_region.normalized_type = (
+                            "table_column"  # Add normalized_type for selector compatibility
+                        )
+                        col_region.metadata.update(
+                            {"col_index": j, "source_lines_label": source_label}
+                        )
                         element_manager.add_element(col_region, element_type="regions")
                         cols_created += 1
                     except Exception as e:
-                        logger.error(
+                        logger.error(
+                            f"Failed to create or add table_column Region: {e}. Col abs coords: L{left_abs} T{col_extent_y0} R{right_abs} B{col_extent_y1}",
+                            exc_info=True,
+                        )

         # Create Cell Regions (existing logic)
         if len(row_boundaries) < 2 or len(col_boundaries) < 2:
-            logger.info(
+            logger.info(
+                f"Not enough boundaries to form cells for {self}. Rows: {len(row_boundaries)}, Cols: {len(col_boundaries)}"
+            )
             # return self # Return will be at the end
         else:
             for i in range(len(row_boundaries) - 1):
                 top_abs = row_boundaries[i]
-                bottom_abs = row_boundaries[i+1]
-
+                bottom_abs = row_boundaries[i + 1]
+
                 for j in range(len(col_boundaries) - 1):
                     left_abs = col_boundaries[j]
-                    right_abs = col_boundaries[j+1]
-
+                    right_abs = col_boundaries[j + 1]
+
                     cell_left_abs = left_abs + cell_padding
                     cell_top_abs = top_abs + cell_padding
                     cell_right_abs = right_abs - cell_padding
                     cell_bottom_abs = bottom_abs - cell_padding
-
+
                     cell_width = cell_right_abs - cell_left_abs
                     cell_height = cell_bottom_abs - cell_top_abs
-
+
                     if cell_width <= 0 or cell_height <= 0:
-                        logger.debug(
+                        logger.debug(
+                            f"Skipping cell (zero or negative dimension after padding): L{left_abs:.1f} T{top_abs:.1f} R{right_abs:.1f} B{bottom_abs:.1f} -> W{cell_width:.1f} H{cell_height:.1f}"
+                        )
                         continue

                     try:
@@ -1344,30 +1697,42 @@ class ShapeDetectionMixin:
                         )
                         cell_region.source = source_label
                         cell_region.region_type = "table_cell"
-                        cell_region.normalized_type =
-
-
-
-
-
-                            "
-                            "
+                        cell_region.normalized_type = (
+                            "table_cell"  # Add normalized_type for selector compatibility
+                        )
+                        cell_region.metadata.update(
+                            {
+                                "row_index": i,
+                                "col_index": j,
+                                "source_lines_label": source_label,
+                                "original_boundaries_abs": {
+                                    "left": left_abs,
+                                    "top": top_abs,
+                                    "right": right_abs,
+                                    "bottom": bottom_abs,
+                                },
                             }
-
+                        )
                         element_manager.add_element(cell_region, element_type="regions")
                         cells_created += 1
                     except Exception as e:
-                        logger.error(
+                        logger.error(
+                            f"Failed to create or add cell Region: {e}. Cell abs coords: L{cell_left_abs} T{cell_top_abs} R{cell_right_abs} B{cell_bottom_abs}",
+                            exc_info=True,
+                        )

-        logger.info(
+        logger.info(
+            f"Created {tables_created} table, {rows_created} rows, {cols_created} columns, and {cells_created} table cells from detected lines (source: '{source_label}') for {self}."
+        )
         return self

+
 # Example usage would be:
 # page.detect_lines(source_label="my_table_lines")
 # page.detect_table_structure_from_lines(source_label="my_table_lines", cell_padding=0.5)
-#
+#
 # Now both selector styles work equivalently:
 # table = page.find('table[source*="table_from"]')  # Direct type selector
 # table = page.find('region[type="table"][source*="table_from"]')  # Region attribute selector
 # cells = page.find_all('table-cell[source*="table_cells_from"]')  # Direct type selector
-# cells = page.find_all('region[type="table-cell"][source*="table_cells_from"]')  # Region attribute selector
+# cells = page.find_all('region[type="table-cell"][source*="table_cells_from"]')  # Region attribute selector