natural-pdf 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/analyzers/shape_detection_mixin.py +554 -273
- natural_pdf/core/page.py +127 -13
- natural_pdf/elements/base.py +20 -20
- natural_pdf/elements/region.py +167 -33
- natural_pdf/flows/element.py +2 -2
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/METADATA +1 -1
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/RECORD +10 -10
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,10 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
3
3
|
|
4
|
-
import cv2
|
5
4
|
import numpy as np
|
6
5
|
from PIL import Image, ImageDraw
|
7
6
|
from scipy.signal import find_peaks
|
8
|
-
from scipy.ndimage import gaussian_filter1d
|
7
|
+
from scipy.ndimage import gaussian_filter1d, binary_opening, binary_closing
|
9
8
|
|
10
9
|
if TYPE_CHECKING:
|
11
10
|
from natural_pdf.core.page import Page
|
@@ -19,7 +18,7 @@ logger = logging.getLogger(__name__)
|
|
19
18
|
|
20
19
|
# Constants for default values of less commonly adjusted line detection parameters
|
21
20
|
LINE_DETECTION_PARAM_DEFAULTS = {
|
22
|
-
"binarization_method": "
|
21
|
+
"binarization_method": "adaptive",
|
23
22
|
"adaptive_thresh_block_size": 21,
|
24
23
|
"adaptive_thresh_C_val": 5,
|
25
24
|
"morph_op_h": "none",
|
@@ -111,192 +110,7 @@ class ShapeDetectionMixin:
|
|
111
110
|
return cv_image, scale_factor, origin_offset_pdf, page_obj
|
112
111
|
|
113
112
|
|
114
|
-
def _process_image_for_lines(
|
115
|
-
self,
|
116
|
-
cv_image: np.ndarray,
|
117
|
-
off_angle: int,
|
118
|
-
min_line_length: int,
|
119
|
-
merge_angle_tolerance: int,
|
120
|
-
merge_distance_tolerance: int,
|
121
|
-
merge_endpoint_tolerance: int,
|
122
|
-
initial_min_line_length: int,
|
123
|
-
min_nfa_score_horizontal: float,
|
124
|
-
min_nfa_score_vertical: float,
|
125
|
-
) -> List[Dict]:
|
126
|
-
"""Processes an image to detect lines using OpenCV LSD and merging logic."""
|
127
|
-
if cv_image is None:
|
128
|
-
return []
|
129
|
-
|
130
|
-
gray_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2GRAY)
|
131
|
-
lsd = cv2.createLineSegmentDetector(cv2.LSD_REFINE_ADV)
|
132
|
-
coords_arr, widths_arr, precs_arr, nfa_scores_arr = lsd.detect(gray_image)
|
133
|
-
|
134
|
-
lines_raw = []
|
135
|
-
if coords_arr is not None: # nfa_scores_arr can be None if no lines are found
|
136
|
-
nfa_scores_list = nfa_scores_arr.flatten() if nfa_scores_arr is not None else [0.0] * len(coords_arr)
|
137
|
-
widths_list = widths_arr.flatten() if widths_arr is not None else [1.0] * len(coords_arr)
|
138
|
-
precs_list = precs_arr.flatten() if precs_arr is not None else [0.0] * len(coords_arr)
|
139
|
-
|
140
|
-
for i in range(len(coords_arr)):
|
141
|
-
lines_raw.append((
|
142
|
-
coords_arr[i][0],
|
143
|
-
widths_list[i] if i < len(widths_list) else 1.0,
|
144
|
-
precs_list[i] if i < len(precs_list) else 0.0,
|
145
|
-
nfa_scores_list[i] if i < len(nfa_scores_list) else 0.0
|
146
|
-
))
|
147
|
-
|
148
|
-
def get_line_properties(line_data_item):
|
149
|
-
l_coords, l_width, l_prec, l_nfa_score = line_data_item
|
150
|
-
x1, y1, x2, y2 = l_coords
|
151
|
-
angle_rad = np.arctan2(y2 - y1, x2 - x1)
|
152
|
-
angle_deg = np.degrees(angle_rad)
|
153
|
-
normalized_angle_deg = angle_deg % 180
|
154
|
-
if normalized_angle_deg < 0:
|
155
|
-
normalized_angle_deg += 180
|
156
|
-
|
157
|
-
is_h = abs(normalized_angle_deg) <= off_angle or abs(normalized_angle_deg - 180) <= off_angle
|
158
|
-
is_v = abs(normalized_angle_deg - 90) <= off_angle
|
159
|
-
|
160
|
-
if is_h and x1 > x2: x1, x2, y1, y2 = x2, x1, y2, y1
|
161
|
-
elif is_v and y1 > y2: y1, y2, x1, x2 = y2, y1, x2, x1
|
162
|
-
|
163
|
-
length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
|
164
|
-
return {'coords': (x1, y1, x2, y2), 'width': l_width, 'prec': l_prec,
|
165
|
-
'angle_deg': normalized_angle_deg, 'is_horizontal': is_h, 'is_vertical': is_v,
|
166
|
-
'length': length, 'nfa_score': l_nfa_score}
|
167
|
-
|
168
|
-
processed_lines = [get_line_properties(ld) for ld in lines_raw]
|
169
|
-
|
170
|
-
filtered_lines = []
|
171
|
-
for p in processed_lines:
|
172
|
-
if p['length'] <= initial_min_line_length: continue
|
173
|
-
if p['is_horizontal'] and p['nfa_score'] >= min_nfa_score_horizontal:
|
174
|
-
filtered_lines.append(p)
|
175
|
-
elif p['is_vertical'] and p['nfa_score'] >= min_nfa_score_vertical:
|
176
|
-
filtered_lines.append(p)
|
177
|
-
|
178
|
-
horizontal_lines = [p for p in filtered_lines if p['is_horizontal']]
|
179
|
-
vertical_lines = [p for p in filtered_lines if p['is_vertical']]
|
180
|
-
|
181
|
-
def merge_lines_list(lines_list, is_horizontal_merge):
|
182
|
-
if not lines_list: return []
|
183
|
-
key_sort = (lambda p: (p['coords'][1], p['coords'][0])) if is_horizontal_merge else (lambda p: (p['coords'][0], p['coords'][1]))
|
184
|
-
lines_list.sort(key=key_sort)
|
185
|
-
|
186
|
-
merged_results = []
|
187
|
-
merged_flags = [False] * len(lines_list)
|
188
|
-
|
189
|
-
for i, current_line_props in enumerate(lines_list):
|
190
|
-
if merged_flags[i]: continue
|
191
|
-
group = [current_line_props]; merged_flags[i] = True
|
192
|
-
|
193
|
-
# Keep trying to expand the group until no more lines can be added
|
194
|
-
# Use multiple passes to ensure transitive merging works properly
|
195
|
-
for merge_pass in range(10): # Up to 3 passes to catch complex merging scenarios
|
196
|
-
group_changed = False
|
197
|
-
|
198
|
-
# Calculate current group boundaries
|
199
|
-
group_x1, group_y1 = min(p['coords'][0] for p in group), min(p['coords'][1] for p in group)
|
200
|
-
group_x2, group_y2 = max(p['coords'][2] for p in group), max(p['coords'][3] for p in group)
|
201
|
-
total_len_in_group = sum(p['length'] for p in group)
|
202
|
-
if total_len_in_group == 0: continue # Should not happen
|
203
|
-
|
204
|
-
# Calculate weighted averages for the group
|
205
|
-
group_avg_angle = sum(p['angle_deg'] * p['length'] for p in group) / total_len_in_group
|
206
|
-
|
207
|
-
if is_horizontal_merge:
|
208
|
-
group_avg_perp_coord = sum(((p['coords'][1] + p['coords'][3]) / 2) * p['length'] for p in group) / total_len_in_group
|
209
|
-
else:
|
210
|
-
group_avg_perp_coord = sum(((p['coords'][0] + p['coords'][2]) / 2) * p['length'] for p in group) / total_len_in_group
|
211
|
-
|
212
|
-
# Check all unmerged lines for potential merging
|
213
|
-
for j, candidate_props in enumerate(lines_list):
|
214
|
-
if merged_flags[j]: continue
|
215
|
-
|
216
|
-
# 1. Check for parallelism (angle similarity)
|
217
|
-
angle_diff = abs(group_avg_angle - candidate_props['angle_deg'])
|
218
|
-
# Handle wraparound for angles near 0/180
|
219
|
-
if angle_diff > 90:
|
220
|
-
angle_diff = 180 - angle_diff
|
221
|
-
if angle_diff > merge_angle_tolerance: continue
|
222
|
-
|
223
|
-
# 2. Check for closeness (perpendicular distance)
|
224
|
-
if is_horizontal_merge:
|
225
|
-
cand_perp_coord = (candidate_props['coords'][1] + candidate_props['coords'][3]) / 2
|
226
|
-
else:
|
227
|
-
cand_perp_coord = (candidate_props['coords'][0] + candidate_props['coords'][2]) / 2
|
228
|
-
|
229
|
-
perp_distance = abs(group_avg_perp_coord - cand_perp_coord)
|
230
|
-
if perp_distance > merge_distance_tolerance: continue
|
231
|
-
|
232
|
-
# 3. Check for reasonable proximity along the primary axis
|
233
|
-
if is_horizontal_merge:
|
234
|
-
# For horizontal lines, check x-axis relationship
|
235
|
-
cand_x1, cand_x2 = candidate_props['coords'][0], candidate_props['coords'][2]
|
236
|
-
# Check if there's overlap OR if the gap is reasonable
|
237
|
-
overlap = max(0, min(group_x2, cand_x2) - max(group_x1, cand_x1))
|
238
|
-
gap_to_group = min(abs(group_x1 - cand_x2), abs(group_x2 - cand_x1))
|
239
|
-
|
240
|
-
# Accept if there's overlap OR the gap is reasonable OR the candidate is contained within group span
|
241
|
-
if not (overlap > 0 or gap_to_group <= merge_endpoint_tolerance or (cand_x1 >= group_x1 and cand_x2 <= group_x2)):
|
242
|
-
continue
|
243
|
-
else:
|
244
|
-
# For vertical lines, check y-axis relationship
|
245
|
-
cand_y1, cand_y2 = candidate_props['coords'][1], candidate_props['coords'][3]
|
246
|
-
overlap = max(0, min(group_y2, cand_y2) - max(group_y1, cand_y1))
|
247
|
-
gap_to_group = min(abs(group_y1 - cand_y2), abs(group_y2 - cand_y1))
|
248
|
-
|
249
|
-
if not (overlap > 0 or gap_to_group <= merge_endpoint_tolerance or (cand_y1 >= group_y1 and cand_y2 <= group_y2)):
|
250
|
-
continue
|
251
|
-
|
252
|
-
# If we reach here, lines should be merged
|
253
|
-
group.append(candidate_props)
|
254
|
-
merged_flags[j] = True
|
255
|
-
group_changed = True
|
256
|
-
|
257
|
-
if not group_changed:
|
258
|
-
break # No more lines added in this pass, stop trying
|
259
|
-
|
260
|
-
# Create final merged line from the group
|
261
|
-
final_x1, final_y1 = min(p['coords'][0] for p in group), min(p['coords'][1] for p in group)
|
262
|
-
final_x2, final_y2 = max(p['coords'][2] for p in group), max(p['coords'][3] for p in group)
|
263
|
-
final_total_len = sum(p['length'] for p in group)
|
264
|
-
if final_total_len == 0: continue
|
265
|
-
|
266
|
-
final_width = sum(p['width'] * p['length'] for p in group) / final_total_len
|
267
|
-
final_nfa = sum(p['nfa_score'] * p['length'] for p in group) / final_total_len
|
268
|
-
|
269
|
-
if is_horizontal_merge:
|
270
|
-
final_y = sum(((p['coords'][1] + p['coords'][3]) / 2) * p['length'] for p in group) / final_total_len
|
271
|
-
merged_line_data = (final_x1, final_y, final_x2, final_y, final_width, final_nfa)
|
272
|
-
else:
|
273
|
-
final_x = sum(((p['coords'][0] + p['coords'][2]) / 2) * p['length'] for p in group) / final_total_len
|
274
|
-
merged_line_data = (final_x, final_y1, final_x, final_y2, final_width, final_nfa)
|
275
|
-
merged_results.append(merged_line_data)
|
276
|
-
return merged_results
|
277
|
-
|
278
|
-
merged_h_lines = merge_lines_list(horizontal_lines, True)
|
279
|
-
merged_v_lines = merge_lines_list(vertical_lines, False)
|
280
|
-
all_merged = merged_h_lines + merged_v_lines
|
281
|
-
|
282
|
-
final_lines_data = []
|
283
|
-
for line_data_item in all_merged:
|
284
|
-
x1, y1, x2, y2, width, nfa = line_data_item
|
285
|
-
length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
|
286
|
-
if length > min_line_length:
|
287
|
-
# Ensure x1 <= x2 for horizontal, y1 <= y2 for vertical
|
288
|
-
if abs(y2 - y1) < abs(x2-x1): # Horizontal-ish
|
289
|
-
if x1 > x2: x1_out, y1_out, x2_out, y2_out = x2, y2, x1, y1
|
290
|
-
else: x1_out, y1_out, x2_out, y2_out = x1, y1, x2, y2
|
291
|
-
else: # Vertical-ish
|
292
|
-
if y1 > y2: x1_out, y1_out, x2_out, y2_out = x2, y2, x1, y1
|
293
|
-
else: x1_out, y1_out, x2_out, y2_out = x1, y1, x2, y2
|
294
113
|
|
295
|
-
final_lines_data.append({
|
296
|
-
'x1': x1_out, 'y1': y1_out, 'x2': x2_out, 'y2': y2_out,
|
297
|
-
'width': width, 'nfa_score': nfa, 'length': length
|
298
|
-
})
|
299
|
-
return final_lines_data
|
300
114
|
|
301
115
|
def _convert_line_to_element_data(
|
302
116
|
self, line_data_img: Dict, scale_factor: float, origin_offset_pdf: Tuple[float, float], page_obj: 'Page', source_label: str
|
@@ -368,21 +182,77 @@ class ShapeDetectionMixin:
|
|
368
182
|
if cv_image is None:
|
369
183
|
return [], None, None
|
370
184
|
|
371
|
-
|
372
|
-
|
185
|
+
# Convert RGB to grayscale using numpy (faster than PIL)
|
186
|
+
# Using standard luminance weights: 0.299*R + 0.587*G + 0.114*B
|
187
|
+
if len(cv_image.shape) == 3:
|
188
|
+
gray_image = np.dot(cv_image[...,:3], [0.299, 0.587, 0.114]).astype(np.uint8)
|
189
|
+
else:
|
190
|
+
gray_image = cv_image # Already grayscale
|
191
|
+
|
192
|
+
img_height, img_width = gray_image.shape
|
373
193
|
logger.debug(f"Line detection - Image dimensions: {img_width}x{img_height}")
|
374
194
|
|
195
|
+
def otsu_threshold(image):
|
196
|
+
"""Simple Otsu's thresholding implementation using numpy."""
|
197
|
+
# Calculate histogram
|
198
|
+
hist, _ = np.histogram(image.flatten(), bins=256, range=(0, 256))
|
199
|
+
hist = hist.astype(float)
|
200
|
+
|
201
|
+
# Calculate probabilities
|
202
|
+
total_pixels = image.size
|
203
|
+
current_max = 0
|
204
|
+
threshold = 0
|
205
|
+
sum_total = np.sum(np.arange(256) * hist)
|
206
|
+
sum_background = 0
|
207
|
+
weight_background = 0
|
208
|
+
|
209
|
+
for i in range(256):
|
210
|
+
weight_background += hist[i]
|
211
|
+
if weight_background == 0:
|
212
|
+
continue
|
213
|
+
|
214
|
+
weight_foreground = total_pixels - weight_background
|
215
|
+
if weight_foreground == 0:
|
216
|
+
break
|
217
|
+
|
218
|
+
sum_background += i * hist[i]
|
219
|
+
mean_background = sum_background / weight_background
|
220
|
+
mean_foreground = (sum_total - sum_background) / weight_foreground
|
221
|
+
|
222
|
+
# Calculate between-class variance
|
223
|
+
variance_between = weight_background * weight_foreground * (mean_background - mean_foreground) ** 2
|
224
|
+
|
225
|
+
if variance_between > current_max:
|
226
|
+
current_max = variance_between
|
227
|
+
threshold = i
|
228
|
+
|
229
|
+
return threshold
|
230
|
+
|
231
|
+
def adaptive_threshold(image, block_size, C):
|
232
|
+
"""Simple adaptive thresholding implementation."""
|
233
|
+
# Use scipy for gaussian filtering
|
234
|
+
from scipy.ndimage import gaussian_filter
|
235
|
+
|
236
|
+
# Calculate local means using gaussian filter
|
237
|
+
sigma = block_size / 6.0 # Approximate relationship
|
238
|
+
local_mean = gaussian_filter(image.astype(float), sigma=sigma)
|
239
|
+
|
240
|
+
# Apply threshold
|
241
|
+
binary = (image > (local_mean - C)).astype(np.uint8) * 255
|
242
|
+
return 255 - binary # Invert to match binary inverse thresholding
|
243
|
+
|
375
244
|
if binarization_method == "adaptive":
|
376
|
-
binarized_image =
|
377
|
-
cv2.THRESH_BINARY_INV, adaptive_thresh_block_size, adaptive_thresh_C_val)
|
245
|
+
binarized_image = adaptive_threshold(gray_image, adaptive_thresh_block_size, adaptive_thresh_C_val)
|
378
246
|
elif binarization_method == "otsu":
|
379
|
-
otsu_thresh_val
|
247
|
+
otsu_thresh_val = otsu_threshold(gray_image)
|
248
|
+
binarized_image = (gray_image <= otsu_thresh_val).astype(np.uint8) * 255 # Inverted binary
|
380
249
|
logger.debug(f"Otsu's threshold applied. Value: {otsu_thresh_val}")
|
381
250
|
else:
|
382
251
|
logger.error(f"Invalid binarization_method: {binarization_method}. Supported: 'otsu', 'adaptive'. Defaulting to 'otsu'.")
|
383
|
-
otsu_thresh_val
|
252
|
+
otsu_thresh_val = otsu_threshold(gray_image)
|
253
|
+
binarized_image = (gray_image <= otsu_thresh_val).astype(np.uint8) * 255 # Inverted binary
|
384
254
|
|
385
|
-
binarized_norm = binarized_image / 255.0
|
255
|
+
binarized_norm = binarized_image.astype(float) / 255.0
|
386
256
|
|
387
257
|
detected_lines_data = []
|
388
258
|
profile_h_smoothed_for_viz: Optional[np.ndarray] = None
|
@@ -479,12 +349,34 @@ class ShapeDetectionMixin:
|
|
479
349
|
})
|
480
350
|
return lines_info, profile_smoothed
|
481
351
|
|
352
|
+
def apply_morphology(image, operation, kernel_size):
|
353
|
+
"""Apply morphological operations using scipy.ndimage."""
|
354
|
+
if operation == "none":
|
355
|
+
return image
|
356
|
+
|
357
|
+
# Create rectangular structuring element
|
358
|
+
# kernel_size is (width, height) = (cols, rows)
|
359
|
+
cols, rows = kernel_size
|
360
|
+
structure = np.ones((rows, cols)) # Note: numpy uses (rows, cols) order
|
361
|
+
|
362
|
+
# Convert to binary for morphological operations
|
363
|
+
binary_img = (image > 0.5).astype(bool)
|
364
|
+
|
365
|
+
if operation == "open":
|
366
|
+
result = binary_opening(binary_img, structure=structure)
|
367
|
+
elif operation == "close":
|
368
|
+
result = binary_closing(binary_img, structure=structure)
|
369
|
+
else:
|
370
|
+
logger.warning(f"Unknown morphological operation: {operation}. Supported: 'open', 'close', 'none'.")
|
371
|
+
result = binary_img
|
372
|
+
|
373
|
+
# Convert back to float
|
374
|
+
return result.astype(float)
|
375
|
+
|
482
376
|
if horizontal:
|
483
377
|
processed_image_h = binarized_norm.copy()
|
484
378
|
if morph_op_h != "none":
|
485
|
-
|
486
|
-
op = cv2.MORPH_OPEN if morph_op_h == "open" else cv2.MORPH_CLOSE
|
487
|
-
processed_image_h = cv2.morphologyEx(processed_image_h, op, kernel_h_struct)
|
379
|
+
processed_image_h = apply_morphology(processed_image_h, morph_op_h, morph_kernel_h)
|
488
380
|
profile_h_raw = np.sum(processed_image_h, axis=1)
|
489
381
|
horizontal_lines, smoothed_h = get_lines_from_profile(profile_h_raw, pil_image_rgb.width, 'h', True)
|
490
382
|
profile_h_smoothed_for_viz = smoothed_h
|
@@ -494,9 +386,7 @@ class ShapeDetectionMixin:
|
|
494
386
|
if vertical:
|
495
387
|
processed_image_v = binarized_norm.copy()
|
496
388
|
if morph_op_v != "none":
|
497
|
-
|
498
|
-
op = cv2.MORPH_OPEN if morph_op_v == "open" else cv2.MORPH_CLOSE
|
499
|
-
processed_image_v = cv2.morphologyEx(processed_image_v, op, kernel_v_struct)
|
389
|
+
processed_image_v = apply_morphology(processed_image_v, morph_op_v, morph_kernel_v)
|
500
390
|
profile_v_raw = np.sum(processed_image_v, axis=0)
|
501
391
|
vertical_lines, smoothed_v = get_lines_from_profile(profile_v_raw, pil_image_rgb.height, 'v', False)
|
502
392
|
profile_v_smoothed_for_viz = smoothed_v
|
@@ -509,6 +399,7 @@ class ShapeDetectionMixin:
|
|
509
399
|
self,
|
510
400
|
resolution: int = 192,
|
511
401
|
source_label: str = "detected",
|
402
|
+
method: str = "projection",
|
512
403
|
horizontal: bool = True,
|
513
404
|
vertical: bool = True,
|
514
405
|
peak_threshold_h: float = 0.5,
|
@@ -528,17 +419,28 @@ class ShapeDetectionMixin:
|
|
528
419
|
smoothing_sigma_h: float = LINE_DETECTION_PARAM_DEFAULTS["smoothing_sigma_h"],
|
529
420
|
smoothing_sigma_v: float = LINE_DETECTION_PARAM_DEFAULTS["smoothing_sigma_v"],
|
530
421
|
peak_width_rel_height: float = LINE_DETECTION_PARAM_DEFAULTS["peak_width_rel_height"],
|
422
|
+
# LSD-specific parameters
|
423
|
+
off_angle: int = 5,
|
424
|
+
min_line_length: int = 30,
|
425
|
+
merge_angle_tolerance: int = 5,
|
426
|
+
merge_distance_tolerance: int = 3,
|
427
|
+
merge_endpoint_tolerance: int = 10,
|
428
|
+
initial_min_line_length: int = 10,
|
429
|
+
min_nfa_score_horizontal: float = -10.0,
|
430
|
+
min_nfa_score_vertical: float = -10.0,
|
531
431
|
) -> "ShapeDetectionMixin": # Return type changed back to self
|
532
432
|
"""
|
533
|
-
Detects lines on the Page or Region, or on all pages within a Collection
|
534
|
-
using projection profiling and peak detection.
|
433
|
+
Detects lines on the Page or Region, or on all pages within a Collection.
|
535
434
|
Adds detected lines as LineElement objects to the ElementManager.
|
536
435
|
|
537
436
|
Args:
|
538
437
|
resolution: DPI for image rendering before detection.
|
539
438
|
source_label: Label assigned to the 'source' attribute of created LineElements.
|
439
|
+
method: Detection method - "projection" (default, no cv2 required) or "lsd" (requires opencv-python).
|
540
440
|
horizontal: If True, detect horizontal lines.
|
541
441
|
vertical: If True, detect vertical lines.
|
442
|
+
|
443
|
+
# Projection profiling parameters:
|
542
444
|
peak_threshold_h: Threshold for peak detection in horizontal profile (ratio of image width).
|
543
445
|
min_gap_h: Minimum gap between horizontal lines (pixels).
|
544
446
|
peak_threshold_v: Threshold for peak detection in vertical profile (ratio of image height).
|
@@ -556,16 +458,34 @@ class ShapeDetectionMixin:
|
|
556
458
|
smoothing_sigma_h: Gaussian smoothing sigma for horizontal profile.
|
557
459
|
smoothing_sigma_v: Gaussian smoothing sigma for vertical profile.
|
558
460
|
peak_width_rel_height: Relative height for `scipy.find_peaks` 'width' parameter.
|
461
|
+
|
462
|
+
# LSD-specific parameters (only used when method="lsd"):
|
463
|
+
off_angle: Maximum angle deviation from horizontal/vertical for line classification.
|
464
|
+
min_line_length: Minimum length for final detected lines.
|
465
|
+
merge_angle_tolerance: Maximum angle difference for merging parallel lines.
|
466
|
+
merge_distance_tolerance: Maximum perpendicular distance for merging lines.
|
467
|
+
merge_endpoint_tolerance: Maximum gap at endpoints for merging lines.
|
468
|
+
initial_min_line_length: Initial minimum length filter before merging.
|
469
|
+
min_nfa_score_horizontal: Minimum NFA score for horizontal lines.
|
470
|
+
min_nfa_score_vertical: Minimum NFA score for vertical lines.
|
559
471
|
|
560
472
|
Returns:
|
561
473
|
Self for method chaining.
|
474
|
+
|
475
|
+
Raises:
|
476
|
+
ImportError: If method="lsd" but opencv-python is not installed.
|
477
|
+
ValueError: If method is not "projection" or "lsd".
|
562
478
|
"""
|
563
479
|
if not horizontal and not vertical:
|
564
480
|
logger.info("Line detection skipped as both horizontal and vertical are False.")
|
565
481
|
return self
|
482
|
+
|
483
|
+
# Validate method parameter
|
484
|
+
if method not in ["projection", "lsd"]:
|
485
|
+
raise ValueError(f"Invalid method '{method}'. Supported methods: 'projection', 'lsd'")
|
566
486
|
|
567
487
|
collection_params = {
|
568
|
-
"resolution": resolution, "source_label": source_label,
|
488
|
+
"resolution": resolution, "source_label": source_label, "method": method,
|
569
489
|
"horizontal": horizontal, "vertical": vertical,
|
570
490
|
"peak_threshold_h": peak_threshold_h, "min_gap_h": min_gap_h,
|
571
491
|
"peak_threshold_v": peak_threshold_v, "min_gap_v": min_gap_v,
|
@@ -578,6 +498,11 @@ class ShapeDetectionMixin:
|
|
578
498
|
"morph_op_v": morph_op_v, "morph_kernel_v": morph_kernel_v,
|
579
499
|
"smoothing_sigma_h": smoothing_sigma_h, "smoothing_sigma_v": smoothing_sigma_v,
|
580
500
|
"peak_width_rel_height": peak_width_rel_height,
|
501
|
+
# LSD parameters
|
502
|
+
"off_angle": off_angle, "min_line_length": min_line_length,
|
503
|
+
"merge_angle_tolerance": merge_angle_tolerance, "merge_distance_tolerance": merge_distance_tolerance,
|
504
|
+
"merge_endpoint_tolerance": merge_endpoint_tolerance, "initial_min_line_length": initial_min_line_length,
|
505
|
+
"min_nfa_score_horizontal": min_nfa_score_horizontal, "min_nfa_score_vertical": min_nfa_score_vertical,
|
581
506
|
}
|
582
507
|
|
583
508
|
if hasattr(self, 'pdfs'):
|
@@ -590,6 +515,54 @@ class ShapeDetectionMixin:
|
|
590
515
|
page_obj.detect_lines(**collection_params)
|
591
516
|
return self
|
592
517
|
|
518
|
+
# Dispatch to appropriate detection method
|
519
|
+
if method == "projection":
|
520
|
+
return self._detect_lines_projection(
|
521
|
+
resolution=resolution, source_label=source_label, horizontal=horizontal, vertical=vertical,
|
522
|
+
peak_threshold_h=peak_threshold_h, min_gap_h=min_gap_h, peak_threshold_v=peak_threshold_v, min_gap_v=min_gap_v,
|
523
|
+
max_lines_h=max_lines_h, max_lines_v=max_lines_v, replace=replace,
|
524
|
+
binarization_method=binarization_method, adaptive_thresh_block_size=adaptive_thresh_block_size,
|
525
|
+
adaptive_thresh_C_val=adaptive_thresh_C_val, morph_op_h=morph_op_h, morph_kernel_h=morph_kernel_h,
|
526
|
+
morph_op_v=morph_op_v, morph_kernel_v=morph_kernel_v, smoothing_sigma_h=smoothing_sigma_h,
|
527
|
+
smoothing_sigma_v=smoothing_sigma_v, peak_width_rel_height=peak_width_rel_height
|
528
|
+
)
|
529
|
+
elif method == "lsd":
|
530
|
+
return self._detect_lines_lsd(
|
531
|
+
resolution=resolution, source_label=source_label, horizontal=horizontal, vertical=vertical,
|
532
|
+
off_angle=off_angle, min_line_length=min_line_length, merge_angle_tolerance=merge_angle_tolerance,
|
533
|
+
merge_distance_tolerance=merge_distance_tolerance, merge_endpoint_tolerance=merge_endpoint_tolerance,
|
534
|
+
initial_min_line_length=initial_min_line_length, min_nfa_score_horizontal=min_nfa_score_horizontal,
|
535
|
+
min_nfa_score_vertical=min_nfa_score_vertical, replace=replace
|
536
|
+
)
|
537
|
+
else:
|
538
|
+
# This should never happen due to validation above, but just in case
|
539
|
+
raise ValueError(f"Unsupported method: {method}")
|
540
|
+
|
541
|
+
def _detect_lines_projection(
|
542
|
+
self,
|
543
|
+
resolution: int,
|
544
|
+
source_label: str,
|
545
|
+
horizontal: bool,
|
546
|
+
vertical: bool,
|
547
|
+
peak_threshold_h: float,
|
548
|
+
min_gap_h: int,
|
549
|
+
peak_threshold_v: float,
|
550
|
+
min_gap_v: int,
|
551
|
+
max_lines_h: Optional[int],
|
552
|
+
max_lines_v: Optional[int],
|
553
|
+
replace: bool,
|
554
|
+
binarization_method: str,
|
555
|
+
adaptive_thresh_block_size: int,
|
556
|
+
adaptive_thresh_C_val: int,
|
557
|
+
morph_op_h: str,
|
558
|
+
morph_kernel_h: Tuple[int, int],
|
559
|
+
morph_op_v: str,
|
560
|
+
morph_kernel_v: Tuple[int, int],
|
561
|
+
smoothing_sigma_h: float,
|
562
|
+
smoothing_sigma_v: float,
|
563
|
+
peak_width_rel_height: float,
|
564
|
+
) -> "ShapeDetectionMixin":
|
565
|
+
"""Internal method for projection profiling line detection."""
|
593
566
|
cv_image, scale_factor, origin_offset_pdf, page_object_ctx = self._get_image_for_detection(resolution)
|
594
567
|
if cv_image is None or page_object_ctx is None:
|
595
568
|
logger.warning(f"Skipping line detection for {self} due to image error.")
|
@@ -654,12 +627,268 @@ class ShapeDetectionMixin:
|
|
654
627
|
except Exception as e:
|
655
628
|
logger.error(f"Failed to create or add LineElement: {e}. Data: {element_constructor_data}", exc_info=True)
|
656
629
|
|
657
|
-
logger.info(f"Detected and added {len(lines_data_img)} lines to {page_object_ctx} with source '{source_label}'.")
|
630
|
+
logger.info(f"Detected and added {len(lines_data_img)} lines to {page_object_ctx} with source '{source_label}' using projection profiling.")
|
631
|
+
return self
|
632
|
+
|
633
|
+
def _detect_lines_lsd(
|
634
|
+
self,
|
635
|
+
resolution: int,
|
636
|
+
source_label: str,
|
637
|
+
horizontal: bool,
|
638
|
+
vertical: bool,
|
639
|
+
off_angle: int,
|
640
|
+
min_line_length: int,
|
641
|
+
merge_angle_tolerance: int,
|
642
|
+
merge_distance_tolerance: int,
|
643
|
+
merge_endpoint_tolerance: int,
|
644
|
+
initial_min_line_length: int,
|
645
|
+
min_nfa_score_horizontal: float,
|
646
|
+
min_nfa_score_vertical: float,
|
647
|
+
replace: bool,
|
648
|
+
) -> "ShapeDetectionMixin":
|
649
|
+
"""Internal method for LSD line detection."""
|
650
|
+
try:
|
651
|
+
import cv2
|
652
|
+
except ImportError:
|
653
|
+
raise ImportError(
|
654
|
+
"OpenCV (cv2) is required for LSD line detection. "
|
655
|
+
"Install it with: pip install opencv-python\n"
|
656
|
+
"Alternatively, use method='projection' which requires no additional dependencies."
|
657
|
+
)
|
658
|
+
|
659
|
+
cv_image, scale_factor, origin_offset_pdf, page_object_ctx = self._get_image_for_detection(resolution)
|
660
|
+
if cv_image is None or page_object_ctx is None:
|
661
|
+
logger.warning(f"Skipping LSD line detection for {self} due to image error.")
|
662
|
+
return self
|
663
|
+
|
664
|
+
if replace:
|
665
|
+
from natural_pdf.elements.line import LineElement
|
666
|
+
element_manager = page_object_ctx._element_mgr
|
667
|
+
if hasattr(element_manager, '_elements') and 'lines' in element_manager._elements:
|
668
|
+
original_count = len(element_manager._elements['lines'])
|
669
|
+
element_manager._elements['lines'] = [
|
670
|
+
line for line in element_manager._elements['lines']
|
671
|
+
if getattr(line, 'source', None) != source_label
|
672
|
+
]
|
673
|
+
removed_count = original_count - len(element_manager._elements['lines'])
|
674
|
+
if removed_count > 0:
|
675
|
+
logger.info(f"Removed {removed_count} existing lines with source '{source_label}' from {page_object_ctx}")
|
676
|
+
|
677
|
+
lines_data_img = self._process_image_for_lines_lsd(
|
678
|
+
cv_image, off_angle, min_line_length, merge_angle_tolerance,
|
679
|
+
merge_distance_tolerance, merge_endpoint_tolerance, initial_min_line_length,
|
680
|
+
min_nfa_score_horizontal, min_nfa_score_vertical
|
681
|
+
)
|
682
|
+
|
683
|
+
from natural_pdf.elements.line import LineElement
|
684
|
+
element_manager = page_object_ctx._element_mgr
|
685
|
+
|
686
|
+
for line_data_item_img in lines_data_img:
|
687
|
+
element_constructor_data = self._convert_line_to_element_data(
|
688
|
+
line_data_item_img, scale_factor, origin_offset_pdf, page_object_ctx, source_label
|
689
|
+
)
|
690
|
+
try:
|
691
|
+
line_element = LineElement(element_constructor_data, page_object_ctx)
|
692
|
+
element_manager.add_element(line_element, element_type="lines")
|
693
|
+
except Exception as e:
|
694
|
+
logger.error(f"Failed to create or add LineElement: {e}. Data: {element_constructor_data}", exc_info=True)
|
695
|
+
|
696
|
+
logger.info(f"Detected and added {len(lines_data_img)} lines to {page_object_ctx} with source '{source_label}' using LSD.")
|
658
697
|
return self
|
659
698
|
|
699
|
+
def _process_image_for_lines_lsd(
|
700
|
+
self,
|
701
|
+
cv_image: np.ndarray,
|
702
|
+
off_angle: int,
|
703
|
+
min_line_length: int,
|
704
|
+
merge_angle_tolerance: int,
|
705
|
+
merge_distance_tolerance: int,
|
706
|
+
merge_endpoint_tolerance: int,
|
707
|
+
initial_min_line_length: int,
|
708
|
+
min_nfa_score_horizontal: float,
|
709
|
+
min_nfa_score_vertical: float,
|
710
|
+
) -> List[Dict]:
|
711
|
+
"""Processes an image to detect lines using OpenCV LSD and merging logic."""
|
712
|
+
import cv2 # Import is already validated in calling method
|
713
|
+
|
714
|
+
if cv_image is None:
|
715
|
+
return []
|
716
|
+
|
717
|
+
gray_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2GRAY)
|
718
|
+
lsd = cv2.createLineSegmentDetector(cv2.LSD_REFINE_ADV)
|
719
|
+
coords_arr, widths_arr, precs_arr, nfa_scores_arr = lsd.detect(gray_image)
|
720
|
+
|
721
|
+
lines_raw = []
|
722
|
+
if coords_arr is not None: # nfa_scores_arr can be None if no lines are found
|
723
|
+
nfa_scores_list = nfa_scores_arr.flatten() if nfa_scores_arr is not None else [0.0] * len(coords_arr)
|
724
|
+
widths_list = widths_arr.flatten() if widths_arr is not None else [1.0] * len(coords_arr)
|
725
|
+
precs_list = precs_arr.flatten() if precs_arr is not None else [0.0] * len(coords_arr)
|
726
|
+
|
727
|
+
for i in range(len(coords_arr)):
|
728
|
+
lines_raw.append((
|
729
|
+
coords_arr[i][0],
|
730
|
+
widths_list[i] if i < len(widths_list) else 1.0,
|
731
|
+
precs_list[i] if i < len(precs_list) else 0.0,
|
732
|
+
nfa_scores_list[i] if i < len(nfa_scores_list) else 0.0
|
733
|
+
))
|
734
|
+
|
735
|
+
def get_line_properties(line_data_item):
|
736
|
+
l_coords, l_width, l_prec, l_nfa_score = line_data_item
|
737
|
+
x1, y1, x2, y2 = l_coords
|
738
|
+
angle_rad = np.arctan2(y2 - y1, x2 - x1)
|
739
|
+
angle_deg = np.degrees(angle_rad)
|
740
|
+
normalized_angle_deg = angle_deg % 180
|
741
|
+
if normalized_angle_deg < 0:
|
742
|
+
normalized_angle_deg += 180
|
743
|
+
|
744
|
+
is_h = abs(normalized_angle_deg) <= off_angle or abs(normalized_angle_deg - 180) <= off_angle
|
745
|
+
is_v = abs(normalized_angle_deg - 90) <= off_angle
|
746
|
+
|
747
|
+
if is_h and x1 > x2: x1, x2, y1, y2 = x2, x1, y2, y1
|
748
|
+
elif is_v and y1 > y2: y1, y2, x1, x2 = y2, y1, x2, x1
|
749
|
+
|
750
|
+
length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
|
751
|
+
return {'coords': (x1, y1, x2, y2), 'width': l_width, 'prec': l_prec,
|
752
|
+
'angle_deg': normalized_angle_deg, 'is_horizontal': is_h, 'is_vertical': is_v,
|
753
|
+
'length': length, 'nfa_score': l_nfa_score}
|
754
|
+
|
755
|
+
processed_lines = [get_line_properties(ld) for ld in lines_raw]
|
756
|
+
|
757
|
+
filtered_lines = []
|
758
|
+
for p in processed_lines:
|
759
|
+
if p['length'] <= initial_min_line_length: continue
|
760
|
+
if p['is_horizontal'] and p['nfa_score'] >= min_nfa_score_horizontal:
|
761
|
+
filtered_lines.append(p)
|
762
|
+
elif p['is_vertical'] and p['nfa_score'] >= min_nfa_score_vertical:
|
763
|
+
filtered_lines.append(p)
|
764
|
+
|
765
|
+
horizontal_lines = [p for p in filtered_lines if p['is_horizontal']]
|
766
|
+
vertical_lines = [p for p in filtered_lines if p['is_vertical']]
|
767
|
+
|
768
|
+
def merge_lines_list(lines_list, is_horizontal_merge):
    """Merge near-collinear segments of one orientation into longer lines.

    Segments are grouped greedily: each still-unmerged segment seeds a
    group, and the group keeps absorbing candidates that are parallel to
    it, close to it across the line direction, and overlapping or nearly
    touching it along the line direction. Each group becomes one
    axis-aligned line.

    Args:
        lines_list: Property dicts with at least the keys ``coords``
            (``(x1, y1, x2, y2)`` ordered along the line direction),
            ``angle_deg`` (in ``[0, 180)``), ``length``, ``width`` and
            ``nfa_score``. The list is not modified.
        is_horizontal_merge: True to merge along x (horizontal lines),
            False to merge along y (vertical lines).

    Returns:
        List of ``(x1, y1, x2, y2, width, nfa_score)`` tuples. The
        perpendicular coordinate, width and NFA score are length-weighted
        means over the group; the extent is the min/max over the group.

    Note:
        Reads ``merge_angle_tolerance``, ``merge_distance_tolerance`` and
        ``merge_endpoint_tolerance`` from the enclosing scope.
    """
    if not lines_list:
        return []

    # Coordinate indices along the line direction and perpendicular to it.
    if is_horizontal_merge:
        along_lo, along_hi, perp_lo, perp_hi = 0, 2, 1, 3
    else:
        along_lo, along_hi, perp_lo, perp_hi = 1, 3, 0, 2

    def perp_mid(props):
        """Midpoint of a segment on the perpendicular axis."""
        return (props['coords'][perp_lo] + props['coords'][perp_hi]) / 2

    # Sort a copy so the caller's list keeps its order.
    ordered = sorted(
        lines_list,
        key=lambda p: (p['coords'][perp_lo], p['coords'][along_lo]),
    )
    merged_flags = [False] * len(ordered)
    merged_results = []

    for i, seed in enumerate(ordered):
        if merged_flags[i]:
            continue
        merged_flags[i] = True
        group = [seed]
        ref_angle = seed['angle_deg']

        # Expand until a full pass adds nothing. Every productive pass
        # flags at least one more segment, so the loop always terminates.
        # Group bounds are refreshed only between passes, which is why a
        # long dashed line needs many passes.
        while True:
            total_len = sum(p['length'] for p in group)
            if total_len == 0:
                break  # Degenerate group; dropped below.

            span_lo = min(p['coords'][along_lo] for p in group)
            span_hi = max(p['coords'][along_hi] for p in group)

            # Length-weighted mean angle, computed as signed deviations
            # from the seed angle so that 0.5 deg and 179.5 deg average to
            # about 0 deg rather than about 90 deg.
            mean_dev = sum(
                (((p['angle_deg'] - ref_angle + 90) % 180) - 90) * p['length']
                for p in group
            ) / total_len
            group_avg_angle = (ref_angle + mean_dev) % 180
            group_avg_perp = sum(
                perp_mid(p) * p['length'] for p in group
            ) / total_len

            group_changed = False
            for j, cand in enumerate(ordered):
                if merged_flags[j]:
                    continue

                # 1. Parallelism, with wrap-around at 0/180 degrees.
                angle_diff = abs(group_avg_angle - cand['angle_deg'])
                if angle_diff > 90:
                    angle_diff = 180 - angle_diff
                if angle_diff > merge_angle_tolerance:
                    continue

                # 2. Closeness across the line direction.
                if abs(group_avg_perp - perp_mid(cand)) > merge_distance_tolerance:
                    continue

                # 3. Overlap, small gap, or containment along the line.
                cand_lo = cand['coords'][along_lo]
                cand_hi = cand['coords'][along_hi]
                overlap = max(0, min(span_hi, cand_hi) - max(span_lo, cand_lo))
                gap_to_group = min(abs(span_lo - cand_hi), abs(span_hi - cand_lo))
                contained = cand_lo >= span_lo and cand_hi <= span_hi
                if not (
                    overlap > 0
                    or gap_to_group <= merge_endpoint_tolerance
                    or contained
                ):
                    continue

                group.append(cand)
                merged_flags[j] = True
                group_changed = True

            if not group_changed:
                break

        final_total_len = sum(p['length'] for p in group)
        if final_total_len == 0:
            continue

        final_lo = min(p['coords'][along_lo] for p in group)
        final_hi = max(p['coords'][along_hi] for p in group)
        final_width = sum(p['width'] * p['length'] for p in group) / final_total_len
        final_nfa = sum(p['nfa_score'] * p['length'] for p in group) / final_total_len
        final_perp = sum(perp_mid(p) * p['length'] for p in group) / final_total_len

        if is_horizontal_merge:
            merged_results.append(
                (final_lo, final_perp, final_hi, final_perp, final_width, final_nfa)
            )
        else:
            merged_results.append(
                (final_perp, final_lo, final_perp, final_hi, final_width, final_nfa)
            )
    return merged_results
|
864
|
+
|
865
|
+
merged_h_lines = merge_lines_list(horizontal_lines, True)
|
866
|
+
merged_v_lines = merge_lines_list(vertical_lines, False)
|
867
|
+
all_merged = merged_h_lines + merged_v_lines
|
868
|
+
|
869
|
+
final_lines_data = []
|
870
|
+
for line_data_item in all_merged:
|
871
|
+
x1, y1, x2, y2, width, nfa = line_data_item
|
872
|
+
length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
|
873
|
+
if length > min_line_length:
|
874
|
+
# Ensure x1 <= x2 for horizontal, y1 <= y2 for vertical
|
875
|
+
if abs(y2 - y1) < abs(x2-x1): # Horizontal-ish
|
876
|
+
if x1 > x2: x1_out, y1_out, x2_out, y2_out = x2, y2, x1, y1
|
877
|
+
else: x1_out, y1_out, x2_out, y2_out = x1, y1, x2, y2
|
878
|
+
else: # Vertical-ish
|
879
|
+
if y1 > y2: x1_out, y1_out, x2_out, y2_out = x2, y2, x1, y1
|
880
|
+
else: x1_out, y1_out, x2_out, y2_out = x1, y1, x2, y2
|
881
|
+
|
882
|
+
final_lines_data.append({
|
883
|
+
'x1': x1_out, 'y1': y1_out, 'x2': x2_out, 'y2': y2_out,
|
884
|
+
'width': width, 'nfa_score': nfa, 'length': length
|
885
|
+
})
|
886
|
+
return final_lines_data
|
887
|
+
|
660
888
|
def detect_lines_preview(
|
661
889
|
self,
|
662
890
|
resolution: int = 72, # Preview typically uses lower resolution
|
891
|
+
method: str = "projection",
|
663
892
|
horizontal: bool = True,
|
664
893
|
vertical: bool = True,
|
665
894
|
peak_threshold_h: float = 0.5,
|
@@ -678,12 +907,31 @@ class ShapeDetectionMixin:
|
|
678
907
|
smoothing_sigma_h: float = LINE_DETECTION_PARAM_DEFAULTS["smoothing_sigma_h"],
|
679
908
|
smoothing_sigma_v: float = LINE_DETECTION_PARAM_DEFAULTS["smoothing_sigma_v"],
|
680
909
|
peak_width_rel_height: float = LINE_DETECTION_PARAM_DEFAULTS["peak_width_rel_height"],
|
910
|
+
# LSD-specific parameters
|
911
|
+
off_angle: int = 5,
|
912
|
+
min_line_length: int = 30,
|
913
|
+
merge_angle_tolerance: int = 5,
|
914
|
+
merge_distance_tolerance: int = 3,
|
915
|
+
merge_endpoint_tolerance: int = 10,
|
916
|
+
initial_min_line_length: int = 10,
|
917
|
+
min_nfa_score_horizontal: float = -10.0,
|
918
|
+
min_nfa_score_vertical: float = -10.0,
|
681
919
|
) -> Optional[Image.Image]:
|
682
920
|
"""
|
683
921
|
Previews detected lines on a Page or Region without adding them to the PDF elements.
|
684
922
|
Generates and returns a debug visualization image.
|
685
923
|
This method is intended for Page or Region objects.
|
686
|
-
|
924
|
+
|
925
|
+
Args:
|
926
|
+
method: Detection method - "projection" (default) or "lsd" (requires opencv-python).
|
927
|
+
See `detect_lines` for other parameter descriptions. The main difference is a lower default `resolution`.
|
928
|
+
|
929
|
+
Returns:
|
930
|
+
PIL Image with line detection visualization, or None if preview failed.
|
931
|
+
|
932
|
+
Note:
|
933
|
+
Only projection profiling method supports histogram visualization.
|
934
|
+
LSD method will show detected lines overlaid on the original image.
|
687
935
|
"""
|
688
936
|
if hasattr(self, 'pdfs') or (hasattr(self, 'pages') and not hasattr(self, '_page')):
|
689
937
|
logger.warning("preview_detected_lines is intended for single Page/Region objects. For collections, process pages individually.")
|
@@ -693,6 +941,10 @@ class ShapeDetectionMixin:
|
|
693
941
|
logger.info("Line preview skipped as both horizontal and vertical are False.")
|
694
942
|
return None
|
695
943
|
|
944
|
+
# Validate method parameter
|
945
|
+
if method not in ["projection", "lsd"]:
|
946
|
+
raise ValueError(f"Invalid method '{method}'. Supported methods: 'projection', 'lsd'")
|
947
|
+
|
696
948
|
cv_image, _, _, page_object_ctx = self._get_image_for_detection(resolution) # scale_factor and origin_offset not needed for preview
|
697
949
|
if cv_image is None or page_object_ctx is None: # page_object_ctx for logging context mostly
|
698
950
|
logger.warning(f"Skipping line preview for {self} due to image error.")
|
@@ -712,40 +964,56 @@ class ShapeDetectionMixin:
|
|
712
964
|
if pil_image_for_dims.mode != "RGB":
|
713
965
|
pil_image_for_dims = pil_image_for_dims.convert("RGB")
|
714
966
|
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
967
|
+
# Get lines data based on method
|
968
|
+
if method == "projection":
|
969
|
+
lines_data_img, profile_h_smoothed, profile_v_smoothed = self._find_lines_on_image_data(
|
970
|
+
cv_image=cv_image,
|
971
|
+
pil_image_rgb=pil_image_for_dims,
|
972
|
+
horizontal=horizontal,
|
973
|
+
vertical=vertical,
|
974
|
+
peak_threshold_h=peak_threshold_h,
|
975
|
+
min_gap_h=min_gap_h,
|
976
|
+
peak_threshold_v=peak_threshold_v,
|
977
|
+
min_gap_v=min_gap_v,
|
978
|
+
max_lines_h=max_lines_h,
|
979
|
+
max_lines_v=max_lines_v,
|
980
|
+
binarization_method=binarization_method,
|
981
|
+
adaptive_thresh_block_size=adaptive_thresh_block_size,
|
982
|
+
adaptive_thresh_C_val=adaptive_thresh_C_val,
|
983
|
+
morph_op_h=morph_op_h, morph_kernel_h=morph_kernel_h,
|
984
|
+
morph_op_v=morph_op_v, morph_kernel_v=morph_kernel_v,
|
985
|
+
smoothing_sigma_h=smoothing_sigma_h, smoothing_sigma_v=smoothing_sigma_v,
|
986
|
+
peak_width_rel_height=peak_width_rel_height,
|
987
|
+
)
|
988
|
+
elif method == "lsd":
|
989
|
+
try:
|
990
|
+
import cv2
|
991
|
+
except ImportError:
|
992
|
+
raise ImportError(
|
993
|
+
"OpenCV (cv2) is required for LSD line detection preview. "
|
994
|
+
"Install it with: pip install opencv-python\n"
|
995
|
+
"Alternatively, use method='projection' for preview."
|
996
|
+
)
|
997
|
+
lines_data_img = self._process_image_for_lines_lsd(
|
998
|
+
cv_image, off_angle, min_line_length, merge_angle_tolerance,
|
999
|
+
merge_distance_tolerance, merge_endpoint_tolerance, initial_min_line_length,
|
1000
|
+
min_nfa_score_horizontal, min_nfa_score_vertical
|
1001
|
+
)
|
1002
|
+
profile_h_smoothed, profile_v_smoothed = None, None # LSD doesn't use profiles
|
734
1003
|
|
735
1004
|
if not lines_data_img: # Check if any lines were detected before visualization
|
736
1005
|
logger.info(f"No lines detected for preview on {page_object_ctx or self}")
|
737
1006
|
# Optionally return the base image if no lines, or None
|
738
1007
|
return pil_image_for_dims.convert("RGBA") # Return base image so something is shown
|
739
1008
|
|
740
|
-
|
741
|
-
# --- Visualization Logic (copied from previous debug block) ---
|
1009
|
+
# --- Visualization Logic ---
|
742
1010
|
final_viz_image: Optional[Image.Image] = None
|
743
1011
|
viz_image_base = pil_image_for_dims.convert("RGBA")
|
744
1012
|
draw = ImageDraw.Draw(viz_image_base)
|
745
1013
|
img_width, img_height = viz_image_base.size
|
746
1014
|
|
747
1015
|
viz_params = {
|
748
|
-
"draw_line_thickness_viz":
|
1016
|
+
"draw_line_thickness_viz": 2, # Slightly thicker for better visibility
|
749
1017
|
"debug_histogram_size": 100,
|
750
1018
|
"line_color_h": (255, 0, 0, 200), "line_color_v": (0, 0, 255, 200),
|
751
1019
|
"histogram_bar_color_h": (200, 0, 0, 200), "histogram_bar_color_v": (0, 0, 200, 200),
|
@@ -756,6 +1024,7 @@ class ShapeDetectionMixin:
|
|
756
1024
|
"max_lines_v": max_lines_v,
|
757
1025
|
}
|
758
1026
|
|
1027
|
+
# Draw detected lines on the image
|
759
1028
|
for line_info in lines_data_img:
|
760
1029
|
is_h_line = abs(line_info['y1'] - line_info['y2']) < abs(line_info['x1'] - line_info['x2'])
|
761
1030
|
line_color = viz_params["line_color_h"] if is_h_line else viz_params["line_color_v"]
|
@@ -764,51 +1033,63 @@ class ShapeDetectionMixin:
|
|
764
1033
|
(line_info['x2'], line_info['y2'])
|
765
1034
|
], fill=line_color, width=viz_params["draw_line_thickness_viz"])
|
766
1035
|
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
if
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
hist_h_draw.line([(
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
1036
|
+
# For projection method, add histogram visualization
|
1037
|
+
if method == "projection" and (profile_h_smoothed is not None or profile_v_smoothed is not None):
|
1038
|
+
hist_size = viz_params["debug_histogram_size"]
|
1039
|
+
hist_h_img = Image.new("RGBA", (hist_size, img_height), viz_params["histogram_bg_color"])
|
1040
|
+
hist_h_draw = ImageDraw.Draw(hist_h_img)
|
1041
|
+
|
1042
|
+
if profile_h_smoothed is not None and profile_h_smoothed.size > 0:
|
1043
|
+
actual_max_h_profile = profile_h_smoothed.max()
|
1044
|
+
display_threshold_val_h = peak_threshold_h * img_width
|
1045
|
+
# Use the maximum of either the profile max or threshold for scaling, so both are always visible
|
1046
|
+
max_h_profile_val_for_scaling = max(actual_max_h_profile, display_threshold_val_h) if actual_max_h_profile > 0 else img_width
|
1047
|
+
for y_coord, val in enumerate(profile_h_smoothed):
|
1048
|
+
bar_len = 0; thresh_bar_len = 0
|
1049
|
+
if max_h_profile_val_for_scaling > 0:
|
1050
|
+
bar_len = int((val / max_h_profile_val_for_scaling) * hist_size)
|
1051
|
+
if display_threshold_val_h >= 0:
|
1052
|
+
thresh_bar_len = int((display_threshold_val_h / max_h_profile_val_for_scaling) * hist_size)
|
1053
|
+
bar_len = min(max(0, bar_len), hist_size)
|
1054
|
+
if bar_len > 0: hist_h_draw.line([(0, y_coord), (bar_len -1 , y_coord)], fill=viz_params["histogram_bar_color_h"], width=1)
|
1055
|
+
if viz_params["max_lines_h"] is None and display_threshold_val_h >=0 and \
|
1056
|
+
thresh_bar_len > 0 and thresh_bar_len <= hist_size:
|
1057
|
+
# Ensure threshold line is within bounds
|
1058
|
+
thresh_x = min(thresh_bar_len, hist_size - 1)
|
1059
|
+
hist_h_draw.line([(thresh_x, y_coord), (thresh_x, y_coord+1 if y_coord+1 < img_height else y_coord)], fill=(0,255,0,100), width=1)
|
1060
|
+
|
1061
|
+
hist_v_img = Image.new("RGBA", (img_width, hist_size), viz_params["histogram_bg_color"])
|
1062
|
+
hist_v_draw = ImageDraw.Draw(hist_v_img)
|
1063
|
+
if profile_v_smoothed is not None and profile_v_smoothed.size > 0:
|
1064
|
+
actual_max_v_profile = profile_v_smoothed.max()
|
1065
|
+
display_threshold_val_v = peak_threshold_v * img_height
|
1066
|
+
# Use the maximum of either the profile max or threshold for scaling, so both are always visible
|
1067
|
+
max_v_profile_val_for_scaling = max(actual_max_v_profile, display_threshold_val_v) if actual_max_v_profile > 0 else img_height
|
1068
|
+
for x_coord, val in enumerate(profile_v_smoothed):
|
1069
|
+
bar_height = 0; thresh_bar_h = 0
|
1070
|
+
if max_v_profile_val_for_scaling > 0:
|
1071
|
+
bar_height = int((val / max_v_profile_val_for_scaling) * hist_size)
|
1072
|
+
if display_threshold_val_v >=0:
|
1073
|
+
thresh_bar_h = int((display_threshold_val_v / max_v_profile_val_for_scaling) * hist_size)
|
1074
|
+
bar_height = min(max(0, bar_height), hist_size)
|
1075
|
+
if bar_height > 0: hist_v_draw.line([(x_coord, hist_size -1 ), (x_coord, hist_size - bar_height)], fill=viz_params["histogram_bar_color_v"], width=1)
|
1076
|
+
if viz_params["max_lines_v"] is None and display_threshold_val_v >=0 and \
|
1077
|
+
thresh_bar_h > 0 and thresh_bar_h <= hist_size:
|
1078
|
+
# Ensure threshold line is within bounds
|
1079
|
+
thresh_y = min(thresh_bar_h, hist_size - 1)
|
1080
|
+
hist_v_draw.line([(x_coord, hist_size - thresh_y), (x_coord+1 if x_coord+1 < img_width else x_coord, hist_size - thresh_y)], fill=(0,255,0,100), width=1)
|
1081
|
+
|
1082
|
+
padding = viz_params["padding_between_viz"]
|
1083
|
+
total_width = img_width + padding + hist_size
|
1084
|
+
total_height = img_height + padding + hist_size
|
1085
|
+
final_viz_image = Image.new("RGBA", (total_width, total_height), (255, 255, 255, 255))
|
1086
|
+
final_viz_image.paste(viz_image_base, (0, 0))
|
1087
|
+
final_viz_image.paste(hist_h_img, (img_width + padding, 0))
|
1088
|
+
final_viz_image.paste(hist_v_img, (0, img_height + padding))
|
1089
|
+
else:
|
1090
|
+
# For LSD method, just return the image with lines overlaid
|
1091
|
+
final_viz_image = viz_image_base
|
1092
|
+
|
812
1093
|
logger.info(f"Generated line preview visualization for {page_object_ctx or self}")
|
813
1094
|
return final_viz_image
|
814
1095
|
|