kdraw 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kdraw/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ from .pixel_perfect import convert_pixel_perfect
2
+ from .smooth import convert_smooth
3
+ from .plotter import convert_plotter
4
+ from .centerline import convert_centerline
5
+
6
+ __all__ = [
7
+ 'convert_pixel_perfect',
8
+ 'convert_smooth',
9
+ 'convert_plotter',
10
+ 'convert_centerline',
11
+ ]
kdraw/centerline.py ADDED
@@ -0,0 +1,199 @@
1
+ import os
2
+ import sys
3
+ import time
4
+ import numpy as np
5
+
6
+ from .graph import build_and_prune_graph
7
+ from .optimization import optimize_paths
8
+ from .smoothing import smooth_paths_chaikin, smooth_paths_laplacian
9
+
10
def convert_centerline(input_path, output_path, threshold_val=None, epsilon=0.3, no_sort=False,
                       invert_threshold=False, blur_size=9, use_adaptive=False, block_size=15, c_val=10,
                       min_spur_length=16, max_join=2.5, loop_gap=0.0, min_path_len=0.0,
                       smooth_type='chaikin', smooth_iters=3, smooth_weight=0.5, smooth_decimate=0.1,
                       upscale_factor=4, morph_close=5, morph_open=0, collapse_junc=8):
    """
    Skeletonize the image to a 1-pixel-wide centerline, trace it into single-line paths,
    prune short spurs using graph topology, and output a stroke-only SVG with minimized pen-up travel.

    Args:
        input_path: Path of the image handed to ``cv2.imread``.
        output_path: Path of the SVG file that is written.
        threshold_val: Global threshold value; ``None`` selects Otsu's method.
            Only used when ``use_adaptive`` is false.
        epsilon: Tolerance passed to ``cv2.approxPolyDP`` when simplifying traced paths.
        no_sort: When true, skip the ``optimize_paths`` ordering step.
        invert_threshold: Use ``THRESH_BINARY`` instead of ``THRESH_BINARY_INV``.
        blur_size: Gaussian blur kernel size; values <= 0 disable blurring.
            Converted to an odd integer before use.
        use_adaptive: Use adaptive Gaussian thresholding instead of a global threshold.
        block_size: Adaptive-threshold neighbourhood size (forced odd and >= 3).
        c_val: Constant passed to ``cv2.adaptiveThreshold``.
        min_spur_length: Forwarded to ``build_and_prune_graph``.
        max_join: Forwarded to ``optimize_paths``.
        loop_gap: Accepted for interface compatibility; this function does not read it.
        min_path_len: Paths whose length (in original-image pixels) is below
            this value are dropped; 0 keeps everything.
        smooth_type: ``'chaikin'`` (case-insensitive) selects Chaikin smoothing;
            any other value selects Laplacian smoothing.
        smooth_iters: Number of smoothing iterations; 0 disables smoothing.
        smooth_weight: Weight forwarded to the Laplacian smoother.
        smooth_decimate: ``approxPolyDP`` tolerance applied after smoothing; 0 disables it.
        upscale_factor: Integer factor by which the image is enlarged before
            processing; path coordinates are divided by it afterwards.
        morph_close: Closing kernel size, applied only when ``upscale_factor > 1``.
        morph_open: Opening kernel size, applied only when ``upscale_factor > 1``.
        collapse_junc: Forwarded to ``build_and_prune_graph`` as ``collapse_dist``.

    Returns:
        True when the SVG was written; False when a required package is
        missing or the input image could not be loaded.
    """
    try:
        import cv2
    except ImportError:
        print("\n[ERROR] The 'opencv-python' package is required for Centerline Mode.")
        print("Please install it via:")
        print(" pip install opencv-python")
        return False

    try:
        from skimage.morphology import skeletonize
    except ImportError:
        print("\n[ERROR] The 'scikit-image' package is required for Centerline Mode.")
        print("Please install it via:")
        print(" pip install scikit-image")
        return False

    print(f"Loading image {input_path} in grayscale...")
    img = cv2.imread(input_path)
    if img is None:
        print(f"Error: Could not load image {input_path} with OpenCV.")
        # Report failure the same way as the missing-dependency paths above
        # instead of terminating the interpreter from library code.
        return False

    height, width = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # 1. Upscale if requested
    if upscale_factor > 1:
        print(f"Upscaling input image by {upscale_factor}x for smooth curve definition...")
        width_up, height_up = width * upscale_factor, height * upscale_factor
        gray = cv2.resize(gray, (width_up, height_up), interpolation=cv2.INTER_CUBIC)

    # 2. Apply Gaussian Blur to smooth pixelated edges and JPEG compression wiggles
    if blur_size > 0:
        # GaussianBlur needs an odd integer kernel size; normalise once,
        # regardless of whether the image was upscaled.
        blur_size = int(blur_size)
        if blur_size % 2 == 0:
            blur_size += 1
        print(f"Applying Gaussian Blur (kernel={blur_size}x{blur_size}) to smooth wiggles...")
        gray = cv2.GaussianBlur(gray, (blur_size, blur_size), 0)

    # 3. Apply Thresholding
    thresh_type = cv2.THRESH_BINARY if invert_threshold else cv2.THRESH_BINARY_INV
    if use_adaptive:
        # Normalise before logging so the message shows the value actually used.
        if block_size % 2 == 0:
            block_size += 1
        block_size = max(3, block_size)
        print(f"Applying Adaptive Gaussian Thresholding (blockSize={block_size}, C={c_val})...")
        processed = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, thresh_type, block_size, c_val
        )
    elif threshold_val is None:
        print("Applying Otsu's adaptive thresholding...")
        _, processed = cv2.threshold(gray, 0, 255, thresh_type + cv2.THRESH_OTSU)
    else:
        print(f"Applying binary thresholding at value {threshold_val}...")
        _, processed = cv2.threshold(gray, threshold_val, 255, thresh_type)

    # 4. Apply Morphological closing/opening if upscaled
    if upscale_factor > 1:
        if morph_close > 0:
            print(f"Applying morphological closing (kernel={morph_close}x{morph_close})...")
            kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (morph_close, morph_close))
            processed = cv2.morphologyEx(processed, cv2.MORPH_CLOSE, kernel_close)
        if morph_open > 0:
            print(f"Applying morphological opening (kernel={morph_open}x{morph_open})...")
            kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (morph_open, morph_open))
            processed = cv2.morphologyEx(processed, cv2.MORPH_OPEN, kernel_open)

    # 5. Skeletonize (thinning to 1-pixel centerline)
    print("Skeletonizing image...")
    start_skel = time.time()
    binary_bool = (processed > 0)
    skel_bool = skeletonize(binary_bool)
    print(f"Skeletonization completed in {time.time() - start_skel:.3f} seconds.")

    # 6. Graph-based tracing and pruning
    print(f"Tracing centerline paths with graph topology (min_spur={min_spur_length}, collapse_junc={collapse_junc})...")
    start_trace = time.time()
    paths = build_and_prune_graph(skel_bool, min_spur_length=min_spur_length, collapse_dist=collapse_junc)
    print(f"Tracing finished in {time.time() - start_trace:.3f} seconds. Extracted {len(paths):,} paths.")

    # 7. Scale coordinates back and simplify paths
    processed_paths = []
    pruned_count = 0
    total_raw_points = sum(len(p) for p in paths)

    for p in paths:
        if len(p) < 2:
            continue

        # Scale back to original coordinates
        p_scaled = p / upscale_factor if upscale_factor > 1 else p

        # Pruning short paths (polyline length measured in original pixels)
        if min_path_len > 0.0:
            p_len = float(np.sum(np.sqrt(np.sum(np.diff(p_scaled, axis=0)**2, axis=1))))
            if p_len < min_path_len:
                pruned_count += 1
                continue

        # Simplify path using RDP; a two-point path cannot be simplified further
        if len(p_scaled) == 2:
            approx = p_scaled
        else:
            p_reshaped = p_scaled.reshape(-1, 1, 2)
            approx = cv2.approxPolyDP(p_reshaped, epsilon, False).reshape(-1, 2)

        if len(approx) >= 2:
            processed_paths.append(approx)

    paths = processed_paths
    if min_path_len > 0.0:
        print(f"Pruned {pruned_count:,} short paths (length < {min_path_len}px).")

    # 8. Path Smoothing and Post-decimation
    if smooth_iters > 0:
        print(f"Applying {smooth_type} path smoothing ({smooth_iters} iterations)...")
        smoothed_paths = []
        for p in paths:
            if smooth_type.lower() == 'chaikin':
                sp = smooth_paths_chaikin(p, smooth_iters)
            else:
                sp = smooth_paths_laplacian(p, smooth_iters, smooth_weight)

            if smooth_decimate > 0.0 and len(sp) > 2:
                sp_reshaped = sp.reshape(-1, 1, 2)
                approx = cv2.approxPolyDP(sp_reshaped, smooth_decimate, False)
                sp = approx.reshape(-1, 2)

            if len(sp) >= 2:
                smoothed_paths.append(sp)
        paths = smoothed_paths

    total_simp_points = sum(len(p) for p in paths)
    print(f"Simplified to {len(paths):,} paths.")
    print(f"Reduced points from {total_raw_points:,} to {total_simp_points:,} ({(1 - total_simp_points/max(1, total_raw_points))*100:.1f}% reduction).")

    # 9. Optimize path sequence (TSP)
    if not no_sort:
        print("Optimizing path sequences to minimize pen travel (TSP)...")
        start_sort = time.time()
        optimized, unopt_travel, opt_travel = optimize_paths(paths, max_join)
        sort_time = time.time() - start_sort
        print(f"TSP optimization finished in {sort_time:.3f} seconds.")
        if unopt_travel > 0:
            saved = (1 - opt_travel / unopt_travel) * 100
            print(f"Pen-up travel distance reduced from {unopt_travel:.1f}px to {opt_travel:.1f}px ({saved:.1f}% travel saved!).")
    else:
        print("Skipping path sequence optimization.")
        optimized = paths

    print(f"Writing centerline SVG to {output_path}...")
    start_write = time.time()

    # Build one "M x,y L x,y ..." sub-path per polyline; join() avoids
    # repeated string re-allocation on long paths.
    path_d_parts = []
    for path in optimized:
        if len(path) < 2:
            continue
        move_to = f"M{path[0][0]:.2f},{path[0][1]:.2f}"
        line_tos = "".join(f"L{pt[0]:.2f},{pt[1]:.2f}" for pt in path[1:])
        path_d_parts.append(move_to + line_tos)
    path_d = " ".join(path_d_parts)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(f'<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {height}" width="{width}" height="{height}">\n')
        f.write(f' <path d="{path_d}" fill="none" stroke="black" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" />\n')
        f.write('</svg>\n')

    print(f"SVG written in {time.time() - start_write:.3f} seconds.")
    size_bytes = os.path.getsize(output_path)
    if size_bytes > 1024 * 1024:
        print(f"Output file size: {size_bytes / 1024 / 1024:.2f} MB")
    else:
        print(f"Output file size: {size_bytes / 1024:.2f} KB")
    return True
kdraw/cli.py ADDED
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import sys
4
+ import argparse
5
+
6
+ import kdraw
7
+
8
def _build_parser():
    """Create the argument parser that describes the kdraw command line."""
    parser = argparse.ArgumentParser(description="Convert an image (JPG/PNG) into an optimized SVG path document.")
    add = parser.add_argument

    add("input_path", help="Path to the input image (e.g. a.jpg)")
    add("output_path", nargs="?", help="Path to the output SVG (defaults to <input_name>.svg)")
    add("-c", "--colors", type=int, default=None,
        help="Reduce image to N colors using quantization before merging (highly recommended for JPEGs)")
    add("-l", "--lossless", action="store_true",
        help="Convert exact pixel colors without quantization (can result in very large files for JPEGs)")
    add("-s", "--smooth", action="store_true",
        help="Generate smooth vectorized curves instead of pixel-perfect blocks (requires vtracer)")

    # Pen-plotter and centerline modes
    add("-p", "--plotter", action="store_true",
        help="Generate stroke-only SVG paths optimized for a CNC pen plotter")
    add("-cl", "--centerline", action="store_true",
        help="Generate single-line centerpaths using skeletonization to eliminate bubble letters")
    add("--invert", action="store_true",
        help="Invert thresholding in centerline mode (for white lines on a dark background)")
    add("--canny", action="store_true",
        help="Use Canny edge detection instead of thresholding for plotter mode")
    add("--threshold", type=int, default=None,
        help="Custom binary threshold value (0-255) for plotter/centerline mode")
    add("--epsilon", type=float, default=0.3,
        help="Path simplification distance tolerance in pixels (default: 0.3)")
    add("--no-sort", action="store_true",
        help="Skip TSP path sorting/sequence optimization")
    add("--max-join", type=float, default=2.5,
        help="Join path endpoints within this distance in pixels to reduce pen lifts (default: 2.5)")

    # Path smoothing
    add("--smooth-type", choices=["laplacian", "chaikin"], default="chaikin",
        help="Smoothing algorithm to use: laplacian or chaikin (default: chaikin)")
    add("--smooth-iters", type=int, default=3,
        help="Number of smoothing passes/iterations (default: 3)")
    add("--smooth-weight", type=float, default=0.5,
        help="Laplacian smoothing blend factor between 0.0 and 1.0 (default: 0.5)")
    add("--smooth-decimate", type=float, default=0.1,
        help="Post-smoothing RDP decimation epsilon tolerance (default: 0.1)")

    # Image pre-processing
    add("--blur", type=int, default=9,
        help="Gaussian blur kernel size to smooth out pixelation wiggles (default: 9)")
    add("--no-adaptive", action="store_true",
        help="Disable Adaptive Gaussian thresholding and use global thresholding instead")
    add("--block-size", type=int, default=15,
        help="Local neighborhood block size for adaptive thresholding (default: 15)")
    add("--c-val", type=int, default=10,
        help="Constant subtracted from local mean for adaptive thresholding; higher makes lines thinner (default: 10)")
    add("--min-spur", type=int, default=16,
        help="Minimum pixel length for a skeleton branch to not be pruned as a spur (default: 16)")
    add("--loop-gap", type=float, default=0.0,
        help="Width of gap in pixels to open small closed loops (e.g. 5.0 to 8.0) (default: 0.0)")
    add("--min-path-len", type=float, default=0.0,
        help="Minimum length of a path in pixels to keep; shorter paths are pruned (default: 0.0)")

    # Upscaling and graph-based pruning
    add("--upscale", type=int, default=4,
        help="Upscale factor to smooth out pixelation wiggles during centerline mode (default: 4)")
    add("--morph-close", type=int, default=5,
        help="Morphological closing kernel size on upscaled image to fill gaps (default: 5)")
    add("--morph-open", type=int, default=0,
        help="Morphological opening kernel size on upscaled image to smooth contours (default: 0)")
    add("--collapse-junc", type=int, default=8,
        help="Distance in pixels below which adjacent junctions will be collapsed (default: 8)")

    # vtracer tuning for smooth mode
    add("--filter-speckle", type=int, default=4, help="Speckle filter size for smooth vectorization")
    add("--color-precision", type=int, default=6, help="Color precision (significant bits) for smooth vectorization")
    add("--corner-threshold", type=int, default=60, help="Corner threshold angle for smooth vectorization")
    add("--path-precision", type=int, default=3, help="Decimal precision of path coordinates")

    return parser


def _default_output_path(args):
    """Derive the SVG path from the input path, tagging it with the selected mode."""
    stem = os.path.splitext(args.input_path)[0]
    if args.centerline:
        tag = "_centerline"
    elif args.plotter:
        tag = "_plotter"
    else:
        tag = ""
    return f"{stem}{tag}.svg"


def main():
    """Parse the command line and run the selected conversion mode.

    Mode precedence is centerline, then plotter, then smooth, and finally
    pixel-perfect. The process exits with status 1 when the input file does
    not exist or when one of the first three converters returns a falsy value.
    """
    args = _build_parser().parse_args()

    if not os.path.exists(args.input_path):
        print(f"Error: Input file '{args.input_path}' not found.")
        sys.exit(1)

    # An omitted (or empty) output path falls back to a name derived from the input.
    output_path = args.output_path or _default_output_path(args)

    if args.centerline:
        ok = kdraw.convert_centerline(
            args.input_path, output_path,
            threshold_val=args.threshold,
            epsilon=args.epsilon,
            no_sort=args.no_sort,
            invert_threshold=args.invert,
            blur_size=args.blur,
            use_adaptive=not args.no_adaptive,
            block_size=args.block_size,
            c_val=args.c_val,
            min_spur_length=args.min_spur,
            max_join=args.max_join,
            loop_gap=args.loop_gap,
            min_path_len=args.min_path_len,
            smooth_type=args.smooth_type,
            smooth_iters=args.smooth_iters,
            smooth_weight=args.smooth_weight,
            smooth_decimate=args.smooth_decimate,
            upscale_factor=args.upscale,
            morph_close=args.morph_close,
            morph_open=args.morph_open,
            collapse_junc=args.collapse_junc,
        )
    elif args.plotter:
        ok = kdraw.convert_plotter(
            args.input_path, output_path,
            use_canny=args.canny,
            threshold_val=args.threshold,
            epsilon=args.epsilon,
            no_sort=args.no_sort,
            blur_size=args.blur,
            max_join=args.max_join,
            smooth_type=args.smooth_type,
            smooth_iters=args.smooth_iters,
            smooth_weight=args.smooth_weight,
            smooth_decimate=args.smooth_decimate,
        )
    elif args.smooth:
        ok = kdraw.convert_smooth(
            args.input_path, output_path,
            filter_speckle=args.filter_speckle,
            color_precision=args.color_precision,
            corner_threshold=args.corner_threshold,
            path_precision=args.path_precision,
        )
    else:
        # Pixel-perfect mode: its return value is not inspected.
        num_colors = args.colors
        if num_colors is None and not args.lossless:
            ext = os.path.splitext(args.input_path.lower())[1]
            if ext in ('.jpg', '.jpeg'):
                print("WARNING: Input is a JPEG and no quantization is specified.")
                print("JPEG compression noise will prevent efficient pixel merging, producing a huge SVG.")
                print("Defaulting to 64 colors quantization for efficiency. Use --lossless to override.")
                num_colors = 64

        kdraw.convert_pixel_perfect(args.input_path, output_path, num_colors=num_colors)
        return

    if not ok:
        sys.exit(1)


if __name__ == "__main__":
    main()
kdraw/graph.py ADDED
@@ -0,0 +1,263 @@
1
+ import collections
2
+ import numpy as np
3
+
4
def build_and_prune_graph(skel_bool, min_spur_length=16, collapse_dist=8):
    """Trace a skeleton mask into polylines using a pixel graph.

    Skeleton pixels are linked with 8-connectivity. Pixels with one neighbour
    become endpoint nodes, connected groups of pixels with three or more
    neighbours become junction nodes, and runs of two-neighbour pixels are
    traced into edges between nodes. Short spurs are then removed, short
    junction-to-junction edges are collapsed, and edges meeting at a node of
    degree two are concatenated.

    Args:
        skel_bool: 2-D array; non-zero entries are skeleton pixels
            (indexed as ``[y, x]``).
        min_spur_length: An edge joining a degree-1 node to a node of degree
            >= 3 is removed when its path holds fewer pixels than this.
        collapse_dist: An edge joining two nodes of degree >= 3 is collapsed
            (its end nodes merged) when its path holds at most this many pixels.

    Returns:
        A list of ``float32`` arrays of shape ``(n, 2)``; each row is an
        ``(x, y)`` point along one traced path.
    """
    pixels = set(zip(*np.where(skel_bool)))

    # Compute 8-connected adjacency
    adj = {}
    for p in pixels:
        y, x = p
        candidates = [
            (y-1, x-1), (y-1, x), (y-1, x+1),
            (y, x-1), (y, x+1),
            (y+1, x-1), (y+1, x), (y+1, x+1)
        ]
        adj[p] = [c for c in candidates if c in pixels]

    # Classify pixels by neighbour count
    endpoints = {p for p, neighbors in adj.items() if len(neighbors) == 1}
    junctions = {p for p, neighbors in adj.items() if len(neighbors) >= 3}
    regular = {p for p, neighbors in adj.items() if len(neighbors) == 2}

    # Group junction pixels into clusters (each cluster is a node)
    visited_junc = set()
    junc_clusters = []
    for j in junctions:
        if j in visited_junc:
            continue
        cluster = []
        # deque gives O(1) pops from the front; list.pop(0) is O(n)
        queue = collections.deque([j])
        visited_junc.add(j)
        while queue:
            curr = queue.popleft()
            cluster.append(curr)
            for n in adj[curr]:
                if n in junctions and n not in visited_junc:
                    visited_junc.add(n)
                    queue.append(n)
        junc_clusters.append(cluster)

    # Create node mapping: pixel -> node_id
    node_to_pixels = {}
    pixel_to_node = {}
    node_id_counter = 0

    for ep in endpoints:
        node_to_pixels[node_id_counter] = [ep]
        pixel_to_node[ep] = node_id_counter
        node_id_counter += 1

    for jc in junc_clusters:
        node_to_pixels[node_id_counter] = jc
        for j in jc:
            pixel_to_node[j] = node_id_counter
        node_id_counter += 1

    # Trace edges connecting nodes
    edges = []
    edge_id_counter = 0
    visited_regular = set()
    added_direct = set()

    for node_id, node_pxs in node_to_pixels.items():
        for start_px in node_pxs:
            for neighbor in adj[start_px]:
                if neighbor in regular and neighbor not in visited_regular:
                    # Start tracing an edge through regular pixels
                    path = [start_px, neighbor]
                    visited_regular.add(neighbor)
                    curr = neighbor

                    while True:
                        # Never step straight back to the pixel we came from
                        next_candidates = [n for n in adj[curr] if n != path[-2]]
                        if not next_candidates:
                            break
                        next_px = None
                        for n in next_candidates:
                            if n in regular:
                                if n not in visited_regular:
                                    next_px = n
                                    break
                            elif n in pixel_to_node:
                                next_px = n
                                break
                        if next_px is None:
                            break

                        path.append(next_px)
                        if next_px in regular:
                            visited_regular.add(next_px)
                            curr = next_px
                        else:
                            # Reached a node pixel: the edge is complete
                            break

                    # Only keep the edge if the walk ended on a node pixel
                    end_node_id = pixel_to_node.get(path[-1])
                    if end_node_id is not None:
                        edges.append({
                            'id': edge_id_counter,
                            'p1': node_id,
                            'p2': end_node_id,
                            'path': path
                        })
                        edge_id_counter += 1
                elif neighbor in pixel_to_node:
                    # Direct node-to-node connection (one edge per node pair)
                    neighbor_node_id = pixel_to_node[neighbor]
                    if node_id != neighbor_node_id:
                        pair = tuple(sorted((node_id, neighbor_node_id)))
                        if pair not in added_direct:
                            added_direct.add(pair)
                            edges.append({
                                'id': edge_id_counter,
                                'p1': node_id,
                                'p2': neighbor_node_id,
                                'path': [start_px, neighbor]
                            })
                            edge_id_counter += 1

    # Find isolated loops (regular pixels never reached from any node)
    for p in regular:
        if p not in visited_regular:
            path = [p]
            visited_regular.add(p)
            curr = p
            while True:
                next_candidates = [n for n in adj[curr] if n in regular and n not in visited_regular]
                if not next_candidates:
                    break
                next_px = next_candidates[0]
                path.append(next_px)
                visited_regular.add(next_px)
                curr = next_px
            if len(path) > 2:
                # Close the ring when the walk ended next to its start
                if path[0] in adj[path[-1]]:
                    path.append(path[0])
                # A loop has no real node; anchor it on a synthetic one
                dummy_node = node_id_counter
                node_to_pixels[dummy_node] = [path[0]]
                node_id_counter += 1
                edges.append({
                    'id': edge_id_counter,
                    'p1': dummy_node,
                    'p2': dummy_node,
                    'path': path
                })
                edge_id_counter += 1

    # Prune spurs and collapse short edges, one change per pass, because
    # every change alters node degrees and they must be recomputed.
    changed = True
    while changed:
        changed = False
        node_degrees = collections.defaultdict(int)
        for e in edges:
            node_degrees[e['p1']] += 1
            node_degrees[e['p2']] += 1

        spur_to_remove = None
        for e in edges:
            u, v = e['p1'], e['p2']
            if u == v:
                continue
            deg_u = node_degrees[u]
            deg_v = node_degrees[v]
            # Only a dangling branch hanging off a real junction is a spur;
            # isolated strokes (both ends of degree 1) are always kept.
            dangles = (deg_u == 1 and deg_v >= 3) or (deg_v == 1 and deg_u >= 3)
            if dangles and len(e['path']) < min_spur_length:
                spur_to_remove = e
                break

        if spur_to_remove:
            edges.remove(spur_to_remove)
            changed = True
            continue

        edge_to_collapse = None
        for e in edges:
            u, v = e['p1'], e['p2']
            if u == v:
                continue
            deg_u = node_degrees[u]
            deg_v = node_degrees[v]
            length = len(e['path'])

            if deg_u >= 3 and deg_v >= 3 and length <= collapse_dist:
                edge_to_collapse = e
                break

        if edge_to_collapse:
            u = edge_to_collapse['p1']
            v = edge_to_collapse['p2']
            edges.remove(edge_to_collapse)
            # Re-point every edge that touched v at u, then fold v into u
            for e in edges:
                if e['p1'] == v:
                    e['p1'] = u
                if e['p2'] == v:
                    e['p2'] = u
            node_to_pixels[u].extend(node_to_pixels[v])
            del node_to_pixels[v]
            changed = True
            continue

    # Merge degree 2 nodes
    node_degrees = collections.defaultdict(int)
    node_edges = collections.defaultdict(list)
    for e in edges:
        node_edges[e['p1']].append(e)
        node_edges[e['p2']].append(e)
        node_degrees[e['p1']] += 1
        node_degrees[e['p2']] += 1

    degree_2_nodes = [node_id for node_id, deg in node_degrees.items() if deg == 2]
    for node_id in degree_2_nodes:
        node_es = node_edges[node_id]
        if len(node_es) == 2:
            e1, e2 = node_es[0], node_es[1]
            # Equal ids mean a self-loop listed twice; nothing to merge
            if e1['id'] != e2['id']:
                p1_pts = list(e1['path'])
                p2_pts = list(e2['path'])
                shared_pixels = set(node_to_pixels[node_id])

                p1_start_in_shared = p1_pts[0] in shared_pixels
                p1_end_in_shared = p1_pts[-1] in shared_pixels
                p2_start_in_shared = p2_pts[0] in shared_pixels
                p2_end_in_shared = p2_pts[-1] in shared_pixels

                # Orient both paths so they meet at the shared node, then
                # concatenate them and record the two outer end nodes.
                if p1_end_in_shared and p2_start_in_shared:
                    merged_path = p1_pts[:-1] + p2_pts
                    new_p1 = e1['p1'] if e1['p2'] == node_id else e1['p2']
                    new_p2 = e2['p2'] if e2['p1'] == node_id else e2['p1']
                elif p1_end_in_shared and p2_end_in_shared:
                    merged_path = p1_pts[:-1] + p2_pts[::-1]
                    new_p1 = e1['p1'] if e1['p2'] == node_id else e1['p2']
                    new_p2 = e2['p1'] if e2['p2'] == node_id else e2['p2']
                elif p1_start_in_shared and p2_start_in_shared:
                    merged_path = p1_pts[::-1][:-1] + p2_pts
                    new_p1 = e1['p2'] if e1['p1'] == node_id else e1['p1']
                    new_p2 = e2['p2'] if e2['p1'] == node_id else e2['p1']
                else:
                    merged_path = p2_pts[:-1] + p1_pts
                    new_p1 = e2['p1'] if e2['p2'] == node_id else e2['p2']
                    new_p2 = e1['p2'] if e1['p1'] == node_id else e1['p1']

                edges.remove(e1)
                edges.remove(e2)
                new_edge = {
                    'id': e1['id'],
                    'p1': new_p1,
                    'p2': new_p2,
                    'path': merged_path
                }
                edges.append(new_edge)
                # Keep the incidence lists of the outer nodes current so a
                # later degree-2 node sees the merged edge, not the old ones.
                replaced_ids = (e1['id'], e2['id'])
                node_edges[new_p1] = [e for e in node_edges[new_p1] if e['id'] not in replaced_ids] + [new_edge]
                node_edges[new_p2] = [e for e in node_edges[new_p2] if e['id'] not in replaced_ids] + [new_edge]

    # Pixels are stored as (y, x); emit points as (x, y)
    return [np.array([(pt[1], pt[0]) for pt in e['path']], dtype=np.float32) for e in edges]