nettracer3d 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff shows the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those versions.

Potentially problematic release: this version of nettracer3d might be problematic.

@@ -29,7 +29,6 @@ class InteractiveSegmenter:
29
29
  max_depth=None
30
30
  )
31
31
 
32
- self.feature_cache = None
33
32
  self.lock = threading.Lock()
34
33
  self._currently_segmenting = None
35
34
  self.use_gpu = True
@@ -47,9 +46,9 @@ class InteractiveSegmenter:
47
46
  self.two_slices = []
48
47
  self.speed = True
49
48
  self.cur_gpu = False
50
- self.map_slice = None
51
49
  self.prev_z = None
52
50
  self.previewing = False
51
+ self.batch_amplifier = 2 # Raise this to make scikit-learn prediction batches larger
53
52
 
54
53
  # flags to track state
55
54
  self._currently_processing = False
@@ -58,10 +57,11 @@ class InteractiveSegmenter:
58
57
  self.mem_lock = False
59
58
 
60
59
  #Adjustable feature map params:
61
- self.alphas = [1,2,4,8]
60
+ self.sigmas = [1,2,4,8]
62
61
  self.windows = 10
63
62
  self.dogs = [(1, 2), (2, 4), (4, 8)]
64
63
  self.master_chunk = 49
64
+ self.twod_chunk_size = 262144
65
65
 
66
66
  #Data when loading prev model:
67
67
  self.previous_foreground = None
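The renamed feature-map parameters control the multi-scale filter bank: each entry of sigmas yields one Gaussian-smoothed feature and each (s1, s2) pair in dogs yields one difference-of-Gaussians feature. A minimal CPU sketch of what these settings imply, using SciPy as a stand-in for the package's GPU code and a toy volume that is not part of the package:

    import numpy as np
    from scipy import ndimage

    sigmas = [1, 2, 4, 8]            # one smoothed feature per sigma
    dogs = [(1, 2), (2, 4), (4, 8)]  # one DoG feature per pair: G(s1) - G(s2)

    volume = np.random.rand(16, 64, 64).astype(np.float32)  # toy 3D image

    smoothed = {s: ndimage.gaussian_filter(volume, s) for s in sigmas}
    dog_features = [smoothed[s1] - smoothed[s2] for s1, s2 in dogs]

    # The "fast" feature map in this release is: original + one channel per sigma
    # + one channel per DoG pair + one gradient-magnitude channel.
    print(1 + len(sigmas) + len(dogs) + 1, "channels per voxel")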
@@ -69,139 +69,6 @@ class InteractiveSegmenter:
69
69
  self.previous_z_fore = None
70
70
  self.previous_z_back = None
71
71
 
72
- def segment_slice_chunked(self, slice_z, block_size=49):
73
- """
74
- A completely standalone method to segment a single z-slice in chunks
75
- with improved safeguards.
76
- """
77
- # Check if we're already processing this slice
78
- if self._currently_processing and self._currently_processing == slice_z:
79
- return
80
-
81
- # Set processing flag with the slice we're processing
82
- self._currently_processing = slice_z
83
-
84
- try:
85
- # First attempt to get the feature map
86
- feature_map = None
87
-
88
- try:
89
- if slice_z in self.feature_cache:
90
- feature_map = self.feature_cache[slice_z]
91
- elif hasattr(self, 'map_slice') and self.map_slice is not None and slice_z == self.current_z:
92
- feature_map = self.map_slice
93
- else:
94
- # Generate new feature map
95
- try:
96
- feature_map = self.get_feature_map_slice(slice_z, self.current_speed, False)
97
- self.map_slice = feature_map
98
- except Exception as e:
99
- print(f"Error generating feature map: {e}")
100
- import traceback
101
- traceback.print_exc()
102
- return # Exit if we can't generate the feature map
103
- except:
104
- # Generate new feature map
105
- try:
106
- feature_map = self.get_feature_map_slice(slice_z, self.current_speed, False)
107
- self.map_slice = feature_map
108
- except Exception as e:
109
- print(f"Error generating feature map: {e}")
110
- import traceback
111
- traceback.print_exc()
112
- return # Exit if we can't generate the feature map
113
-
114
- # Check that we have a valid feature map
115
- if feature_map is None:
116
- return
117
-
118
- # Get dimensions of the slice
119
- y_size, x_size = self.image_3d.shape[1], self.image_3d.shape[2]
120
- chunk_count = 0
121
-
122
- # Determine if feature_map is a CuPy array
123
- is_cupy_array = hasattr(feature_map, 'get')
124
-
125
- # Process in blocks for chunked feedback
126
- for y_start in range(0, y_size, block_size):
127
- if self._currently_processing != slice_z:
128
- return
129
-
130
- for x_start in range(0, x_size, block_size):
131
- if self._currently_processing != slice_z:
132
- return
133
-
134
- y_end = min(y_start + block_size, y_size)
135
- x_end = min(x_start + block_size, x_size)
136
-
137
- # Create coordinates and features for this block
138
- coords = []
139
- features_list = []
140
-
141
- for y in range(y_start, y_end):
142
- for x in range(x_start, x_end):
143
- coords.append((slice_z, y, x))
144
- features_list.append(feature_map[y, x])
145
-
146
- # Convert features to NumPy properly based on type
147
- if is_cupy_array:
148
- # If feature_map is a CuPy array, we need to extract a CuPy array
149
- # from the list and then convert it to NumPy
150
- try:
151
- # Create a CuPy array from the list of feature vectors
152
- features_array = cp.stack(features_list)
153
- # Convert to NumPy explicitly using .get()
154
- features = features_array.get()
155
- except Exception as e:
156
- print(f"Error converting features to NumPy: {e}")
157
- # Fallback: convert each feature individually
158
- features = []
159
- for feat in features_list:
160
- if hasattr(feat, 'get'):
161
- features.append(feat.get())
162
- else:
163
- features.append(feat)
164
- else:
165
- # If it's already a NumPy array, we can use it directly
166
- features = features_list
167
-
168
- # Skip empty blocks
169
- if not coords:
170
- continue
171
-
172
- # Predict
173
- try:
174
- try:
175
- predictions = self.model.predict(features)
176
- except ValueError:
177
- self.feature_cache = None
178
- self.map_slice = None
179
- return None, None
180
-
181
- # Split results
182
- foreground = set()
183
- background = set()
184
-
185
- for coord, pred in zip(coords, predictions):
186
- if pred:
187
- foreground.add(coord)
188
- else:
189
- background.add(coord)
190
-
191
- # Yield this chunk
192
- chunk_count += 1
193
- yield foreground, background
194
-
195
- except Exception as e:
196
- print(f"Error processing chunk: {e}")
197
- import traceback
198
- traceback.print_exc()
199
-
200
- finally:
201
- # Only clear if we're still processing the same slice
202
- # (otherwise, another slice might have taken over)
203
- if self._currently_processing == slice_z:
204
- self._currently_processing = None
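The removed segment_slice_chunked method followed a generator pattern: walk one z-slice in square blocks, predict each block, and yield its foreground/background coordinates so a caller can paint results incrementally. A minimal sketch of that pattern, where predict_block is a placeholder for any per-block classifier call and is not the package's API:

    def iter_block_predictions(feature_map, slice_z, predict_block, block_size=49):
        """Yield (foreground, background) coordinate sets one block at a time."""
        y_size, x_size = feature_map.shape[:2]
        for y0 in range(0, y_size, block_size):
            for x0 in range(0, x_size, block_size):
                y1, x1 = min(y0 + block_size, y_size), min(x0 + block_size, x_size)
                coords = [(slice_z, y, x) for y in range(y0, y1) for x in range(x0, x1)]
                feats = [feature_map[y, x] for (_, y, x) in coords]
                preds = predict_block(feats)  # e.g. a RandomForestClassifier.predict
                fore = {c for c, p in zip(coords, preds) if p}
                back = {c for c, p in zip(coords, preds) if not p}
                yield fore, back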
205
72
 
206
73
  def process_chunk(self, chunk_coords):
207
74
  """Process a chunk staying in CuPy as much as possible"""
@@ -209,116 +76,61 @@ class InteractiveSegmenter:
209
76
  foreground_coords = [] # Keep as list of CuPy coordinates
210
77
  background_coords = []
211
78
 
212
- if self.previewing or not self.use_two:
213
79
 
214
- if self.realtimechunks is None:
215
- z_min, z_max = chunk_coords[0], chunk_coords[1]
216
- y_min, y_max = chunk_coords[2], chunk_coords[3]
217
- x_min, x_max = chunk_coords[4], chunk_coords[5]
218
-
219
- # Create meshgrid using CuPy - already good
220
- z_range = cp.arange(z_min, z_max)
221
- y_range = cp.arange(y_min, y_max)
222
- x_range = cp.arange(x_min, x_max)
223
-
224
- # More efficient way to create coordinates
225
- chunk_coords_array = cp.stack(cp.meshgrid(
226
- z_range, y_range, x_range, indexing='ij'
227
- )).reshape(3, -1).T
228
-
229
- # Keep as CuPy array instead of converting to list
230
- chunk_coords_gpu = chunk_coords_array
231
- else:
232
- # Convert list to CuPy array once
233
- chunk_coords_gpu = cp.array(chunk_coords)
234
- z_coords = chunk_coords_gpu[:, 0]
235
- y_coords = chunk_coords_gpu[:, 1]
236
- x_coords = chunk_coords_gpu[:, 2]
237
-
238
- z_min, z_max = cp.min(z_coords).item(), cp.max(z_coords).item()
239
- y_min, y_max = cp.min(y_coords).item(), cp.max(y_coords).item()
240
- x_min, x_max = cp.min(x_coords).item(), cp.max(x_coords).item()
241
-
242
- # Extract subarray - already good
243
- subarray = self.image_3d[z_min:z_max+1, y_min:y_max+1, x_min:x_max+1]
244
-
245
- # Compute features
246
- if self.speed:
247
- feature_map = self.compute_feature_maps_gpu(subarray)
248
- else:
249
- feature_map = self.compute_deep_feature_maps_gpu(subarray)
250
-
251
- # Extract features more efficiently
252
- local_coords = chunk_coords_gpu.copy()
253
- local_coords[:, 0] -= z_min
254
- local_coords[:, 1] -= y_min
255
- local_coords[:, 2] -= x_min
80
+ if self.realtimechunks is None:
81
+ z_min, z_max = chunk_coords[0], chunk_coords[1]
82
+ y_min, y_max = chunk_coords[2], chunk_coords[3]
83
+ x_min, x_max = chunk_coords[4], chunk_coords[5]
256
84
 
257
- # Vectorized feature extraction
258
- features_gpu = feature_map[local_coords[:, 0], local_coords[:, 1], local_coords[:, 2]]
85
+ # Create meshgrid using CuPy - already good
86
+ z_range = cp.arange(z_min, z_max)
87
+ y_range = cp.arange(y_min, y_max)
88
+ x_range = cp.arange(x_min, x_max)
259
89
 
260
- features_cpu = cp.asnumpy(features_gpu)
261
- predictions = self.model.predict(features_cpu)
90
+ # More efficient way to create coordinates
91
+ chunk_coords_array = cp.stack(cp.meshgrid(
92
+ z_range, y_range, x_range, indexing='ij'
93
+ )).reshape(3, -1).T
262
94
 
263
- # Keep coordinates as CuPy arrays
264
- pred_mask = cp.array(predictions, dtype=bool)
265
- foreground_coords = chunk_coords_gpu[pred_mask]
266
- background_coords = chunk_coords_gpu[~pred_mask]
267
-
95
+ # Keep as CuPy array instead of converting to list
96
+ chunk_coords_gpu = chunk_coords_array
268
97
  else:
269
- # 2D implementation for GPU
270
- foreground_coords = []
271
- background_coords = []
272
-
273
- # Check if chunk_coords is in the 2D format [z, y_start, y_end, x_start, x_end]
274
- if len(chunk_coords) == 5:
275
- z = chunk_coords[0]
276
- y_start = chunk_coords[1]
277
- y_end = chunk_coords[2]
278
- x_start = chunk_coords[3]
279
- x_end = chunk_coords[4]
280
-
281
- # Generate coordinates for this slice or subchunk using the new function
282
- coords_array = self.twodim_coords(z, y_start, y_end, x_start, x_end)
283
-
284
- # Get the feature map for this z-slice
285
- if self.feature_cache is None:
286
- feature_map = self.get_feature_map_slice(z, self.speed, True) # Use GPU
287
- elif z not in self.feature_cache and not self.previewing:
288
- feature_map = self.get_feature_map_slice(z, self.speed, True) # Use GPU
289
- elif (z not in self.feature_cache or self.feature_cache is None) and self.previewing:
290
- feature_map = self.map_slice
291
- if feature_map is None:
292
- return [], []
293
- else:
294
- feature_map = self.feature_cache[z]
295
-
296
- # Check if we have a valid feature map
297
- if feature_map is None:
298
- return [], []
299
-
300
- # Extract y and x coordinates from the array
301
- y_indices = coords_array[:, 1]
302
- x_indices = coords_array[:, 2]
303
-
304
- # Extract features using CuPy indexing
305
- features_gpu = feature_map[y_indices, x_indices]
306
-
307
- # Convert to NumPy for the model
308
- features_cpu = features_gpu.get()
309
-
310
- # Make predictions
311
- predictions = self.model.predict(features_cpu)
312
-
313
- # Create CuPy boolean mask from predictions
314
- pred_mask = cp.array(predictions, dtype=bool)
315
-
316
- # Split into foreground and background using the mask
317
- fore_coords = coords_array[pred_mask]
318
- back_coords = coords_array[~pred_mask]
319
-
320
- return fore_coords, back_coords
98
+ # Convert list to CuPy array once
99
+ chunk_coords_gpu = cp.array(chunk_coords)
100
+ z_coords = chunk_coords_gpu[:, 0]
101
+ y_coords = chunk_coords_gpu[:, 1]
102
+ x_coords = chunk_coords_gpu[:, 2]
321
103
 
104
+ z_min, z_max = cp.min(z_coords).item(), cp.max(z_coords).item()
105
+ y_min, y_max = cp.min(y_coords).item(), cp.max(y_coords).item()
106
+ x_min, x_max = cp.min(x_coords).item(), cp.max(x_coords).item()
107
+
108
+ # Extract subarray - already good
109
+ subarray = self.image_3d[z_min:z_max+1, y_min:y_max+1, x_min:x_max+1]
110
+
111
+ # Compute features
112
+ if self.speed:
113
+ feature_map = self.compute_feature_maps_gpu(subarray)
114
+ else:
115
+ feature_map = self.compute_deep_feature_maps_gpu(subarray)
116
+
117
+ # Extract features more efficiently
118
+ local_coords = chunk_coords_gpu.copy()
119
+ local_coords[:, 0] -= z_min
120
+ local_coords[:, 1] -= y_min
121
+ local_coords[:, 2] -= x_min
122
+
123
+ # Vectorized feature extraction
124
+ features_gpu = feature_map[local_coords[:, 0], local_coords[:, 1], local_coords[:, 2]]
125
+
126
+ features_cpu = cp.asnumpy(features_gpu)
127
+ predictions = self.model.predict(features_cpu)
128
+
129
+ # Keep coordinates as CuPy arrays
130
+ pred_mask = cp.array(predictions, dtype=bool)
131
+ foreground_coords = chunk_coords_gpu[pred_mask]
132
+ background_coords = chunk_coords_gpu[~pred_mask]
133
+
322
134
  return foreground_coords, background_coords
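The rewritten 3D branch builds every voxel coordinate of the chunk with a meshgrid, indexes the feature map once, and splits coordinates with a boolean prediction mask instead of looping. A NumPy sketch of the same idea; the trained model and the chunk bounds are assumptions, not values taken from the package:

    import numpy as np

    def split_chunk(feature_map, model, z0, z1, y0, y1, x0, x1):
        # All voxel coordinates of the chunk, shape (N, 3)
        coords = np.stack(np.meshgrid(np.arange(z0, z1), np.arange(y0, y1),
                                      np.arange(x0, x1), indexing='ij')).reshape(3, -1).T
        # Vectorized feature lookup relative to the chunk origin
        feats = feature_map[coords[:, 0] - z0, coords[:, 1] - y0, coords[:, 2] - x0]
        mask = np.asarray(model.predict(feats), dtype=bool)
        return coords[mask], coords[~mask]   # foreground, background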
323
135
 
324
136
  def twodim_coords(self, z, y_start, y_end, x_start, x_end):
@@ -356,337 +168,424 @@ class InteractiveSegmenter:
356
168
 
357
169
  return slice_coords
358
170
 
171
+
359
172
  def compute_feature_maps_gpu(self, image_3d=None):
360
- """Compute feature maps using GPU with CuPy"""
173
+ """Optimized GPU version that caches Gaussian filters to avoid redundant computation"""
361
174
  import cupy as cp
362
175
  import cupyx.scipy.ndimage as cupy_ndimage
363
176
 
364
- features = []
365
177
  if image_3d is None:
366
178
  image_3d = self.image_3d # Assuming this is already a cupy array
179
+
180
+ if image_3d.ndim == 4 and image_3d.shape[-1] == 3:
181
+ # RGB case - process each channel
182
+ features_per_channel = []
183
+ for channel in range(3):
184
+ channel_features = self.compute_feature_maps_gpu(image_3d[..., channel])
185
+ features_per_channel.append(channel_features)
186
+
187
+ # Stack all channel features
188
+ return cp.concatenate(features_per_channel, axis=-1)
367
189
 
368
- original_shape = image_3d.shape
190
+ # Pre-allocate result array
191
+ num_features = len(self.sigmas) + len(self.dogs) + 2
192
+ features = cp.empty(image_3d.shape + (num_features,), dtype=image_3d.dtype)
193
+ features[..., 0] = image_3d
369
194
 
370
- # Gaussian smoothing at different scales
371
- for sigma in self.alphas:
372
- smooth = cupy_ndimage.gaussian_filter(image_3d, sigma)
373
- features.append(smooth)
195
+ feature_idx = 1
374
196
 
375
- # Difference of Gaussians
376
- for (s1, s2) in self.dogs:
377
- g1 = cupy_ndimage.gaussian_filter(image_3d, s1)
378
- g2 = cupy_ndimage.gaussian_filter(image_3d, s2)
379
- dog = g1 - g2
380
- features.append(dog)
197
+ # Cache for Gaussian filters - only compute each sigma once
198
+ gaussian_cache = {}
381
199
 
382
- # Gradient computations using cupyx
383
- gx = cupy_ndimage.sobel(image_3d, axis=2, mode='reflect') # x direction
384
- gy = cupy_ndimage.sobel(image_3d, axis=1, mode='reflect') # y direction
385
- gz = cupy_ndimage.sobel(image_3d, axis=0, mode='reflect') # z direction
200
+ # Compute all unique sigmas needed (from both sigmas and dogs)
201
+ all_sigmas = set(self.sigmas)
202
+ for s1, s2 in self.dogs:
203
+ all_sigmas.add(s1)
204
+ all_sigmas.add(s2)
386
205
 
387
- # Gradient magnitude
388
- gradient_magnitude = cp.sqrt(gx**2 + gy**2 + gz**2)
389
- features.append(gradient_magnitude)
206
+ # Pre-compute all Gaussian filters
207
+ for sigma in all_sigmas:
208
+ gaussian_cache[sigma] = cupy_ndimage.gaussian_filter(image_3d, sigma)
209
+
210
+ # Gaussian smoothing - use cached results
211
+ for sigma in self.sigmas:
212
+ features[..., feature_idx] = gaussian_cache[sigma]
213
+ feature_idx += 1
214
+
215
+ # Difference of Gaussians - use cached results
216
+ for s1, s2 in self.dogs:
217
+ features[..., feature_idx] = gaussian_cache[s1] - gaussian_cache[s2]
218
+ feature_idx += 1
390
219
 
391
- # Verify shapes
392
- for i, feat in enumerate(features):
393
- if feat.shape != original_shape:
394
- feat_adjusted = cp.expand_dims(feat, axis=0)
395
- if feat_adjusted.shape != original_shape:
396
- raise ValueError(f"Feature {i} has shape {feat.shape}, expected {original_shape}")
397
- features[i] = feat_adjusted
220
+ # Gradient magnitude
221
+ gx = cupy_ndimage.sobel(image_3d, axis=2, mode='reflect')
222
+ gy = cupy_ndimage.sobel(image_3d, axis=1, mode='reflect')
223
+ gz = cupy_ndimage.sobel(image_3d, axis=0, mode='reflect')
224
+ features[..., feature_idx] = cp.sqrt(gx**2 + gy**2 + gz**2)
398
225
 
399
- return cp.stack(features, axis=-1)
226
+ return features
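The optimization in this method is to compute each Gaussian blur exactly once and reuse it for both the smoothing features and the DoG pairs. A small CPU analogue of the caching step, with SciPy standing in for cupyx.scipy.ndimage and a toy image as input:

    import numpy as np
    from scipy import ndimage

    def cached_gaussians(image, sigmas, dogs):
        # Every sigma that either feature family needs, computed exactly once
        wanted = set(sigmas) | {s for pair in dogs for s in pair}
        return {s: ndimage.gaussian_filter(image, s) for s in wanted}

    img = np.random.rand(8, 32, 32).astype(np.float32)
    cache = cached_gaussians(img, sigmas=[1, 2, 4, 8], dogs=[(1, 2), (2, 4), (4, 8)])
    dog_1_2 = cache[1] - cache[2]   # reused blurs, no second gaussian_filter call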
400
227
 
401
228
  def compute_deep_feature_maps_gpu(self, image_3d=None):
402
- """Compute feature maps using GPU"""
229
+ """Vectorized detailed GPU version with Gaussian gradient magnitudes, Laplacians, and largest Hessian eigenvalue only"""
403
230
  import cupy as cp
404
231
  import cupyx.scipy.ndimage as cupy_ndimage
405
232
 
406
- features = []
407
233
  if image_3d is None:
408
234
  image_3d = self.image_3d # Assuming this is already a cupy array
409
- original_shape = image_3d.shape
410
-
411
- # Gaussian and DoG using cupyx
412
- for sigma in self.alphas:
413
- smooth = cupy_ndimage.gaussian_filter(image_3d, sigma)
414
- features.append(smooth)
415
-
416
- # Difference of Gaussians
417
- for (s1, s2) in self.dogs:
418
- g1 = cupy_ndimage.gaussian_filter(image_3d, s1)
419
- g2 = cupy_ndimage.gaussian_filter(image_3d, s2)
420
- dog = g1 - g2
421
- features.append(dog)
422
-
423
- # Local statistics using cupyx's convolve
424
- window_size = self.windows
425
- kernel = cp.ones((window_size, window_size, window_size)) / (window_size**3)
426
-
427
- # Local mean
428
- local_mean = cupy_ndimage.convolve(image_3d, kernel, mode='reflect')
429
- features.append(local_mean)
430
-
431
- # Local variance
432
- mean = cp.mean(image_3d)
433
- local_var = cupy_ndimage.convolve((image_3d - mean)**2, kernel, mode='reflect')
434
- features.append(local_var)
435
-
436
- # Gradient computations using cupyx
437
- gx = cupy_ndimage.sobel(image_3d, axis=2, mode='reflect')
438
- gy = cupy_ndimage.sobel(image_3d, axis=1, mode='reflect')
439
- gz = cupy_ndimage.sobel(image_3d, axis=0, mode='reflect')
440
-
441
- # Gradient magnitude
442
- gradient_magnitude = cp.sqrt(gx**2 + gy**2 + gz**2)
443
- features.append(gradient_magnitude)
444
-
445
- # Second-order gradients
446
- gxx = cupy_ndimage.sobel(gx, axis=2, mode='reflect')
447
- gyy = cupy_ndimage.sobel(gy, axis=1, mode='reflect')
448
- gzz = cupy_ndimage.sobel(gz, axis=0, mode='reflect')
449
-
450
- # Laplacian (sum of second derivatives)
451
- laplacian = gxx + gyy + gzz
452
- features.append(laplacian)
453
-
454
- # Hessian determinant
455
- hessian_det = gxx * gyy * gzz
456
- features.append(hessian_det)
235
+
236
+ if image_3d.ndim == 4 and image_3d.shape[-1] == 3:
237
+ # RGB case - process each channel
238
+ features_per_channel = []
239
+ for channel in range(3):
240
+ channel_features = self.compute_deep_feature_maps_gpu(image_3d[..., channel])
241
+ features_per_channel.append(channel_features)
242
+
243
+ # Stack all channel features
244
+ return cp.concatenate(features_per_channel, axis=-1)
245
+
246
+ # Calculate total number of features
247
+ num_basic_features = 1 + len(self.sigmas) + len(self.dogs) # original + gaussians + dogs
248
+ num_gradient_features = len(self.sigmas) # gradient magnitude for each sigma
249
+ num_laplacian_features = len(self.sigmas) # laplacian for each sigma
250
+ num_hessian_features = len(self.sigmas) * 1 # 1 eigenvalue (largest) for each sigma
251
+
252
+ total_features = num_basic_features + num_gradient_features + num_laplacian_features + num_hessian_features
253
+
254
+ # Pre-allocate result array
255
+ features = cp.empty(image_3d.shape + (total_features,), dtype=image_3d.dtype)
256
+ features[..., 0] = image_3d
257
+
258
+ feature_idx = 1
259
+
260
+ # Cache for Gaussian filters - only compute each sigma once
261
+ gaussian_cache = {}
262
+
263
+ # Compute all unique sigmas needed (from both sigmas and dogs)
264
+ all_sigmas = set(self.sigmas)
265
+ for s1, s2 in self.dogs:
266
+ all_sigmas.add(s1)
267
+ all_sigmas.add(s2)
268
+
269
+ # Pre-compute all Gaussian filters
270
+ for sigma in all_sigmas:
271
+ gaussian_cache[sigma] = cupy_ndimage.gaussian_filter(image_3d, sigma)
272
+
273
+ # Gaussian smoothing - use cached results
274
+ for sigma in self.sigmas:
275
+ features[..., feature_idx] = gaussian_cache[sigma]
276
+ feature_idx += 1
277
+
278
+ # Difference of Gaussians - use cached results
279
+ for s1, s2 in self.dogs:
280
+ features[..., feature_idx] = gaussian_cache[s1] - gaussian_cache[s2]
281
+ feature_idx += 1
282
+
283
+ # Gaussian gradient magnitudes for each sigma (vectorized)
284
+ for sigma in self.sigmas:
285
+ gaussian_img = gaussian_cache[sigma]
286
+ gx = cupy_ndimage.sobel(gaussian_img, axis=2, mode='reflect')
287
+ gy = cupy_ndimage.sobel(gaussian_img, axis=1, mode='reflect')
288
+ gz = cupy_ndimage.sobel(gaussian_img, axis=0, mode='reflect')
289
+ features[..., feature_idx] = cp.sqrt(gx**2 + gy**2 + gz**2)
290
+ feature_idx += 1
291
+
292
+ # Laplacian of Gaussian for each sigma (vectorized)
293
+ for sigma in self.sigmas:
294
+ gaussian_img = gaussian_cache[sigma]
295
+ features[..., feature_idx] = cupy_ndimage.laplace(gaussian_img, mode='reflect')
296
+ feature_idx += 1
297
+
298
+ # Largest Hessian eigenvalue for each sigma (fully vectorized)
299
+ for sigma in self.sigmas:
300
+ gaussian_img = gaussian_cache[sigma]
301
+
302
+ # Compute second derivatives (Hessian components) - all vectorized
303
+ hxx = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[0, 0, 2], mode='reflect')
304
+ hyy = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[0, 2, 0], mode='reflect')
305
+ hzz = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[2, 0, 0], mode='reflect')
306
+ hxy = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[0, 1, 1], mode='reflect')
307
+ hxz = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[1, 0, 1], mode='reflect')
308
+ hyz = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[1, 1, 0], mode='reflect')
309
+
310
+ # Vectorized eigenvalue computation using cupy broadcasting
311
+ # Create arrays with shape (d0, d1, d2, 3, 3) for all Hessian matrices
312
+ shape = image_3d.shape
313
+ hessian_matrices = cp.zeros(shape + (3, 3))
314
+
315
+ # Fill the symmetric Hessian matrices
316
+ hessian_matrices[..., 0, 0] = hxx
317
+ hessian_matrices[..., 1, 1] = hyy
318
+ hessian_matrices[..., 2, 2] = hzz
319
+ hessian_matrices[..., 0, 1] = hessian_matrices[..., 1, 0] = hxy
320
+ hessian_matrices[..., 0, 2] = hessian_matrices[..., 2, 0] = hxz
321
+ hessian_matrices[..., 1, 2] = hessian_matrices[..., 2, 1] = hyz
322
+
323
+ # Reshape for batch eigenvalue computation
324
+ original_shape = hessian_matrices.shape[:-2] # (d0, d1, d2)
325
+ batch_size = int(cp.prod(cp.array(original_shape)))
326
+ hessian_batch = hessian_matrices.reshape(batch_size, 3, 3)
327
+
328
+ # Compute eigenvalues for all matrices at once using CuPy
329
+ # Since Hessian matrices are symmetric, we can use eigvalsh
330
+ eigenvalues_batch = cp.linalg.eigvalsh(hessian_batch)
331
+
332
+ # Get only the largest eigenvalue for each matrix
333
+ largest_eigenvalues = cp.max(eigenvalues_batch, axis=1)
334
+
335
+ # Reshape back to original spatial dimensions
336
+ largest_eigenvalues = largest_eigenvalues.reshape(original_shape)
337
+
338
+ # Add the largest eigenvalue as a feature
339
+ features[..., feature_idx] = largest_eigenvalues
340
+ feature_idx += 1
457
341
 
458
- # Verify shapes
459
- for i, feat in enumerate(features):
460
- if feat.shape != original_shape:
461
- feat_adjusted = cp.expand_dims(feat, axis=0)
462
- if feat_adjusted.shape != original_shape:
463
- raise ValueError(f"Feature {i} has shape {feat.shape}, expected {original_shape}")
464
- features[i] = feat_adjusted
342
+ # Normalize only morphological features, keep intensity features raw
343
+ intensity_features = features[..., :num_basic_features] # original + gaussians + DoGs
344
+ morphology_features = features[..., num_basic_features:] # gradients + laplacians + eigenvalues
345
+
346
+ # Normalize only morphological features using CuPy
347
+ morph_means = cp.mean(morphology_features, axis=(0, 1, 2), keepdims=True)
348
+ morph_stds = cp.std(morphology_features, axis=(0, 1, 2), keepdims=True)
349
+ morph_stds = cp.where(morph_stds == 0, 1, morph_stds)
350
+ morphology_features = (morphology_features - morph_means) / morph_stds
351
+
352
+ # Recombine
353
+ features = cp.concatenate([intensity_features, morphology_features], axis=-1)
465
354
 
466
- return cp.stack(features, axis=-1)
355
+ return features
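The Hessian feature above reshapes the per-voxel 3x3 symmetric matrices into one (N, 3, 3) batch and takes the largest eigenvalue with a single eigvalsh call. The NumPy sketch below mirrors that batching; the array shapes and random derivatives are illustrative only:

    import numpy as np

    # Per-voxel second derivatives for a toy (4, 5, 6) volume
    shape = (4, 5, 6)
    hxx, hyy, hzz, hxy, hxz, hyz = (np.random.rand(*shape) for _ in range(6))

    H = np.zeros(shape + (3, 3))
    H[..., 0, 0], H[..., 1, 1], H[..., 2, 2] = hxx, hyy, hzz
    H[..., 0, 1] = H[..., 1, 0] = hxy
    H[..., 0, 2] = H[..., 2, 0] = hxz
    H[..., 1, 2] = H[..., 2, 1] = hyz

    eigvals = np.linalg.eigvalsh(H.reshape(-1, 3, 3))   # symmetric, so eigvalsh
    largest = eigvals.max(axis=1).reshape(shape)         # one feature value per voxel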
467
356
 
468
357
 
469
- def compute_feature_maps_gpu_2d(self, z=None):
470
- """Compute feature maps for 2D images using GPU with CuPy"""
358
+ def compute_feature_maps_gpu_2d(self, z=None, image_2d = None):
359
+ """Compute feature maps for 2D images using GPU with caching optimization"""
471
360
  import cupy as cp
472
361
  import cupyx.scipy.ndimage as cupy_ndimage
473
362
 
474
- # Extract 2D slice if z is provided, otherwise use the image directly
475
- if z is not None:
363
+ # Extract 2D slice - convert to CuPy array if needed
364
+ if image_2d is None:
476
365
  image_2d = cp.asarray(self.image_3d[z, :, :])
477
- else:
478
- # Assuming image_2d is already available or passed
479
- image_2d = cp.asarray(self.image_2d)
366
+
367
+ if image_2d.ndim == 3 and image_2d.shape[-1] == 3:
368
+ # RGB case - process each channel
369
+ features_per_channel = []
370
+ for channel in range(3):
371
+ channel_features = self.compute_feature_maps_gpu_2d(image_2d = image_2d[..., channel])
372
+ features_per_channel.append(channel_features)
373
+
374
+ # Stack all channel features
375
+ return cp.concatenate(features_per_channel, axis=-1)
480
376
 
481
- original_shape = image_2d.shape
482
- features = []
377
+ # Pre-allocate result array
378
+ num_features = len(self.sigmas) + len(self.dogs) + 2 # +2 for original image + gradient
379
+ features = cp.empty(image_2d.shape + (num_features,), dtype=image_2d.dtype)
483
380
 
484
- # Gaussian smoothing at different scales
485
- for sigma in self.alphas:
486
- smooth = cupy_ndimage.gaussian_filter(image_2d, sigma)
487
- features.append(smooth)
381
+ # Include original image as first feature
382
+ features[..., 0] = image_2d
383
+ feature_idx = 1
488
384
 
489
- # Difference of Gaussians
490
- for (s1, s2) in self.dogs:
491
- g1 = cupy_ndimage.gaussian_filter(image_2d, s1)
492
- g2 = cupy_ndimage.gaussian_filter(image_2d, s2)
493
- dog = g1 - g2
494
- features.append(dog)
385
+ # Cache for Gaussian filters - only compute each sigma once
386
+ gaussian_cache = {}
495
387
 
496
- # Gradient computations for 2D
497
- gx = cupy_ndimage.sobel(image_2d, axis=1, mode='reflect') # x direction
498
- gy = cupy_ndimage.sobel(image_2d, axis=0, mode='reflect') # y direction
388
+ # Compute all unique sigmas needed (from both sigmas and dogs)
389
+ all_sigmas = set(self.sigmas)
390
+ for s1, s2 in self.dogs:
391
+ all_sigmas.add(s1)
392
+ all_sigmas.add(s2)
499
393
 
500
- # Gradient magnitude (2D version - no z component)
501
- gradient_magnitude = cp.sqrt(gx**2 + gy**2)
502
- features.append(gradient_magnitude)
503
-
504
- # Verify shapes
505
- for i, feat in enumerate(features):
506
- if feat.shape != original_shape:
507
- # Check dimensionality and expand if needed
508
- if len(feat.shape) < len(original_shape):
509
- feat_adjusted = feat
510
- missing_dims = len(original_shape) - len(feat.shape)
511
- for _ in range(missing_dims):
512
- feat_adjusted = cp.expand_dims(feat_adjusted, axis=0)
513
-
514
- if feat_adjusted.shape != original_shape:
515
- raise ValueError(f"Feature {i} has shape {feat.shape}, expected {original_shape}")
516
-
517
- features[i] = feat_adjusted
394
+ # Pre-compute all Gaussian filters
395
+ for sigma in all_sigmas:
396
+ gaussian_cache[sigma] = cupy_ndimage.gaussian_filter(image_2d, sigma)
397
+
398
+ # Gaussian smoothing - use cached results
399
+ for sigma in self.sigmas:
400
+ features[..., feature_idx] = gaussian_cache[sigma]
401
+ feature_idx += 1
518
402
 
519
- # Stack features along a new dimension
520
- result = cp.stack(features, axis=-1)
403
+ # Difference of Gaussians - use cached results
404
+ for s1, s2 in self.dogs:
405
+ features[..., feature_idx] = gaussian_cache[s1] - gaussian_cache[s2]
406
+ feature_idx += 1
521
407
 
522
- # Optional: Return as numpy array if needed
523
- # result = cp.asnumpy(result)
408
+ # Gradient magnitude (2D version)
409
+ gx = cupy_ndimage.sobel(image_2d, axis=1, mode='reflect') # x direction
410
+ gy = cupy_ndimage.sobel(image_2d, axis=0, mode='reflect') # y direction
411
+ features[..., feature_idx] = cp.sqrt(gx**2 + gy**2)
524
412
 
525
- return result
413
+ return features
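Both the 2D and 3D feature builders now handle RGB input by recursing per channel and concatenating the per-channel feature stacks along the last axis. A hedged sketch of that dispatch; features_for_channel is a placeholder for the single-channel path, not the package's function:

    import numpy as np
    from scipy import ndimage

    def features_for_channel(img2d):
        # Placeholder single-channel feature stack: original + one blurred copy
        return np.stack([img2d, ndimage.gaussian_filter(img2d, 2)], axis=-1)

    def features_any(img2d):
        if img2d.ndim == 3 and img2d.shape[-1] == 3:           # RGB slice
            per_channel = [features_any(img2d[..., c]) for c in range(3)]
            return np.concatenate(per_channel, axis=-1)         # channels side by side
        return features_for_channel(img2d)

    rgb = np.random.rand(32, 32, 3).astype(np.float32)
    print(features_any(rgb).shape)   # (32, 32, 6): 2 features x 3 channels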
526
414
 
527
- def compute_deep_feature_maps_gpu_2d(self, z=None):
528
- """Compute 2D feature maps using GPU with CuPy"""
415
+ def compute_deep_feature_maps_gpu_2d(self, z=None, image_2d = None):
416
+ """Vectorized detailed GPU version with Gaussian gradient magnitudes, Laplacians, and largest Hessian eigenvalue for 2D images"""
529
417
  import cupy as cp
530
418
  import cupyx.scipy.ndimage as cupy_ndimage
531
419
 
532
- # Extract 2D slice if z is provided, otherwise use the image directly
533
- if z is not None:
420
+ if z is None:
421
+ z = self.image_3d.shape[0] // 2 # Use middle slice if not specified
422
+
423
+ # Extract 2D slice - convert to CuPy array if needed
424
+ if image_2d is None:
534
425
  image_2d = cp.asarray(self.image_3d[z, :, :])
535
- else:
536
- # Assuming image_2d is already available or passed
537
- image_2d = cp.asarray(self.image_2d)
538
-
539
- original_shape = image_2d.shape
540
- features = []
541
-
542
- # Stage 1: Compute all base features
543
-
544
- # Gaussian smoothing
545
- gaussian_results = {}
546
- for sigma in self.alphas:
547
- smooth = cupy_ndimage.gaussian_filter(image_2d, sigma)
548
- gaussian_results[sigma] = smooth
549
- features.append(smooth)
550
-
551
- # Difference of Gaussians
552
- for (s1, s2) in self.dogs:
553
- g1 = cupy_ndimage.gaussian_filter(image_2d, s1)
554
- g2 = cupy_ndimage.gaussian_filter(image_2d, s2)
555
- dog = g1 - g2
556
- features.append(dog)
557
-
558
- # Local statistics using 2D kernel
559
- window_size = self.windows
560
- kernel = cp.ones((window_size, window_size)) / (window_size**2)
561
-
562
- # Local mean
563
- local_mean = cupy_ndimage.convolve(image_2d, kernel, mode='reflect')
564
- features.append(local_mean)
565
-
566
- # Local variance
567
- mean = cp.mean(image_2d)
568
- local_var = cupy_ndimage.convolve((image_2d - mean)**2, kernel, mode='reflect')
569
- features.append(local_var)
570
-
571
- # First-order gradients
572
- gx = cupy_ndimage.sobel(image_2d, axis=1, mode='reflect') # x direction
573
- gy = cupy_ndimage.sobel(image_2d, axis=0, mode='reflect') # y direction
426
+
427
+ if image_2d.ndim == 3 and image_2d.shape[-1] == 3:
428
+ # RGB case - process each channel
429
+ features_per_channel = []
430
+ for channel in range(3):
431
+ channel_features = self.compute_deep_feature_maps_gpu_2d(image_2d = image_2d[..., channel])
432
+ features_per_channel.append(channel_features)
433
+
434
+ # Stack all channel features
435
+ return cp.concatenate(features_per_channel, axis=-1)
436
+
437
+
438
+ # Calculate total number of features
439
+ num_basic_features = 1 + len(self.sigmas) + len(self.dogs) # original + gaussians + dogs
440
+ num_gradient_features = len(self.sigmas) # gradient magnitude for each sigma
441
+ num_laplacian_features = len(self.sigmas) # laplacian for each sigma
442
+ num_hessian_features = len(self.sigmas) * 1 # 1 eigenvalue (largest) for each sigma
443
+
444
+ total_features = num_basic_features + num_gradient_features + num_laplacian_features + num_hessian_features
445
+
446
+ # Pre-allocate result array
447
+ features = cp.empty(image_2d.shape + (total_features,), dtype=image_2d.dtype)
448
+ features[..., 0] = image_2d
449
+
450
+ feature_idx = 1
451
+
452
+ # Cache for Gaussian filters - only compute each sigma once
453
+ gaussian_cache = {}
454
+
455
+ # Compute all unique sigmas needed (from both sigmas and dogs)
456
+ all_sigmas = set(self.sigmas)
457
+ for s1, s2 in self.dogs:
458
+ all_sigmas.add(s1)
459
+ all_sigmas.add(s2)
460
+
461
+ # Pre-compute all Gaussian filters
462
+ for sigma in all_sigmas:
463
+ gaussian_cache[sigma] = cupy_ndimage.gaussian_filter(image_2d, sigma)
464
+
465
+ # Gaussian smoothing - use cached results
466
+ for sigma in self.sigmas:
467
+ features[..., feature_idx] = gaussian_cache[sigma]
468
+ feature_idx += 1
469
+
470
+ # Difference of Gaussians - use cached results
471
+ for s1, s2 in self.dogs:
472
+ features[..., feature_idx] = gaussian_cache[s1] - gaussian_cache[s2]
473
+ feature_idx += 1
474
+
475
+ # Gaussian gradient magnitudes for each sigma (vectorized, 2D version)
476
+ for sigma in self.sigmas:
477
+ gaussian_img = gaussian_cache[sigma]
478
+ gx = cupy_ndimage.sobel(gaussian_img, axis=1, mode='reflect') # x direction
479
+ gy = cupy_ndimage.sobel(gaussian_img, axis=0, mode='reflect') # y direction
480
+ features[..., feature_idx] = cp.sqrt(gx**2 + gy**2)
481
+ feature_idx += 1
482
+
483
+ # Laplacian of Gaussian for each sigma (vectorized, 2D version)
484
+ for sigma in self.sigmas:
485
+ gaussian_img = gaussian_cache[sigma]
486
+ features[..., feature_idx] = cupy_ndimage.laplace(gaussian_img, mode='reflect')
487
+ feature_idx += 1
488
+
489
+ # Largest Hessian eigenvalue for each sigma (fully vectorized, 2D version)
490
+ for sigma in self.sigmas:
491
+ gaussian_img = gaussian_cache[sigma]
492
+
493
+ # Compute second derivatives (Hessian components) - all vectorized for 2D
494
+ hxx = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[0, 2], mode='reflect')
495
+ hyy = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[2, 0], mode='reflect')
496
+ hxy = cupy_ndimage.gaussian_filter(gaussian_img, sigma=0, order=[1, 1], mode='reflect')
497
+
498
+ # Vectorized eigenvalue computation using cupy broadcasting
499
+ # Create arrays with shape (d0, d1, 2, 2) for all 2D Hessian matrices
500
+ shape = image_2d.shape
501
+ hessian_matrices = cp.zeros(shape + (2, 2))
502
+
503
+ # Fill the symmetric 2D Hessian matrices
504
+ hessian_matrices[..., 0, 0] = hxx
505
+ hessian_matrices[..., 1, 1] = hyy
506
+ hessian_matrices[..., 0, 1] = hessian_matrices[..., 1, 0] = hxy
507
+
508
+ # Reshape for batch eigenvalue computation
509
+ original_shape = hessian_matrices.shape[:-2] # (d0, d1)
510
+ batch_size = int(cp.prod(cp.array(original_shape)))
511
+ hessian_batch = hessian_matrices.reshape(batch_size, 2, 2)
512
+
513
+ # Compute eigenvalues for all matrices at once using CuPy
514
+ # Since Hessian matrices are symmetric, we can use eigvalsh
515
+ eigenvalues_batch = cp.linalg.eigvalsh(hessian_batch)
516
+
517
+ # Get only the largest eigenvalue for each matrix
518
+ largest_eigenvalues = cp.max(eigenvalues_batch, axis=1)
519
+
520
+ # Reshape back to original spatial dimensions
521
+ largest_eigenvalues = largest_eigenvalues.reshape(original_shape)
522
+
523
+ # Add the largest eigenvalue as a feature
524
+ features[..., feature_idx] = largest_eigenvalues
525
+ feature_idx += 1
574
526
 
575
- # Gradient magnitude
576
- gradient_magnitude = cp.sqrt(gx**2 + gy**2)
577
- features.append(gradient_magnitude)
578
-
579
- # Stage 2: Compute derived features
580
-
581
- # Second-order gradients
582
- gxx = cupy_ndimage.sobel(gx, axis=1, mode='reflect')
583
- gyy = cupy_ndimage.sobel(gy, axis=0, mode='reflect')
584
-
585
- # Cross derivatives for Hessian determinant
586
- gxy = cupy_ndimage.sobel(gx, axis=0, mode='reflect')
587
- gyx = cupy_ndimage.sobel(gy, axis=1, mode='reflect')
588
-
589
- # Laplacian (sum of second derivatives)
590
- laplacian = gxx + gyy
591
- features.append(laplacian)
592
-
593
- # Hessian determinant
594
- hessian_det = gxx * gyy - gxy * gyx
595
- features.append(hessian_det)
596
-
597
- # Verify shapes
598
- for i, feat in enumerate(features):
599
- if feat.shape != original_shape:
600
- # Check dimensionality and expand if needed
601
- if len(feat.shape) < len(original_shape):
602
- feat_adjusted = feat
603
- missing_dims = len(original_shape) - len(feat.shape)
604
- for _ in range(missing_dims):
605
- feat_adjusted = cp.expand_dims(feat_adjusted, axis=0)
606
-
607
- if feat_adjusted.shape != original_shape:
608
- raise ValueError(f"Feature {i} has shape {feat.shape}, expected {original_shape}")
609
-
610
- features[i] = feat_adjusted
527
+ # Normalize only morphological features, keep intensity features raw
528
+ intensity_features = features[..., :num_basic_features] # original + gaussians + DoGs
529
+ morphology_features = features[..., num_basic_features:] # gradients + laplacians + eigenvalues
530
+
531
+ # Normalize only morphological features using CuPy
532
+ morph_means = cp.mean(morphology_features, axis=(0, 1), keepdims=True)
533
+ morph_stds = cp.std(morphology_features, axis=(0, 1), keepdims=True)
534
+ morph_stds = cp.where(morph_stds == 0, 1, morph_stds)
535
+ morphology_features = (morphology_features - morph_means) / morph_stds
536
+
537
+ # Recombine
538
+ features = cp.concatenate([intensity_features, morphology_features], axis=-1)
611
539
 
612
- # Stack all features along a new dimension
613
- result = cp.stack(features, axis=-1)
540
+ return features
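The deep feature maps now z-score only the morphology-derived channels (gradients, Laplacians, Hessian eigenvalues) and leave the intensity channels raw. A NumPy sketch of that selective normalization, where the channel split index is an assumption standing in for num_basic_features:

    import numpy as np

    features = np.random.rand(64, 64, 12).astype(np.float32)   # toy 2D feature stack
    num_basic = 4                                               # original + Gaussians + DoGs

    intensity = features[..., :num_basic]                       # left untouched
    morphology = features[..., num_basic:]

    means = morphology.mean(axis=(0, 1), keepdims=True)
    stds = morphology.std(axis=(0, 1), keepdims=True)
    stds = np.where(stds == 0, 1, stds)                          # guard flat channels
    morphology = (morphology - means) / stds

    features = np.concatenate([intensity, morphology], axis=-1)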
541
+
542
+ def create_2d_chunks(self):
543
+ """Same 2D chunking logic"""
544
+ MAX_CHUNK_SIZE = self.twod_chunk_size
545
+ chunks = []
614
546
 
615
- # Optional: Return as numpy array if needed
616
- # result = cp.asnumpy(result)
547
+ for z in range(self.image_3d.shape[0]):
548
+ y_dim = self.image_3d.shape[1]
549
+ x_dim = self.image_3d.shape[2]
550
+ total_pixels = y_dim * x_dim
551
+
552
+ if total_pixels <= MAX_CHUNK_SIZE:
553
+ chunks.append([z, 0, y_dim, 0, x_dim])
554
+ else:
555
+ largest_dim = 'y' if y_dim >= x_dim else 'x'
556
+ num_divisions = int(cp.ceil(total_pixels / MAX_CHUNK_SIZE))
557
+
558
+ if largest_dim == 'y':
559
+ div_size = int(cp.ceil(y_dim / num_divisions))
560
+ for i in range(0, y_dim, div_size):
561
+ end_i = min(i + div_size, y_dim)
562
+ chunks.append([z, i, end_i, 0, x_dim])
563
+ else:
564
+ div_size = int(cp.ceil(x_dim / num_divisions))
565
+ for i in range(0, x_dim, div_size):
566
+ end_i = min(i + div_size, x_dim)
567
+ chunks.append([z, 0, y_dim, i, end_i])
617
568
 
618
- return result
619
-
569
+ return chunks
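create_2d_chunks emits one [z, y_start, y_end, x_start, x_end] entry per slice and splits the longer axis whenever a slice exceeds twod_chunk_size pixels. A quick worked example of that split in plain Python, assuming the default limit of 262144:

    import math

    MAX_CHUNK_SIZE = 262144          # self.twod_chunk_size default in this release
    y_dim, x_dim = 1024, 512         # one 1024x512 slice = 524288 pixels

    num_divisions = math.ceil(y_dim * x_dim / MAX_CHUNK_SIZE)   # -> 2
    div_size = math.ceil(y_dim / num_divisions)                  # -> 512 rows per chunk

    chunks = [[0, i, min(i + div_size, y_dim), 0, x_dim]
              for i in range(0, y_dim, div_size)]
    print(chunks)   # [[0, 0, 512, 0, 512], [0, 512, 1024, 0, 512]]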
570
+
620
571
  def segment_volume(self, array, chunk_size=None, gpu=True):
621
- """Segment volume using parallel processing of chunks with vectorized chunk creation"""
622
-
623
- array = cp.asarray(array) # Ensure CuPy array
572
+ """Optimized GPU version with sequential GPU processing and batched sklearn prediction"""
624
573
 
574
+ array = cp.asarray(array)
625
575
  self.realtimechunks = None
626
- self.map_slice = None
627
576
  chunk_size = self.master_chunk
628
577
 
629
- def create_2d_chunks():
630
- """
631
- Create chunks by z-slices for 2D processing.
632
- Each chunk is a complete z-slice with all y,x coordinates,
633
- unless the slice exceeds 262144 pixels, in which case it's divided into subchunks.
634
-
635
- Returns:
636
- List of chunks where each chunk contains the parameters needed for processing.
637
- Format depends on subchunking:
638
- - No subchunking: [y_dim, x_dim, z, total_pixels, None]
639
- - Y subchunking: [y_dim, x_dim, z, None, ['y', start_y, end_y]]
640
- - X subchunking: [y_dim, x_dim, z, None, ['x', start_x, end_x]]
641
- """
642
- MAX_CHUNK_SIZE = 262144
643
- chunks = []
644
-
645
- for z in range(self.image_3d.shape[0]):
646
- # Get the dimensions of this z-slice
647
- y_dim = self.image_3d.shape[1]
648
- x_dim = self.image_3d.shape[2]
649
- total_pixels = y_dim * x_dim
650
-
651
- # If the slice is small enough, do not subchunk
652
- if total_pixels <= MAX_CHUNK_SIZE:
653
- chunks.append([z, 0, y_dim, 0, x_dim]) # [z_start, y_start, y_end, x_start, x_end]
654
- else:
655
- # Determine which dimension to divide (the largest one)
656
- largest_dim = 'y' if y_dim >= x_dim else 'x'
657
-
658
- # Calculate how many divisions we need
659
- num_divisions = int(cp.ceil(total_pixels / MAX_CHUNK_SIZE))
660
-
661
- # Calculate the approx size of each division along the largest dimension
662
- if largest_dim == 'y':
663
- div_size = int(cp.ceil(y_dim / num_divisions))
664
- # Create subchunks by dividing the y-dimension
665
- for i in range(0, y_dim, div_size):
666
- end_i = min(i + div_size, y_dim)
667
- chunks.append([z, i, end_i, 0, x_dim]) # [z, y_start, y_end, x_start, x_end]
668
- else: # largest_dim == 'x'
669
- div_size = int(cp.ceil(x_dim / num_divisions))
670
- # Create subchunks by dividing the x-dimension
671
- for i in range(0, x_dim, div_size):
672
- end_i = min(i + div_size, x_dim)
673
- chunks.append([z, 0, y_dim, i, end_i]) # [z, y_start, y_end, x_start, x_end]
674
-
675
- return chunks
676
578
 
677
579
  print("Chunking data...")
678
580
 
679
581
  if not self.use_two:
680
- # 3D Processing - Create chunks for 3D volume
681
- # Round to nearest multiple of 32 for better memory alignment
582
+ # 3D Processing
682
583
  chunk_size = ((chunk_size + 15) // 32) * 32
683
584
 
684
- # Calculate number of chunks in each dimension
685
585
  z_chunks = (self.image_3d.shape[0] + chunk_size - 1) // chunk_size
686
586
  y_chunks = (self.image_3d.shape[1] + chunk_size - 1) // chunk_size
687
587
  x_chunks = (self.image_3d.shape[2] + chunk_size - 1) // chunk_size
688
588
 
689
- # Create start indices for all chunks at once using CuPy
690
589
  chunk_starts = cp.array(cp.meshgrid(
691
590
  cp.arange(z_chunks) * chunk_size,
692
591
  cp.arange(y_chunks) * chunk_size,
@@ -696,8 +595,7 @@ class InteractiveSegmenter:
696
595
 
697
596
  chunks = []
698
597
  for chunk_start_gpu in chunk_starts:
699
- # Extract values from CuPy array
700
- z_start = int(chunk_start_gpu[0]) # Convert to regular Python int
598
+ z_start = int(chunk_start_gpu[0])
701
599
  y_start = int(chunk_start_gpu[1])
702
600
  x_start = int(chunk_start_gpu[2])
703
601
 
@@ -708,47 +606,155 @@ class InteractiveSegmenter:
708
606
  coords = [z_start, z_end, y_start, y_end, x_start, x_end]
709
607
  chunks.append(coords)
710
608
  else:
711
- # 2D Processing - Create chunks by z-slices
712
- chunks = create_2d_chunks()
713
- self.feature_cache = None # Reset feature cache for 2D processing
714
-
715
- # Process chunks
716
- print("Segmenting chunks...")
717
-
718
- for i, chunk in enumerate(chunks):
719
- # Process chunk - returns CuPy arrays of coordinates
720
- fore_coords, _ = self.process_chunk(chunk)
721
-
722
- if isinstance(fore_coords, list) and len(fore_coords) == 0:
723
- # Skip empty results
724
- pass
725
- elif hasattr(fore_coords, 'shape') and len(fore_coords) > 0:
726
- # Direct indexing with CuPy arrays
727
- try:
728
- array[fore_coords[:, 0], fore_coords[:, 1], fore_coords[:, 2]] = 255
729
- except IndexError as e:
730
- print(f"Index error when updating array: {e}")
731
- # Fallback to a safer but slower approach
732
- for coord in fore_coords:
609
+ chunks = self.create_2d_chunks()
610
+
611
+ print("Processing chunks with optimized GPU batching...")
612
+
613
+ # Optimal batch size - balance memory usage vs sklearn efficiency
614
+ max_workers = multiprocessing.cpu_count()
615
+ batch_size = max_workers * self.batch_amplifier # Process more chunks per batch for better sklearn utilization
616
+ total_processed = 0
617
+
618
+ # Configure sklearn for maximum parallelism
619
+ if hasattr(self.model, 'n_jobs'):
620
+ original_n_jobs = self.model.n_jobs
621
+ self.model.n_jobs = -1
622
+
623
+ try:
624
+ for batch_start in range(0, len(chunks), batch_size):
625
+ batch_end = min(batch_start + batch_size, len(chunks))
626
+ chunk_batch = chunks[batch_start:batch_end]
627
+
628
+ print(f"Processing batch {batch_start//batch_size + 1}/{(len(chunks) + batch_size - 1)//batch_size}")
629
+
630
+ # PHASE 1: Sequential GPU feature extraction (much faster than threading)
631
+ batch_results = []
632
+
633
+ for chunk in chunk_batch:
634
+ features_cpu, coords_gpu = self.extract_chunk_features_gpu(chunk)
635
+ if len(features_cpu) > 0:
636
+ batch_results.append((features_cpu, coords_gpu))
637
+
638
+ # PHASE 2: Batch predict with sklearn's parallelism
639
+ if batch_results:
640
+ # Combine all CPU features from this batch
641
+ all_batch_features = cp.vstack([result[0] for result in batch_results])
642
+ all_batch_coords = cp.vstack([result[1] for result in batch_results])
643
+ all_batch_features = cp.asnumpy(all_batch_features)
644
+
645
+ # Single prediction call using sklearn's internal parallelism
646
+ predictions = self.model.predict(all_batch_features)
647
+ predictions = cp.array(predictions, dtype=bool)
648
+
649
+ # Apply predictions to array
650
+ foreground_coords = all_batch_coords[predictions]
651
+ if len(foreground_coords) > 0:
733
652
  try:
734
- z, y, x = int(coord[0]), int(coord[1]), int(coord[2])
735
- if 0 <= z < array.shape[0] and 0 <= y < array.shape[1] and 0 <= x < array.shape[2]:
736
- array[z, y, x] = 255
737
- except Exception as inner_e:
738
- print(f"Error updating coordinate {coord}: {inner_e}")
653
+ array[foreground_coords[:, 0], foreground_coords[:, 1], foreground_coords[:, 2]] = 255
654
+ except IndexError as e:
655
+ print(f"Index error when updating array: {e}")
656
+ # Fallback approach
657
+ for coord in foreground_coords:
658
+ z, y, x = int(coord[0]), int(coord[1]), int(coord[2])
659
+ if 0 <= z < array.shape[0] and 0 <= y < array.shape[1] and 0 <= x < array.shape[2]:
660
+ array[z, y, x] = 255
661
+
662
+ # Memory cleanup for this batch
663
+ del all_batch_features, all_batch_coords, predictions, foreground_coords
664
+ cp.get_default_memory_pool().free_all_blocks()
665
+
666
+ total_processed += len(chunk_batch)
667
+ print(f"Completed {total_processed}/{len(chunks)} chunks")
668
+
669
+ finally:
670
+ # Restore sklearn settings
671
+ if hasattr(self.model, 'n_jobs'):
672
+ self.model.n_jobs = original_n_jobs
739
673
 
740
- # Memory management - release reference to chunk data
741
- if i % 10 == 0: # Do periodic memory cleanup
742
- cp.get_default_memory_pool().free_all_blocks()
674
+ # Final GPU memory cleanup
675
+ cp.get_default_memory_pool().free_all_blocks()
676
+
677
+ return cp.asnumpy(array)
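The new segment_volume loop is a two-phase pipeline: extract features for a batch of chunks, then make one large predict call so scikit-learn's own parallelism (n_jobs=-1) does the work. A simplified CPU sketch of the batching pattern; the extract callback and the batch sizing are assumptions mirroring the diff, not the package's API:

    import multiprocessing
    import numpy as np

    def segment_batched(chunks, extract, model, amplifier=2):
        batch_size = multiprocessing.cpu_count() * amplifier
        labeled = []
        for start in range(0, len(chunks), batch_size):
            batch = chunks[start:start + batch_size]
            results = [extract(c) for c in batch]                 # phase 1: features per chunk
            feats = np.vstack([f for f, _ in results])
            coords = np.vstack([c for _, c in results])
            mask = np.asarray(model.predict(feats), dtype=bool)   # phase 2: one big predict
            labeled.append(coords[mask])                          # foreground voxels of this batch
        return np.vstack(labeled) if labeled else np.empty((0, 3), dtype=int)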
743
678
 
744
- print(f"Processed {i+1}/{len(chunks)} chunks")
679
+ def extract_chunk_features_gpu(self, chunk_coords):
680
+ """
681
+ GPU version of feature extraction without prediction
682
+ Returns CPU features and GPU coordinates for efficient batch processing
683
+ """
745
684
 
746
- # Clean up GPU memory
747
- cp.get_default_memory_pool().free_all_blocks()
685
+ if self.previewing or not self.use_two:
686
+ # 3D processing
687
+ if self.realtimechunks is None:
688
+ z_min, z_max = chunk_coords[0], chunk_coords[1]
689
+ y_min, y_max = chunk_coords[2], chunk_coords[3]
690
+ x_min, x_max = chunk_coords[4], chunk_coords[5]
691
+
692
+ # Create coordinates using CuPy (GPU operations)
693
+ z_range = cp.arange(z_min, z_max)
694
+ y_range = cp.arange(y_min, y_max)
695
+ x_range = cp.arange(x_min, x_max)
696
+
697
+ chunk_coords_gpu = cp.stack(cp.meshgrid(
698
+ z_range, y_range, x_range, indexing='ij'
699
+ )).reshape(3, -1).T
700
+ else:
701
+ chunk_coords_gpu = cp.array(chunk_coords)
702
+ z_coords = chunk_coords_gpu[:, 0]
703
+ y_coords = chunk_coords_gpu[:, 1]
704
+ x_coords = chunk_coords_gpu[:, 2]
705
+
706
+ z_min, z_max = cp.min(z_coords).item(), cp.max(z_coords).item()
707
+ y_min, y_max = cp.min(y_coords).item(), cp.max(y_coords).item()
708
+ x_min, x_max = cp.min(x_coords).item(), cp.max(x_coords).item()
709
+
710
+ # Extract subarray and compute features (GPU operations)
711
+ subarray = self.image_3d[z_min:z_max+1, y_min:y_max+1, x_min:x_max+1]
712
+
713
+ if self.speed:
714
+ feature_map = self.compute_feature_maps_gpu(subarray)
715
+ else:
716
+ feature_map = self.compute_deep_feature_maps_gpu(subarray)
717
+
718
+ # Extract features using GPU operations
719
+ local_coords = chunk_coords_gpu.copy()
720
+ local_coords[:, 0] -= z_min
721
+ local_coords[:, 1] -= y_min
722
+ local_coords[:, 2] -= x_min
723
+
724
+ features_gpu = feature_map[local_coords[:, 0], local_coords[:, 1], local_coords[:, 2]]
725
+
726
+ # Features stay on the GPU here; segment_volume converts the combined batch to NumPy once
727
+ #features_cpu = cp.asnumpy(features_gpu)
728
+ #coords_cpu = cp.asnumpy(chunk_coords_gpu)
729
+
730
+ return features_gpu, chunk_coords_gpu
731
+
732
+ else:
733
+ # 2D processing
734
+ if len(chunk_coords) == 5:
735
+ z = chunk_coords[0]
736
+ y_start = chunk_coords[1]
737
+ y_end = chunk_coords[2]
738
+ x_start = chunk_coords[3]
739
+ x_end = chunk_coords[4]
740
+
741
+ # Generate coordinates for this slice
742
+ coords_array = self.twodim_coords(z, y_start, y_end, x_start, x_end)
743
+
744
+ # Get feature map for this z-slice
745
+ feature_map = self.get_feature_map_slice(z, self.speed, True)
746
+
747
+ # Extract features using GPU operations
748
+ y_indices = coords_array[:, 1]
749
+ x_indices = coords_array[:, 2]
750
+ features_gpu = feature_map[y_indices, x_indices]
751
+
752
+ # Features stay on the GPU here as well; the stacked batch is converted to NumPy in segment_volume
753
+ #features_cpu = cp.asnumpy(features_gpu)
754
+ #coords_cpu = cp.asnumpy(coords_array)
755
+
756
+ return features_gpu, coords_array
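In the 2D branch, features are gathered by fancy-indexing the slice's feature map with the y and x columns of the coordinate array, so no Python loop touches individual pixels. A NumPy illustration with arbitrary shapes:

    import numpy as np

    feature_map = np.random.rand(128, 128, 9)                    # (y, x, n_features) for one z-slice
    coords = np.array([[3, 10, 20], [3, 11, 21], [3, 50, 60]])   # rows of (z, y, x)

    features = feature_map[coords[:, 1], coords[:, 2]]            # shape (3, 9), one row per pixel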
748
757
 
749
- # Convert to NumPy at the very end for return
750
- return cp.asnumpy(array)
751
-
752
758
  def update_position(self, z=None, x=None, y=None):
753
759
  """Update current position for chunk prioritization with safeguards"""
754
760
 
@@ -776,15 +782,63 @@ class InteractiveSegmenter:
776
782
 
777
783
  # Only clear map_slice if z changes and we're not already generating a new one
778
784
  if self.current_z != self.prev_z:
779
- # Instead of setting to None, check if we already have it in the cache
780
- if hasattr(self, 'feature_cache') and self.feature_cache is not None:
781
- if self.current_z not in self.feature_cache:
782
- self.map_slice = None
785
+
783
786
  self._currently_segmenting = None
784
787
 
785
788
  # Update previous z
786
789
  self.prev_z = z
787
790
 
791
+ def get_realtime_chunks_2d(self, chunk_size=None):
792
+ """
793
+ Create square chunks with 1 z-thickness (2D chunks across XY planes)
794
+ """
795
+
796
+ if chunk_size is None:
797
+ chunk_size = int(cp.sqrt(self.twod_chunk_size))
798
+
799
+ # Determine if we need to chunk XY planes
800
+ small_dims = (self.image_3d.shape[1] <= chunk_size and
801
+ self.image_3d.shape[2] <= chunk_size)
802
+ few_z = self.image_3d.shape[0] <= 100 # arbitrary threshold
803
+
804
+ # If small enough, each Z is one chunk
805
+ if small_dims and few_z:
806
+ chunk_size_xy = max(self.image_3d.shape[1], self.image_3d.shape[2])
807
+ else:
808
+ chunk_size_xy = chunk_size
809
+
810
+ # Calculate chunks for XY plane
811
+ y_chunks = (self.image_3d.shape[1] + chunk_size_xy - 1) // chunk_size_xy
812
+ x_chunks = (self.image_3d.shape[2] + chunk_size_xy - 1) // chunk_size_xy
813
+
814
+ # Populate chunk dictionary
815
+ chunk_dict = {}
816
+
817
+ # Create chunks for each Z plane (single Z thickness)
818
+ for z in range(self.image_3d.shape[0]):
819
+ if small_dims:
820
+ chunk_dict[(z, 0, 0)] = {
821
+ 'coords': [0, self.image_3d.shape[1], 0, self.image_3d.shape[2]],
822
+ 'processed': False,
823
+ 'z': z # Keep for backward compatibility
824
+ }
825
+ else:
826
+ # Multiple chunks per Z plane
827
+ for y_chunk in range(y_chunks):
828
+ for x_chunk in range(x_chunks):
829
+ y_start = y_chunk * chunk_size_xy
830
+ x_start = x_chunk * chunk_size_xy
831
+ y_end = min(y_start + chunk_size_xy, self.image_3d.shape[1])
832
+ x_end = min(x_start + chunk_size_xy, self.image_3d.shape[2])
833
+
834
+ chunk_dict[(z, y_start, x_start)] = {
835
+ 'coords': [y_start, y_end, x_start, x_end],
836
+ 'processed': False,
837
+ 'z': z # Keep for backward compatibility
838
+ }
839
+
840
+ self.realtimechunks = chunk_dict
841
+ print("Ready!")
788
842
 
789
843
  def get_realtime_chunks(self, chunk_size=49):
790
844
 
@@ -848,7 +902,10 @@ class InteractiveSegmenter:
848
902
  #gpu = False
849
903
 
850
904
  if self.realtimechunks is None:
851
- self.get_realtime_chunks()
905
+ if not self.use_two:
906
+ self.get_realtime_chunks()
907
+ else:
908
+ self.get_realtime_chunks_2d()
852
909
  else:
853
910
  for chunk_pos in self.realtimechunks: # chunk_pos is the (z, y_start, x_start) tuple
854
911
  self.realtimechunks[chunk_pos]['processed'] = False
@@ -952,80 +1009,58 @@ class InteractiveSegmenter:
952
1009
 
953
1010
  if not saving:
954
1011
  print("Training model...")
1012
+ self.model = RandomForestClassifier(
1013
+ n_estimators=100,
1014
+ n_jobs=-1,
1015
+ max_depth=None
1016
+ )
955
1017
 
956
1018
  self.speed = speed
957
1019
  self.cur_gpu = use_gpu
958
- self.realtimechunks = None # dump ram
1020
+ #self.realtimechunks = None # dump ram
959
1021
 
960
1022
  self.mem_lock = mem_lock
961
-
962
- self.model = RandomForestClassifier(
963
- n_estimators=100,
964
- n_jobs=-1,
965
- max_depth=None
966
- )
967
1023
 
968
- if use_two:
1024
+ if use_two != self.use_two:
1025
+ self.realtimechunks = None
969
1026
 
970
- #changed = [] #Track which slices need feature maps
1027
+ if not use_two:
1028
+ self.use_two = False
971
1029
 
1030
+ if use_two:
972
1031
  if not self.use_two: #Clarifies if we need to redo feature cache for 2D
973
- self.feature_cache = None
974
1032
  self.use_two = True
975
-
976
- self.feature_cache = None #Decided this should reset, can remove this line to have it retain prev feature maps
977
1033
  self.two_slices = []
978
-
979
- if self.feature_cache == None:
980
- self.feature_cache = {}
981
-
982
1034
  foreground_array = cp.asarray(foreground_array)
983
-
1035
+
984
1036
  # Get foreground coordinates and features
985
1037
  z_fore, y_fore, x_fore = cp.where(foreground_array == 1)
986
-
987
- z_fore_cpu = cp.asnumpy(z_fore)
988
- y_fore_cpu = cp.asnumpy(y_fore)
989
- x_fore_cpu = cp.asnumpy(x_fore)
990
-
991
- fore_coords = list(zip(z_fore_cpu, y_fore_cpu, x_fore_cpu))
1038
+ # Keep as CuPy arrays but convert to regular Python types for dictionary keys
1039
+ fore_coords = [(int(z), int(y), int(x)) for z, y, x in zip(z_fore, y_fore, x_fore)]
992
1040
 
993
1041
  # Get background coordinates and features
994
1042
  z_back, y_back, x_back = cp.where(foreground_array == 2)
995
-
996
- z_back_cpu = cp.asnumpy(z_back)
997
- y_back_cpu = cp.asnumpy(y_back)
998
- x_back_cpu = cp.asnumpy(x_back)
999
-
1000
- back_coords = list(zip(z_back_cpu, y_back_cpu, x_back_cpu))
1001
-
1043
+ # Keep as CuPy arrays but convert to regular Python types for dictionary keys
1044
+ back_coords = [(int(z), int(y), int(x)) for z, y, x in zip(z_back, y_back, x_back)]
1045
+
1002
1046
  foreground_features = []
1003
1047
  background_features = []
1004
-
1005
1048
  z_fores = self.organize_by_z(fore_coords)
1006
1049
  z_backs = self.organize_by_z(back_coords)
1007
1050
  slices = set(list(z_fores.keys()) + list(z_backs.keys()))
1008
-
1051
+
1009
1052
  for z in slices:
1010
-
1011
-
1012
1053
  current_map = self.get_feature_map_slice(z, speed, use_gpu)
1013
-
1014
1054
  if z in z_fores:
1015
-
1016
1055
  for y, x in z_fores[z]:
1017
1056
  # Get the feature vector for this foreground point
1018
1057
  feature_vector = current_map[y, x]
1019
-
1020
1058
  # Add to our collection
1021
1059
  foreground_features.append(cp.asnumpy(feature_vector))
1022
-
1023
1060
  if z in z_backs:
1024
-
1025
1061
  for y, x in z_backs[z]:
1026
- # Get the feature vector for this foreground point
1062
+ # Get the feature vector for this background point
1027
1063
  feature_vector = current_map[y, x]
1028
-
1029
1064
  # Add to our collection
1030
1065
  background_features.append(cp.asnumpy(feature_vector))
1031
1066
 
@@ -1045,11 +1080,11 @@ class InteractiveSegmenter:
1045
1080
  z_back = cp.argwhere(foreground_array_gpu == 2)
1046
1081
 
1047
1082
  # Convert back to NumPy for compatibility with the rest of the code
1048
- z_fore_cpu = cp.asnumpy(z_fore)
1049
- z_back_cpu = cp.asnumpy(z_back)
1083
+ #z_fore_cpu = cp.asnumpy(z_fore)
1084
+ #z_back_cpu = cp.asnumpy(z_back)
1050
1085
 
1051
1086
  # If no scribbles, return empty lists
1052
- if len(z_fore_cpu) == 0 and len(z_back_cpu) == 0:
1087
+ if len(z_fore) == 0 and len(z_back) == 0:
1053
1088
  return foreground_features, background_features
1054
1089
 
1055
1090
  # Get dimensions of the input array
@@ -1070,7 +1105,7 @@ class InteractiveSegmenter:
1070
1105
  grid_cells_with_scribbles = set()
1071
1106
 
1072
1107
  # Map original coordinates to grid cells
1073
- for z, y, x in cp.vstack((z_fore_cpu, z_back_cpu)) if len(z_back_cpu) > 0 else z_fore_cpu:
1108
+ for z, y, x in cp.vstack((z_fore, z_back)) if len(z_back) > 0 else z_fore:
1074
1109
  grid_z = int(z // box_size)
1075
1110
  grid_y = int(y // box_size)
1076
1111
  grid_x = int(x // box_size)
@@ -1111,96 +1146,207 @@ class InteractiveSegmenter:
1111
1146
 
1112
1147
  if self.previous_foreground is not None:
1113
1148
  failed = True
1149
+
1114
1150
  try:
1115
- # Make sure foreground_features is a NumPy array before vstack
1151
+ # Handle foreground features
1116
1152
  if isinstance(foreground_features, list):
1117
- foreground_features = np.array(foreground_features)
1153
+ if len(foreground_features) > 0:
1154
+ # Check if first element is CuPy or NumPy
1155
+ if hasattr(foreground_features[0], 'get'): # CuPy array
1156
+ foreground_features = cp.stack(foreground_features)
1157
+ else: # NumPy array
1158
+ import numpy as np
1159
+ foreground_features = cp.asarray(np.stack(foreground_features))
1160
+ else:
1161
+ foreground_features = cp.array([])
1118
1162
 
1119
- # Convert CuPy arrays to NumPy if necessary
1163
+ # Convert CuPy arrays to NumPy if necessary for consistent handling
1120
1164
  if hasattr(foreground_features, 'get'):
1121
1165
  foreground_features = foreground_features.get()
1122
1166
 
1123
- foreground_features = np.vstack([self.previous_foreground, foreground_features])
1167
+ # Combine with previous foreground features
1168
+ if len(foreground_features) > 0:
1169
+ foreground_features = np.vstack([self.previous_foreground, foreground_features])
1170
+ else:
1171
+ foreground_features = self.previous_foreground
1172
+
1124
1173
  failed = False
1125
1174
  except Exception as e:
1126
- pass
1175
+ print(f"Error combining foreground features: {e}")
1176
+ # Keep only new features if combination fails
1177
+ if isinstance(foreground_features, list):
1178
+ if len(foreground_features) > 0:
1179
+ # Check if first element is CuPy or NumPy
1180
+ if hasattr(foreground_features[0], 'get'): # CuPy array
1181
+ foreground_features = cp.stack(foreground_features)
1182
+ else: # NumPy array
1183
+ import numpy as np
1184
+ foreground_features = cp.asarray(np.stack(foreground_features))
1185
+ else:
1186
+ foreground_features = cp.array([])
1187
+ if hasattr(foreground_features, 'get'):
1188
+ foreground_features = foreground_features.get()
1127
1189
 
1128
1190
  try:
1129
- # Make sure background_features is a NumPy array before vstack
1191
+ # Handle background features
1130
1192
  if isinstance(background_features, list):
1131
- background_features = np.array(background_features)
1193
+ if len(background_features) > 0:
1194
+ # Check if first element is CuPy or NumPy
1195
+ if hasattr(background_features[0], 'get'): # CuPy array
1196
+ background_features = cp.stack(background_features)
1197
+ else: # NumPy array
1198
+ import numpy as np
1199
+ background_features = cp.asarray(np.stack(background_features))
1200
+ else:
1201
+ background_features = cp.array([])
1132
1202
 
1133
- # Convert CuPy arrays to NumPy if necessary
1203
+ # Convert CuPy arrays to NumPy if necessary for consistent handling
1134
1204
  if hasattr(background_features, 'get'):
1135
1205
  background_features = background_features.get()
1136
1206
 
1137
- background_features = np.vstack([self.previous_background, background_features])
1207
+ # Combine with previous background features
1208
+ if len(background_features) > 0:
1209
+ background_features = np.vstack([self.previous_background, background_features])
1210
+ else:
1211
+ background_features = self.previous_background
1212
+
1138
1213
  failed = False
1139
1214
  except Exception as e:
1140
- pass
1215
+ print(f"Error combining background features: {e}")
1216
+ # Keep only new features if combination fails
1217
+ if isinstance(background_features, list):
1218
+ if len(background_features) > 0:
1219
+ # Check if first element is CuPy or NumPy
1220
+ if hasattr(background_features[0], 'get'): # CuPy array
1221
+ background_features = cp.stack(background_features)
1222
+ else: # NumPy array
1223
+ import numpy as np
1224
+ background_features = cp.asarray(np.stack(background_features))
1225
+ else:
1226
+ background_features = cp.array([])
1227
+ if hasattr(background_features, 'get'):
1228
+ background_features = background_features.get()
1229
+
1141
1230
  try:
1142
- # Ensure coordinate arrays are NumPy arrays
1143
- if hasattr(z_fore_cpu, 'get'):
1144
- z_fore_cpu = z_fore_cpu.get()
1231
+ # Handle foreground coordinates - always combine when we have new ones
1232
+ if hasattr(z_fore, 'get'):
1233
+ z_fore_numpy = z_fore.get()
1234
+ else:
1235
+ z_fore_numpy = z_fore
1236
+
1145
1237
  if hasattr(self.previous_z_fore, 'get'):
1146
- self.previous_z_fore = self.previous_z_fore.get()
1238
+ prev_z_fore_numpy = self.previous_z_fore.get()
1239
+ else:
1240
+ prev_z_fore_numpy = self.previous_z_fore
1147
1241
 
1148
- z_fore_cpu = np.concatenate([self.previous_z_fore, z_fore_cpu])
1242
+ # Always combine coordinates when we have new ones
1243
+ if len(z_fore_numpy) > 0: # We have new coordinates
1244
+ z_fore = np.concatenate([prev_z_fore_numpy, z_fore_numpy])
1245
+ else: # No new coordinates, keep old ones
1246
+ z_fore = prev_z_fore_numpy
1247
+
1149
1248
  except Exception as e:
1150
- pass
1249
+ print(f"Error combining foreground coordinates: {e}")
1250
+ # Fallback: keep new coordinates if combination fails
1251
+ if hasattr(z_fore, 'get'):
1252
+ z_fore = z_fore.get()
1253
+
1151
1254
  try:
1152
- # Ensure coordinate arrays are NumPy arrays
1153
- if hasattr(z_back_cpu, 'get'):
1154
- z_back_cpu = z_back_cpu.get()
1255
+ # Handle background coordinates - always combine when we have new ones
1256
+ if hasattr(z_back, 'get'):
1257
+ z_back_numpy = z_back.get()
1258
+ else:
1259
+ z_back_numpy = z_back
1260
+
1155
1261
  if hasattr(self.previous_z_back, 'get'):
1156
- self.previous_z_back = self.previous_z_back.get()
1262
+ prev_z_back_numpy = self.previous_z_back.get()
1263
+ else:
1264
+ prev_z_back_numpy = self.previous_z_back
1157
1265
 
1158
- z_back_cpu = np.concatenate([self.previous_z_back, z_back_cpu])
1266
+ # Always combine coordinates when we have new ones
1267
+ if len(z_back_numpy) > 0: # We have new coordinates
1268
+ z_back = np.concatenate([prev_z_back_numpy, z_back_numpy])
1269
+ else: # No new coordinates, keep old ones
1270
+ z_back = prev_z_back_numpy
1271
+
1159
1272
  except Exception as e:
1160
- pass
1273
+ print(f"Error combining background coordinates: {e}")
1274
+ # Fallback: keep new coordinates if combination fails
1275
+ if hasattr(z_back, 'get'):
1276
+ z_back = z_back.get()
1277
+
1161
1278
  if failed:
1162
1279
  print("Could not combine new model with old loaded model. Perhaps you are trying to combine a quick model with a deep model? I cannot combine these...")
1163
1280
 
1164
1281
  if saving:
1165
- # Make sure to return NumPy arrays, not CuPy arrays
1166
- if hasattr(foreground_features, 'get'):
1167
- foreground_features = foreground_features.get()
1168
- if hasattr(background_features, 'get'):
1169
- background_features = background_features.get()
1170
- if hasattr(z_fore_cpu, 'get'):
1171
- z_fore_cpu = z_fore_cpu.get()
1172
- if hasattr(z_back_cpu, 'get'):
1173
- z_back_cpu = z_back_cpu.get()
1174
-
1175
- return foreground_features, background_features, z_fore_cpu, z_back_cpu
1176
-
1177
- # Make sure foreground_features and background_features are NumPy arrays
1282
+ return foreground_features, background_features, z_fore, z_back
1283
+
1284
+ # Ensure features are proper arrays for training
1178
1285
  if isinstance(foreground_features, list):
1179
- foreground_features = np.array(foreground_features)
1180
- elif hasattr(foreground_features, 'get'):
1181
- foreground_features = foreground_features.get()
1182
-
1286
+ if len(foreground_features) > 0:
1287
+ # Check if first element is CuPy or NumPy
1288
+ if hasattr(foreground_features[0], 'get'): # CuPy array
1289
+ foreground_features = cp.stack(foreground_features)
1290
+ else: # NumPy array
1291
+ import numpy as np
1292
+ foreground_features = cp.asarray(np.stack(foreground_features))
1293
+ else:
1294
+ foreground_features = cp.array([])
1295
+
1183
1296
  if isinstance(background_features, list):
1184
- background_features = np.array(background_features)
1185
- elif hasattr(background_features, 'get'):
1297
+ if len(background_features) > 0:
1298
+ # Check if first element is CuPy or NumPy
1299
+ if hasattr(background_features[0], 'get'): # CuPy array
1300
+ background_features = cp.stack(background_features)
1301
+ else: # NumPy array
1302
+ import numpy as np
1303
+ background_features = cp.asarray(np.stack(background_features))
1304
+ else:
1305
+ background_features = cp.array([])
1306
+
1307
+ # Convert to NumPy for sklearn
1308
+ if hasattr(foreground_features, 'get'):
1309
+ foreground_features = foreground_features.get()
1310
+ if hasattr(background_features, 'get'):
1186
1311
  background_features = background_features.get()
1187
-
1312
+
1313
+ # Validate dimensions before training
1314
+
1315
+ # Ensure we have matching numbers of features and coordinates
1316
+ if len(foreground_features) != len(z_fore):
1317
+ print(f"Warning: Foreground features ({len(foreground_features)}) and coordinates ({len(z_fore)}) don't match!")
1318
+ # Trim to the smaller size
1319
+ min_len = min(len(foreground_features), len(z_fore))
1320
+ foreground_features = foreground_features[:min_len]
1321
+ z_fore = z_fore[:min_len]
1322
+
1323
+ if len(background_features) != len(z_back):
1324
+ print(f"Warning: Background features ({len(background_features)}) and coordinates ({len(z_back)}) don't match!")
1325
+ # Trim to the smaller size
1326
+ min_len = min(len(background_features), len(z_back))
1327
+ background_features = background_features[:min_len]
1328
+ z_back = z_back[:min_len]
1329
+
1188
1330
  # Combine features and labels for training
1189
- X = np.vstack([foreground_features, background_features])
1190
- y = np.hstack([np.ones(len(z_fore_cpu)), np.zeros(len(z_back_cpu))])
1191
-
1192
- # Train the model
1193
- try:
1194
- self.model.fit(X, y)
1195
- except Exception as e:
1196
- print(f"Error during model training: {e}")
1197
- import traceback
1198
-
1331
+ if len(foreground_features) > 0 and len(background_features) > 0:
1332
+ X = np.vstack([foreground_features, background_features])
1333
+ y = np.hstack([np.ones(len(z_fore)), np.zeros(len(z_back))])
1334
+
1335
+ # Train the model
1336
+ try:
1337
+ self.model.fit(X, y)
1338
+ except Exception as e:
1339
+ print(f"Error during model training: {e}")
1340
+ import traceback
1341
+ traceback.print_exc()
1342
+ else:
1343
+ print("Not enough features to train the model")
1344
+
1199
1345
  self.current_speed = speed
1200
-
1346
+
1201
1347
  # Clean up GPU memory
1202
1348
  cp.get_default_memory_pool().free_all_blocks()
1203
-
1349
+
1204
1350
  print("Done")
1205
1351
 
1206
1352
 
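
Note: the training path above boils down to stacking per-pixel feature vectors (which may arrive as CuPy or NumPy), moving everything to the host, optionally prepending features restored from a previously saved model, and fitting the random forest on labels of ones (foreground) and zeros (background). The hunk also trims features and coordinates to matching lengths before building `y`; the sketch below skips that guard and uses made-up feature widths and counts, so it illustrates the flow rather than the exact method:

import numpy as np
import cupy as cp
from sklearn.ensemble import RandomForestClassifier

def to_host_matrix(features):
    # Stack a non-empty list of feature vectors (CuPy or NumPy) into a NumPy 2-D array.
    if isinstance(features, list):
        features = cp.stack(features) if hasattr(features[0], 'get') else np.stack(features)
    return features.get() if hasattr(features, 'get') else features

# Hypothetical new scribble features plus features restored from a saved model.
new_fore, prev_fore = [cp.random.rand(8) for _ in range(5)], np.random.rand(3, 8)
new_back, prev_back = [cp.random.rand(8) for _ in range(4)], np.random.rand(2, 8)

fore = np.vstack([prev_fore, to_host_matrix(new_fore)])
back = np.vstack([prev_back, to_host_matrix(new_back)])

# Labels: 1 for every foreground row, 0 for every background row; fit on host arrays.
X = np.vstack([fore, back])
y = np.hstack([np.ones(len(fore)), np.zeros(len(back))])
RandomForestClassifier(n_estimators=10).fit(X, y)
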
@@ -1211,7 +1357,7 @@ class InteractiveSegmenter:
1211
1357
  foreground_features, background_features, z_fore, z_back = self.train_batch(foreground_array, speed = self.speed, use_gpu = self.use_gpu, use_two = self.use_two, mem_lock = self.mem_lock, saving = True)
1212
1358
 
1213
1359
 
1214
- np.savez(file_name,
1360
+ cp.savez(file_name,
1215
1361
  foreground_features=foreground_features,
1216
1362
  background_features=background_features,
1217
1363
  z_fore=z_fore,
@@ -1221,14 +1367,14 @@ class InteractiveSegmenter:
1221
1367
  use_two=self.use_two,
1222
1368
  mem_lock=self.mem_lock)
1223
1369
 
1224
- print(f"Model data saved to {file_name}. Please retrain current model prior to segmentation.")
1370
+ print(f"Model data saved to {file_name}.")
1225
1371
 
1226
1372
 
1227
1373
  def load_model(self, file_name):
1228
1374
 
1229
1375
  print("Loading model data")
1230
1376
 
1231
- data = np.load(file_name)
1377
+ data = cp.load(file_name)
1232
1378
 
1233
1379
  # Unpack the arrays
1234
1380
  self.previous_foreground = data['foreground_features']
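
Note: `cupy.savez` mirrors `numpy.savez`, writing an ordinary `.npz` file while accepting arrays that still live on the GPU (they are copied to the host during saving), so the features and coordinates returned by `train_batch` no longer need an explicit `cp.asnumpy` pass before being written. A minimal sketch with a hypothetical file name and hypothetical array shapes:

import cupy as cp

fore = cp.random.rand(5, 8)        # hypothetical feature matrices, still on the device
back = cp.random.rand(4, 8)

# Writes a standard .npz file; device arrays are moved to the host as part of saving.
cp.savez("model_data.npz", foreground_features=fore, background_features=back)
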
@@ -1240,8 +1386,10 @@ class InteractiveSegmenter:
1240
1386
  self.use_two = bool(data['use_two'])
1241
1387
  self.mem_lock = bool(data['mem_lock'])
1242
1388
 
1243
- X = np.vstack([self.previous_foreground, self.previous_background])
1244
- y = np.hstack([np.ones(len(self.previous_z_fore)), np.zeros(len(self.previous_z_back))])
1389
+ X = cp.vstack([self.previous_foreground, self.previous_background])
1390
+ y = cp.hstack([cp.ones(len(self.previous_z_fore)), cp.zeros(len(self.previous_z_back))])
1391
+ X = cp.asnumpy(X)
1392
+ y = cp.asnumpy(y)
1245
1393
 
1246
1394
  try:
1247
1395
  self.model.fit(X, y)
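
Note: on the load side, `cupy.load` behaves like `numpy.load` but hands the stored entries back as CuPy arrays, which is why the training matrix and label vector above are assembled with `cp.vstack`/`cp.hstack` and then copied to the host with `cp.asnumpy` before scikit-learn's `fit` (sklearn expects NumPy input). A short sketch reusing the hypothetical file from the saving example; for brevity the labels here are derived from the feature counts rather than the stored coordinate arrays:

import cupy as cp
from sklearn.ensemble import RandomForestClassifier

data = cp.load("model_data.npz")               # entries are returned as CuPy arrays
fore = data["foreground_features"]
back = data["background_features"]

X = cp.vstack([fore, back])                    # stacked on the GPU
y = cp.hstack([cp.ones(len(fore)), cp.zeros(len(back))])

# scikit-learn needs host arrays, so copy back before fitting.
RandomForestClassifier(n_estimators=10).fit(cp.asnumpy(X), cp.asnumpy(y))
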