figpack 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- figpack/__init__.py +1 -1
- figpack/figpack-figure-dist/assets/{index-CTBd5_Gw.js → index-CjiTpC6i.js} +87 -87
- figpack/figpack-figure-dist/index.html +1 -1
- figpack/spike_sorting/views/RasterPlot.py +237 -26
- figpack/spike_sorting/views/SpikeAmplitudes.py +321 -36
- figpack/views/Spectrogram.py +223 -0
- figpack/views/__init__.py +1 -0
- {figpack-0.2.7.dist-info → figpack-0.2.8.dist-info}/METADATA +1 -1
- {figpack-0.2.7.dist-info → figpack-0.2.8.dist-info}/RECORD +13 -12
- {figpack-0.2.7.dist-info → figpack-0.2.8.dist-info}/WHEEL +0 -0
- {figpack-0.2.7.dist-info → figpack-0.2.8.dist-info}/entry_points.txt +0 -0
- {figpack-0.2.7.dist-info → figpack-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {figpack-0.2.7.dist-info → figpack-0.2.8.dist-info}/top_level.txt +0 -0
figpack/spike_sorting/views/SpikeAmplitudes.py
CHANGED

@@ -2,13 +2,15 @@
 SpikeAmplitudes view for figpack - displays spike amplitudes over time
 """
 
-from typing import List
+from typing import List
 
 import numpy as np
 import zarr
 
 from ...core.figpack_view import FigpackView
 from .SpikeAmplitudesItem import SpikeAmplitudesItem
+from .UnitsTable import UnitsTable, UnitsTableColumn, UnitsTableRow
+from ...views.Box import Box, LayoutItem
 
 
 class SpikeAmplitudes(FigpackView):
@@ -22,8 +24,6 @@ class SpikeAmplitudes(FigpackView):
         start_time_sec: float,
         end_time_sec: float,
         plots: List[SpikeAmplitudesItem],
-        hide_unit_selector: bool = False,
-        height: int = 500,
     ):
         """
         Initialize a SpikeAmplitudes view
@@ -32,18 +32,89 @@
             start_time_sec: Start time of the view in seconds
             end_time_sec: End time of the view in seconds
             plots: List of SpikeAmplitudesItem objects
-            hide_unit_selector: Whether to hide the unit selector
-            height: Height of the view in pixels
         """
         self.start_time_sec = start_time_sec
         self.end_time_sec = end_time_sec
         self.plots = plots
-
-
+
+    @staticmethod
+    def from_nwb_units_table(
+        nwb_url_or_path_or_h5py,
+        *,
+        units_path: str,
+        include_units_selector: bool = False,
+    ):
+        if isinstance(nwb_url_or_path_or_h5py, str):
+            import lindi
+
+            f = lindi.LindiH5pyFile.from_hdf5_file(nwb_url_or_path_or_h5py)
+        else:
+            f = nwb_url_or_path_or_h5py
+        X = f[units_path]
+        spike_amplitudes = X["spike_amplitudes"]
+        # spike_amplitudes_index = X["spike_amplitudes_index"]  # presumably the same as spike_times_index
+        spike_times = X["spike_times"]
+        spike_times_index = X["spike_times_index"]
+        id = X["id"]
+        plots = []
+        num_units = len(spike_times_index)
+        start_times = []
+        end_times = []
+        for unit_index in range(num_units):
+            unit_id = id[unit_index]
+            if unit_index > 0:
+                start_index = spike_times_index[unit_index - 1]
+            else:
+                start_index = 0
+            end_index = spike_times_index[unit_index]
+            unit_spike_amplitudes = spike_amplitudes[start_index:end_index]
+            unit_spike_times = spike_times[start_index:end_index]
+            if len(unit_spike_times) == 0:
+                continue
+            start_times.append(unit_spike_times[0])
+            end_times.append(unit_spike_times[-1])
+            plots.append(
+                SpikeAmplitudesItem(
+                    unit_id=str(unit_id),
+                    spike_times_sec=unit_spike_times,
+                    spike_amplitudes=unit_spike_amplitudes,
+                )
+            )
+        view = SpikeAmplitudes(
+            start_time_sec=min(start_times),
+            end_time_sec=max(end_times),
+            plots=plots,
+        )
+        if include_units_selector:
+            columns: List[UnitsTableColumn] = [
+                UnitsTableColumn(key="unitId", label="Unit", dtype="int"),
+            ]
+            rows: List[UnitsTableRow] = []
+            for unit_id in id:
+                rows.append(
+                    UnitsTableRow(
+                        unit_id=str(unit_id),
+                        values={},
+                    )
+                )
+            units_table = UnitsTable(
+                columns=columns,
+                rows=rows,
+            )
+            layout = Box(
+                direction="horizontal",
+                items=[
+                    LayoutItem(view=units_table, max_size=150, title="Units"),
+                    LayoutItem(view=view, title="Spike Amplitudes"),
+                ],
+            )
+            return layout
+        else:
+            return view
 
     def _write_to_zarr_group(self, group: zarr.Group) -> None:
         """
-        Write the SpikeAmplitudes data to a Zarr group
+        Write the SpikeAmplitudes data to a Zarr group using unified storage format
 
         Args:
             group: Zarr group to write data into
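(For orientation: a minimal, hedged sketch of how the new static constructor might be called. The file name and the "units" path are placeholders, and importing the class from figpack.spike_sorting.views assumes the package re-exports it there, which is not shown in this diff.)

# Hypothetical usage of SpikeAmplitudes.from_nwb_units_table (names are placeholders)
from figpack.spike_sorting.views import SpikeAmplitudes  # assumed re-export

view_or_layout = SpikeAmplitudes.from_nwb_units_table(
    "session.nwb",                # local path or URL; an already-open h5py/lindi file also works
    units_path="units",           # location of the NWB units table within the file
    include_units_selector=True,  # wraps the view in a horizontal Box with a UnitsTable
)
# With include_units_selector=False (the default), a bare SpikeAmplitudes view is returned.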
@@ -54,36 +125,250 @@
         # Store view parameters
         group.attrs["start_time_sec"] = self.start_time_sec
         group.attrs["end_time_sec"] = self.end_time_sec
-        group.attrs["hide_unit_selector"] = self.hide_unit_selector
-        group.attrs["height"] = self.height
-
-        # Store the number of plots
-        group.attrs["num_plots"] = len(self.plots)
-
-        # Store metadata for each plot
-        plot_metadata = []
-        for i, plot in enumerate(self.plots):
-            plot_name = f"plot_{i}"
-
-            # Store metadata
-            metadata = {
-                "name": plot_name,
-                "unit_id": str(plot.unit_id),
-                "num_spikes": len(plot.spike_times_sec),
-            }
-            plot_metadata.append(metadata)
 
-
+        # Prepare unified data arrays
+        unified_data = self._prepare_unified_data()
+
+        if unified_data["total_spikes"] == 0:
+            # Handle empty data case
+            group.create_dataset("timestamps", data=np.array([], dtype=np.float32))
+            group.create_dataset("unit_indices", data=np.array([], dtype=np.uint16))
+            group.create_dataset("amplitudes", data=np.array([], dtype=np.float32))
+            group.create_dataset("reference_times", data=np.array([], dtype=np.float32))
             group.create_dataset(
-
-                data=plot.spike_times_sec,
-                dtype=np.float32,
+                "reference_indices", data=np.array([], dtype=np.uint32)
             )
-            group.
-
-
-
+            group.attrs["unit_ids"] = []
+            group.attrs["total_spikes"] = 0
+            return
+
+        chunks = (
+            (2_000_000,)
+            if unified_data["total_spikes"] > 2_000_000
+            else (len(unified_data["timestamps"]),)
+        )
+        # Store main data arrays
+        group.create_dataset(
+            "timestamps",
+            data=unified_data["timestamps"],
+            dtype=np.float32,
+            chunks=chunks,
+        )
+        group.create_dataset(
+            "unit_indices",
+            data=unified_data["unit_indices"],
+            dtype=np.uint16,
+            chunks=chunks,
+        )
+        group.create_dataset(
+            "amplitudes",
+            data=unified_data["amplitudes"],
+            dtype=np.float32,
+            chunks=chunks,
+        )
+        group.create_dataset(
+            "reference_times",
+            data=unified_data["reference_times"],
+            dtype=np.float32,
+            chunks=(len(unified_data["reference_times"]),),
+        )
+        group.create_dataset(
+            "reference_indices",
+            data=unified_data["reference_indices"],
+            dtype=np.uint32,
+            chunks=(len(unified_data["reference_indices"]),),
+        )
+
+        # Store unit ID mapping
+        group.attrs["unit_ids"] = unified_data["unit_ids"]
+        group.attrs["total_spikes"] = unified_data["total_spikes"]
+
+        # Create subsampled data
+        subsampled_data = self._create_subsampled_data(
+            unified_data["timestamps"],
+            unified_data["unit_indices"],
+            unified_data["amplitudes"],
+        )
+
+        if subsampled_data:
+            subsampled_group = group.create_group("subsampled_data")
+            for factor_name, data in subsampled_data.items():
+                chunks = (
+                    (2_000_000,)
+                    if len(data["timestamps"]) > 2_000_000
+                    else (len(data["timestamps"]),)
+                )
+                factor_group = subsampled_group.create_group(factor_name)
+                factor_group.create_dataset(
+                    "timestamps",
+                    data=data["timestamps"],
+                    dtype=np.float32,
+                    chunks=chunks,
+                )
+                factor_group.create_dataset(
+                    "unit_indices",
+                    data=data["unit_indices"],
+                    dtype=np.uint16,
+                    chunks=chunks,
+                )
+                factor_group.create_dataset(
+                    "amplitudes",
+                    data=data["amplitudes"],
+                    dtype=np.float32,
+                    chunks=chunks,
+                )
+                factor_group.create_dataset(
+                    "reference_times",
+                    data=data["reference_times"],
+                    dtype=np.float32,
+                    chunks=(len(data["reference_times"]),),
+                )
+                factor_group.create_dataset(
+                    "reference_indices",
+                    data=data["reference_indices"],
+                    dtype=np.uint32,
+                    chunks=(len(data["reference_indices"]),),
+                )
+
+    def _prepare_unified_data(self) -> dict:
+        """
+        Prepare unified data arrays from all plots
+
+        Returns:
+            Dictionary containing unified arrays and metadata
+        """
+        if not self.plots:
+            return {
+                "timestamps": np.array([], dtype=np.float32),
+                "unit_indices": np.array([], dtype=np.uint16),
+                "amplitudes": np.array([], dtype=np.float32),
+                "reference_times": np.array([], dtype=np.float32),
+                "reference_indices": np.array([], dtype=np.uint32),
+                "unit_ids": [],
+                "total_spikes": 0,
+            }
+
+        # Create unit ID mapping
+        unit_ids = [str(plot.unit_id) for plot in self.plots]
+        unit_id_to_index = {unit_id: i for i, unit_id in enumerate(unit_ids)}
+
+        # Collect all spikes with their unit indices
+        all_spikes = []
+        for plot in self.plots:
+            unit_index = unit_id_to_index[str(plot.unit_id)]
+            for time, amplitude in zip(plot.spike_times_sec, plot.spike_amplitudes):
+                all_spikes.append((float(time), unit_index, float(amplitude)))
+
+        if not all_spikes:
+            return {
+                "timestamps": np.array([], dtype=np.float32),
+                "unit_indices": np.array([], dtype=np.uint16),
+                "amplitudes": np.array([], dtype=np.float32),
+                "reference_times": np.array([], dtype=np.float32),
+                "reference_indices": np.array([], dtype=np.uint32),
+                "unit_ids": unit_ids,
+                "total_spikes": 0,
+            }
+
+        # Sort by timestamp
+        all_spikes.sort(key=lambda x: x[0])
+
+        # Extract sorted arrays
+        timestamps = np.array([spike[0] for spike in all_spikes], dtype=np.float32)
+        unit_indices = np.array([spike[1] for spike in all_spikes], dtype=np.uint16)
+        amplitudes = np.array([spike[2] for spike in all_spikes], dtype=np.float32)
+
+        # Generate reference arrays
+        reference_times, reference_indices = self._generate_reference_arrays(timestamps)
+
+        return {
+            "timestamps": timestamps,
+            "unit_indices": unit_indices,
+            "amplitudes": amplitudes,
+            "reference_times": reference_times,
+            "reference_indices": reference_indices,
+            "unit_ids": unit_ids,
+            "total_spikes": len(all_spikes),
+        }
+
+    def _generate_reference_arrays(
+        self, timestamps: np.ndarray, interval_sec: float = 1.0
+    ) -> tuple:
+        """
+        Generate reference arrays using actual timestamps from the data
+
+        Args:
+            timestamps: Sorted array of timestamps
+            interval_sec: Minimum interval between reference points
+
+        Returns:
+            Tuple of (reference_times, reference_indices)
+        """
+        if len(timestamps) == 0:
+            return np.array([], dtype=np.float32), np.array([], dtype=np.uint32)
+
+        reference_times = []
+        reference_indices = []
+
+        current_ref_time = timestamps[0]
+        reference_times.append(current_ref_time)
+        reference_indices.append(0)
+
+        # Find the next reference point at least interval_sec later
+        for i, timestamp in enumerate(timestamps):
+            if timestamp >= current_ref_time + interval_sec:
+                reference_times.append(timestamp)
+                reference_indices.append(i)
+                current_ref_time = timestamp
+
+        return np.array(reference_times, dtype=np.float32), np.array(
+            reference_indices, dtype=np.uint32
+        )
+
+    def _create_subsampled_data(
+        self, timestamps: np.ndarray, unit_indices: np.ndarray, amplitudes: np.ndarray
+    ) -> dict:
+        """
+        Create subsampled data with geometric progression factors
+
+        Args:
+            timestamps: Original timestamps array
+            unit_indices: Original unit indices array
+            amplitudes: Original amplitudes array
+
+        Returns:
+            Dictionary of subsampled data by factor
+        """
+        subsampled_data = {}
+        factor = 4
+        current_timestamps = timestamps
+        current_unit_indices = unit_indices
+        current_amplitudes = amplitudes
+
+        while len(current_timestamps) >= 500000:
+            # Create subsampled version by taking every Nth spike
+            subsampled_indices = np.arange(0, len(current_timestamps), factor)
+            subsampled_timestamps = current_timestamps[subsampled_indices]
+            subsampled_unit_indices = current_unit_indices[subsampled_indices]
+            subsampled_amplitudes = current_amplitudes[subsampled_indices]
+
+            # Generate reference arrays for this subsampled level
+            ref_times, ref_indices = self._generate_reference_arrays(
+                subsampled_timestamps
             )
 
-
-
+            subsampled_data[f"factor_{factor}"] = {
+                "timestamps": subsampled_timestamps,
+                "unit_indices": subsampled_unit_indices,
+                "amplitudes": subsampled_amplitudes,
+                "reference_times": ref_times,
+                "reference_indices": ref_indices,
+            }
+
+            # Prepare for next iteration
+            current_timestamps = subsampled_timestamps
+            current_unit_indices = subsampled_unit_indices
+            current_amplitudes = subsampled_amplitudes
+            factor *= 4  # Geometric progression: 4, 16, 64, 256, ...
+
+        return subsampled_data
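(The new storage layout puts every unit's spikes into a few flat, time-sorted arrays instead of one subgroup per plot. Below is a hedged sketch of what gets written; it calls the private _write_to_zarr_group directly purely for illustration, and the import path assumes the spike_sorting.views package re-exports these classes, which is not shown in this diff.)

# Illustrative only: write a tiny SpikeAmplitudes view into an in-memory Zarr group
import numpy as np
import zarr

from figpack.spike_sorting.views import SpikeAmplitudes, SpikeAmplitudesItem  # assumed re-exports

rng = np.random.default_rng(0)
plots = [
    SpikeAmplitudesItem(
        unit_id=str(unit_id),
        spike_times_sec=np.sort(rng.uniform(0, 100, size=1000)).astype(np.float32),
        spike_amplitudes=rng.normal(size=1000).astype(np.float32),
    )
    for unit_id in (1, 2)
]
view = SpikeAmplitudes(start_time_sec=0, end_time_sec=100, plots=plots)

group = zarr.group()  # in-memory store
view._write_to_zarr_group(group)
# Flat arrays: timestamps, unit_indices, amplitudes, reference_times, reference_indices;
# attrs carry unit_ids and total_spikes. A subsampled_data/factor_4, factor_16, ...
# hierarchy is only written once there are at least 500,000 spikes.
print(sorted(group.array_keys()), group.attrs["total_spikes"])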
figpack/views/Spectrogram.py
ADDED

@@ -0,0 +1,223 @@
+"""
+Spectrogram visualization component
+"""
+
+import math
+from typing import Optional
+
+import numpy as np
+import zarr
+
+from ..core.figpack_view import FigpackView
+
+
+class Spectrogram(FigpackView):
+    """
+    A spectrogram visualization component for time-frequency data
+    """
+
+    def __init__(
+        self,
+        *,
+        start_time_sec: float,
+        sampling_frequency_hz: float,
+        frequency_min_hz: float,
+        frequency_delta_hz: float,
+        data: np.ndarray,
+    ):
+        """
+        Initialize a Spectrogram view
+
+        Args:
+            start_time_sec: Starting time in seconds
+            sampling_frequency_hz: Sampling rate in Hz
+            frequency_min_hz: Minimum frequency in Hz
+            frequency_delta_hz: Frequency bin spacing in Hz
+            data: N×M numpy array where N is timepoints and M is frequency bins
+        """
+        assert data.ndim == 2, "Data must be a 2D array (timepoints × frequencies)"
+        assert sampling_frequency_hz > 0, "Sampling frequency must be positive"
+        assert frequency_delta_hz > 0, "Frequency delta must be positive"
+
+        self.start_time_sec = start_time_sec
+        self.sampling_frequency_hz = sampling_frequency_hz
+        self.frequency_min_hz = frequency_min_hz
+        self.frequency_delta_hz = frequency_delta_hz
+        self.data = data.astype(np.float32)  # Ensure float32 for efficiency
+
+        n_timepoints, n_frequencies = data.shape
+        self.n_timepoints = n_timepoints
+        self.n_frequencies = n_frequencies
+
+        # Calculate frequency bins
+        self.frequency_bins = (
+            frequency_min_hz + np.arange(n_frequencies) * frequency_delta_hz
+        )
+
+        # Calculate data range for color scaling
+        self.data_min = float(np.nanmin(data))
+        self.data_max = float(np.nanmax(data))
+
+        # Prepare downsampled arrays for efficient rendering
+        self.downsampled_data = self._compute_downsampled_data()
+
+    def _compute_downsampled_data(self) -> dict:
+        """
+        Compute downsampled arrays at power-of-4 factors using max values only.
+
+        Returns:
+            dict: {factor: (ceil(N/factor), M) float32 array}, where each bin
+                contains the maximum value across the time dimension.
+        """
+        data = self.data  # (N, M), float32
+        n_timepoints, n_frequencies = data.shape
+        downsampled = {}
+
+        if n_timepoints < 4:
+            # No level with factor >= 4 fits the stop condition (factor < N)
+            return downsampled
+
+        def _first_level_from_raw(x: np.ndarray) -> np.ndarray:
+            """Build the factor=4 level directly from the raw data."""
+            N, M = x.shape
+            n_bins = math.ceil(N / 4)
+            pad = n_bins * 4 - N
+            # Pad time axis with NaNs so max ignores the padded tail
+            x_pad = np.pad(
+                x, ((0, pad), (0, 0)), mode="constant", constant_values=np.nan
+            )
+            blk = x_pad.reshape(n_bins, 4, M)  # (B, 4, M)
+            maxs = np.nanmax(blk, axis=1)  # (B, M)
+            return maxs.astype(np.float32)
+
+        def _downsample4_bins(level_max: np.ndarray) -> np.ndarray:
+            """
+            Build the next pyramid level from the previous one by grouping every 4
+            bins. Input is (B, M) -> Output is (ceil(B/4), M).
+            """
+            B, M = level_max.shape
+            n_bins_next = math.ceil(B / 4)
+            pad = n_bins_next * 4 - B
+            lvl_pad = np.pad(
+                level_max,
+                ((0, pad), (0, 0)),
+                mode="constant",
+                constant_values=np.nan,
+            )
+            blk = lvl_pad.reshape(n_bins_next, 4, M)  # (B', 4, M)
+
+            # Next maxs from maxs
+            maxs = np.nanmax(blk, axis=1)  # (B', M)
+            return maxs.astype(np.float32)
+
+        # Level 1: factor = 4 from raw data
+        factor = 4
+        level = _first_level_from_raw(data)
+        downsampled[factor] = level
+
+        # Higher levels: factor *= 4 each time, built from previous level
+        factor *= 4  # -> 16
+        while factor < n_timepoints / 1000:
+            level = _downsample4_bins(level)
+            downsampled[factor] = level
+            factor *= 4
+
+        return downsampled
+
+    def _calculate_optimal_chunk_size(
+        self, shape: tuple, target_size_mb: float = 5.0
+    ) -> tuple:
+        """
+        Calculate optimal chunk size for Zarr storage targeting ~5MB per chunk
+
+        Args:
+            shape: Array shape (n_timepoints, n_frequencies)
+            target_size_mb: Target chunk size in MB
+
+        Returns:
+            Tuple of chunk dimensions
+        """
+        # Calculate bytes per element (float32 = 4 bytes)
+        bytes_per_element = 4
+        target_size_bytes = target_size_mb * 1024 * 1024
+
+        n_timepoints, n_frequencies = shape
+        elements_per_timepoint = n_frequencies
+
+        # Calculate chunk size in timepoints
+        max_timepoints_per_chunk = target_size_bytes // (
+            elements_per_timepoint * bytes_per_element
+        )
+
+        # Find next lower power of 2
+        chunk_timepoints = 2 ** math.floor(math.log2(max_timepoints_per_chunk))
+        chunk_timepoints = max(chunk_timepoints, 1)  # At least 1
+        chunk_timepoints = min(chunk_timepoints, n_timepoints)  # At most n_timepoints
+
+        # If n_timepoints is less than our calculated size, round down to next power of 2
+        if chunk_timepoints > n_timepoints:
+            chunk_timepoints = 2 ** math.floor(math.log2(n_timepoints))
+
+        return (chunk_timepoints, n_frequencies)
+
+    def _write_to_zarr_group(self, group: zarr.Group) -> None:
+        """
+        Write the spectrogram data to a Zarr group
+
+        Args:
+            group: Zarr group to write data into
+        """
+        group.attrs["view_type"] = "Spectrogram"
+
+        # Store metadata
+        group.attrs["start_time_sec"] = self.start_time_sec
+        group.attrs["sampling_frequency_hz"] = self.sampling_frequency_hz
+        group.attrs["frequency_min_hz"] = self.frequency_min_hz
+        group.attrs["frequency_delta_hz"] = self.frequency_delta_hz
+        group.attrs["n_timepoints"] = self.n_timepoints
+        group.attrs["n_frequencies"] = self.n_frequencies
+        group.attrs["data_min"] = self.data_min
+        group.attrs["data_max"] = self.data_max
+
+        # Store frequency bins
+        group.create_dataset(
+            "frequency_bins",
+            data=self.frequency_bins.astype(np.float32),
+            compression="blosc",
+            compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 1},
+        )
+
+        # Store original data with optimal chunking
+        original_chunks = self._calculate_optimal_chunk_size(self.data.shape)
+        group.create_dataset(
+            "data",
+            data=self.data,
+            chunks=original_chunks,
+            compression="blosc",
+            compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 1},
+        )
+
+        # Store downsampled data arrays
+        downsample_factors = list(self.downsampled_data.keys())
+        group.attrs["downsample_factors"] = downsample_factors
+
+        for factor, downsampled_array in self.downsampled_data.items():
+            dataset_name = f"data_ds_{factor}"
+
+            # Calculate optimal chunks for this downsampled array
+            ds_chunks = self._calculate_optimal_chunk_size(downsampled_array.shape)
+
+            group.create_dataset(
+                dataset_name,
+                data=downsampled_array,
+                chunks=ds_chunks,
+                compression="blosc",
+                compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 1},
+            )
+
+        print(f"Stored Spectrogram with {len(downsample_factors)} downsampled levels:")
+        print(f"  Original: {self.data.shape} (chunks: {original_chunks})")
+        for factor in downsample_factors:
+            ds_shape = self.downsampled_data[factor].shape
+            ds_chunks = self._calculate_optimal_chunk_size(ds_shape)
+            print(f"  Factor {factor}: {ds_shape} (chunks: {ds_chunks})")
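(A hedged sketch of constructing the new Spectrogram view with synthetic data; the numbers are arbitrary, and the commented-out show() call is assumed from figpack's usual view API rather than taken from this diff.)

# Synthetic example only
import numpy as np
from figpack.views import Spectrogram

n_timepoints, n_frequencies = 10_000, 64
power = np.abs(np.random.randn(n_timepoints, n_frequencies)).astype(np.float32)

view = Spectrogram(
    start_time_sec=0.0,
    sampling_frequency_hz=100.0,  # one spectrogram row every 10 ms
    frequency_min_hz=1.0,
    frequency_delta_hz=2.0,       # bins at 1, 3, 5, ... Hz
    data=power,
)
# The constructor builds the max-value downsampling pyramid immediately; with
# 10,000 timepoints the loop stops after the factor-4 level (16 >= 10,000 / 1000).
# view.show(title="Spectrogram")  # display call assumed from figpack's view API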
figpack/views/__init__.py
CHANGED

@@ -8,6 +8,7 @@ from .Markdown import Markdown
 from .MatplotlibFigure import MatplotlibFigure
 from .MultiChannelTimeseries import MultiChannelTimeseries
 from .PlotlyFigure import PlotlyFigure
+from .Spectrogram import Spectrogram
 from .Splitter import Splitter
 from .TabLayout import TabLayout
 from .TabLayoutItem import TabLayoutItem