PyPI - emgio - Versions diffs - 0.2.0__py3-none-any.whl - Mend

emgio 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

emgio/__init__.py +8 -0
emgio/analysis/__init__.py +16 -0
emgio/analysis/signal.py +345 -0
emgio/analysis/verification.py +205 -0
emgio/core/__init__.py +0 -0
emgio/core/emg.py +485 -0
emgio/exporters/__init__.py +0 -0
emgio/exporters/edf.py +650 -0
emgio/importers/__init__.py +0 -0
emgio/importers/base.py +20 -0
emgio/importers/csv.py +440 -0
emgio/importers/edf.py +171 -0
emgio/importers/eeglab.py +298 -0
emgio/importers/otb.py +309 -0
emgio/importers/trigno.py +134 -0
emgio/importers/wfdb.py +152 -0
emgio/tests/__init__.py +0 -0
emgio/tests/test_core.py +711 -0
emgio/tests/test_eeglab_importer.py +244 -0
emgio/tests/test_exporters.py +905 -0
emgio/tests/test_importer_wfdb.py +149 -0
emgio/tests/test_importers.py +474 -0
emgio/tests/test_verification.py +356 -0
emgio/tests/test_visualization.py +306 -0
emgio/utils/__init__.py +0 -0
emgio/version.py +14 -0
emgio/visualization/__init__.py +6 -0
emgio/visualization/static.py +321 -0
emgio-0.2.0.dist-info/METADATA +228 -0
emgio-0.2.0.dist-info/RECORD +33 -0
emgio-0.2.0.dist-info/WHEEL +5 -0
emgio-0.2.0.dist-info/licenses/LICENSE +29 -0
emgio-0.2.0.dist-info/top_level.txt +1 -0

emgio/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""EMGIO: A Python package for EMG data import/export and manipulation."""
+from .core.emg import EMG
+from .exporters.edf import EDFExporter
+from .importers.trigno import TrignoImporter
+from .version import __version__, __version_info__
+__all__ = ["EMG", "TrignoImporter", "EDFExporter", "__version__", "__version_info__"]

emgio/analysis/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Signal analysis module
+from .signal import (
+    analyze_signal,
+    determine_format_suitability,
+    quantization_analysis,
+    # Add other signal analysis functions if needed
+)
+from .verification import compare_signals
+__all__ = [
+    "analyze_signal",
+    "determine_format_suitability",
+    "quantization_analysis",
+    "compare_signals",  # Add to __all__
+]

emgio/analysis/signal.py ADDED Viewed

@@ -0,0 +1,345 @@
+"""
+Signal analysis functions for EMG data.
+This module provides functions for analyzing EMG signals, including noise floor estimation,
+dynamic range calculation, and format suitability determination.
+"""
+import numpy as np
+# SVD-based analysis functions
def find_elbow_point(singular_values: np.ndarray) -> int:
    """
    Find the elbow point in singular values using the second derivative method.

    The elbow is the position where the normalized cumulative energy curve
    (running sum of squared singular values divided by the total) bends most
    sharply, i.e. where the absolute second difference is largest.

    Args:
        singular_values: Array of singular values from SVD
    Returns:
        int: Index of the elbow point. For three or more values with non-zero
            total energy the result lies in [1, len(singular_values) - 1].
            Degenerate inputs (fewer than three values, or zero total energy)
            have no curvature to measure and yield the minimum index 1.
    """
    values = np.asarray(singular_values, dtype=float)
    # Two diff operations need at least three points; with fewer there is no
    # second derivative, so fall back to the smallest allowed index.
    if values.size < 3:
        return 1
    cumulative_energy = np.cumsum(values**2)
    total_energy = cumulative_energy[-1]
    # Guard the normalization: an all-zero spectrum would give 0/0 = NaN.
    if total_energy == 0:
        return 1
    cumulative_energy = cumulative_energy / total_energy
    second_derivative = np.diff(cumulative_energy, n=2)
    # Add 2 to account for the two diff operations
    elbow_idx = int(np.argmax(np.abs(second_derivative))) + 2
    # Clamp to a valid index that is never smaller than 1
    return max(1, min(elbow_idx, values.size - 1))
def analyze_signal_svd(detrended: np.ndarray, svd_rank: int = None) -> float:
    """
    Estimate noise floor using SVD-based method with automatic rank detection.

    The signal is embedded in a Hankel (time-delay) matrix whose singular
    values are split into a leading "signal" group and a trailing "noise"
    group; the noise floor is derived from the median of the noise group.

    Args:
        detrended: Detrended signal array
        svd_rank: Optional manual rank cutoff for signal/noise separation.
            When None, the rank is the number of leading singular values
            needed to reach 99.5% of the total energy. In both cases the rank
            is clamped to [1, number_of_singular_values // 2].
    Returns:
        float: Estimated noise floor. Signals with fewer than two samples
            return machine epsilon (no estimate is possible); signals with
            zero energy return 0.0.
    """
    detrended = np.asarray(detrended, dtype=float)
    n = len(detrended)
    eps = np.finfo(float).eps
    if n < 2:
        # np.diff would be empty and its std NaN; report the smallest floor.
        return eps
    if n < 10:  # For very short signals, use the first-difference estimate
        return np.std(np.diff(detrended)) / np.sqrt(2)
    # Choose embedding dimension (rule of thumb: sqrt of signal length)
    m = min(int(np.sqrt(n)), n // 3)
    k = n - m + 1
    # Hankel matrix: row i is detrended[i : i + k], giving shape (m, k).
    hankel = np.lib.stride_tricks.sliding_window_view(detrended, k)
    # Only the singular values are needed, so skip computing U and Vh.
    S = np.linalg.svd(hankel, compute_uv=False)
    total_energy = np.sum(S**2)
    if total_energy == 0:
        # All-zero (constant) input: nothing to separate, and normalizing by
        # the total energy below would be 0/0.
        return 0.0
    if svd_rank is None:
        cumulative_energy = np.cumsum(S**2) / total_energy
        # High threshold so that high dynamic range signals keep their
        # low-energy components in the signal subspace.
        energy_threshold = 0.995
        signal_indices = np.flatnonzero(cumulative_energy >= energy_threshold)
        if len(signal_indices) > 0:
            svd_rank = signal_indices[0] + 1  # +1 to include the threshold-crossing component
        else:
            # Fallback to elbow method if energy threshold approach fails
            svd_rank = find_elbow_point(S)
    # Ensure svd_rank is at least 1 and at most 1/2 of singular values
    svd_rank = max(1, min(svd_rank, len(S) // 2))
    # Everything after the first svd_rank singular values is treated as noise
    noise_eigenvalues = S[svd_rank:]
    if len(noise_eigenvalues) == 0 or np.all(noise_eigenvalues < eps * 1e3):
        # No measurable noise subspace: use a tiny fraction of the smallest
        # singular value instead.
        return S[-1] * 1e-8 if len(S) > 0 else eps
    # Median is more robust than the mean; dividing by sqrt(m) scales the
    # value back towards the original signal scale.
    noise_floor = np.median(noise_eigenvalues) / np.sqrt(m)
    if noise_floor < eps * 1e3:
        # Floor the estimate at 1e-6 of the peak-to-peak range (120 dB)
        signal_range = np.max(detrended) - np.min(detrended)
        noise_floor = max(noise_floor, signal_range * 1e-6)
    return noise_floor
+# FFT-based analysis functions
def analyze_signal_fft(detrended: np.ndarray, fft_noise_range: tuple = None) -> float:
    """
    Estimate noise floor using enhanced FFT-based method.

    The signal is Blackman-windowed and transformed with a real FFT. The noise
    floor is the square root of the median power, taken either over a
    caller-supplied frequency band or over the weakest 10% of spectral bins.

    Args:
        detrended: Detrended signal array
        fft_noise_range: Optional tuple (min_freq, max_freq) specifying frequency range for noise.
            Frequencies are normalized (cycles per sample, 0 to 0.5) because
            the bin frequencies come from np.fft.rfftfreq with unit sample
            spacing. If no bin falls inside the range, the adaptive method is
            used instead.
    Returns:
        float: Estimated noise floor. The adaptive estimate is never smaller
            than 1e-6 of the peak-to-peak signal range; the band-limited
            estimate is returned as-is.
    Raises:
        ValueError: If the signal is empty.
    """
    detrended = np.asarray(detrended, dtype=float)
    n = len(detrended)
    if n == 0:
        raise ValueError("Cannot estimate a noise floor from an empty signal")
    # Blackman window keeps leakage from strong components out of the noise bins
    windowed = detrended * np.blackman(n)
    power = np.abs(np.fft.rfft(windowed)) ** 2
    if fft_noise_range is not None:
        min_freq, max_freq = fft_noise_range
        freq = np.fft.rfftfreq(n)
        noise_mask = (freq >= min_freq) & (freq <= max_freq)
        if np.any(noise_mask):
            # Use median of power in the specified range as noise floor
            return np.sqrt(np.median(power[noise_mask]))
    # Adaptive method: treat the lowest 10% of bins (at least one) as noise.
    sorted_power = np.sort(power)
    noise_idx = max(1, int(len(sorted_power) * 0.1))
    noise_floor = np.sqrt(np.median(sorted_power[:noise_idx]))
    # Floor the estimate at 1e-6 of the peak-to-peak range (120 dB)
    signal_range = np.max(detrended) - np.min(detrended)
    return max(noise_floor, signal_range * 1e-6)
+# High-level analysis functions
def analyze_signal(
    signal: np.ndarray, method: str = "svd", fft_noise_range: tuple = None, svd_rank: int = None
) -> dict:
    """
    Analyze signal characteristics including noise floor and dynamic range.

    Args:
        signal: Input signal array
        method: Method for noise floor estimation: 'svd' (default), 'fft', or 'both'
            (case-insensitive). 'both' uses the smaller of the two estimates.
            An unrecognized name does not raise; it takes the statistical
            fallback and is reported in the returned 'method' label.
        fft_noise_range: Optional tuple (min_freq, max_freq) for FFT method
        svd_rank: Optional rank cutoff for SVD method
    Returns:
        dict: Analysis results. An all-zero signal yields the keys 'range',
            'noise_floor', 'dynamic_range_db' and 'is_zero' (True). Any other
            signal additionally yields 'snr_db' and 'method', where 'method'
            names the estimator actually used (including fallbacks). A
            constant non-zero signal reports a range, dynamic range and SNR of
            0.0 instead of -inf.
    """
    eps = np.finfo(float).eps
    # Handle zero signal case
    if np.allclose(signal, 0):
        return {
            "range": 0.0,
            "noise_floor": eps,
            "dynamic_range_db": 0.0,
            "is_zero": True,
        }
    signal = np.asarray(signal)
    requested = method.lower()
    # Remove DC offset for better analysis
    detrended = signal - np.mean(signal)
    # Calculate signal range (peak-to-peak)
    signal_range = np.max(detrended) - np.min(detrended)
    if signal_range == 0:
        # Constant (DC-only) signal: there is no variation to measure, and
        # log10(0) below would produce -inf together with runtime warnings.
        return {
            "range": 0.0,
            "noise_floor": eps,
            "dynamic_range_db": 0.0,
            "snr_db": 0.0,
            "is_zero": False,
            "method": method,
        }

    def statistical_noise_floor():
        # Last-resort estimate from the spread of first differences.
        return np.std(np.diff(detrended)) / np.sqrt(2)

    # Estimation is best-effort: any failure in an estimator degrades to the
    # next option rather than aborting the analysis.
    if requested == "both":
        try:
            noise_floor_svd = analyze_signal_svd(detrended, svd_rank)
        except Exception:
            # If SVD fails, try FFT
            try:
                noise_floor = analyze_signal_fft(detrended, fft_noise_range)
                method = "fft (fallback)"
            except Exception:
                noise_floor = statistical_noise_floor()
                method = "statistical (fallback)"
        else:
            try:
                noise_floor_fft = analyze_signal_fft(detrended, fft_noise_range)
                # The smaller estimate preserves high dynamic range signals
                noise_floor = min(noise_floor_svd, noise_floor_fft)
                method = "both (min)"
            except Exception:
                # If FFT fails but SVD worked, use SVD result
                noise_floor = noise_floor_svd
                method = "svd (fallback)"
    elif requested in ("svd", "fft"):
        try:
            if requested == "svd":
                noise_floor = analyze_signal_svd(detrended, svd_rank)
            else:
                noise_floor = analyze_signal_fft(detrended, fft_noise_range)
        except Exception:
            noise_floor = statistical_noise_floor()
            method = f"{method} failed, using statistical (fallback)"
    else:
        # Unknown method name: same fallback and label as a failed estimator.
        noise_floor = statistical_noise_floor()
        method = f"{method} failed, using statistical (fallback)"
    # Ensure minimum noise floor; written so that a NaN estimate is also
    # replaced (max() would silently keep NaN).
    if not noise_floor > eps:
        noise_floor = eps
    # Calculate dynamic range in dB
    dynamic_range_db = 20 * np.log10(signal_range / noise_floor)
    # Cap at 140 dB (slightly below the ~144 dB of a 24-bit format) and raise
    # the noise floor so it stays consistent with the capped value.
    max_realistic_dr = 140
    if dynamic_range_db > max_realistic_dr:
        noise_floor = signal_range / (10 ** (max_realistic_dr / 20))
        dynamic_range_db = max_realistic_dr
    # Calculate signal SNR from the standard deviation of the raw signal
    snr_db = 20 * np.log10(np.std(signal) / noise_floor)
    # Cap SNR at realistic values
    max_realistic_snr = 140
    if snr_db > max_realistic_snr:
        snr_db = max_realistic_snr
    return {
        "range": signal_range,
        "noise_floor": noise_floor,
        "dynamic_range_db": dynamic_range_db,
        "snr_db": snr_db,
        "is_zero": False,
        "method": method,
    }
+# Format-related functions
def determine_format_suitability(
    signal: np.ndarray,
    analysis: dict,
    *,
    edf_dynamic_range: float = 90,
    bdf_dynamic_range: float = 140,
    safety_margin: float = 3,
) -> tuple:
    """
    Determine whether EDF or BDF format is suitable for the signal.

    The decision is based only on the dynamic range stored in ``analysis``;
    the ``signal`` argument is accepted for interface compatibility and is
    not inspected.

    Args:
        signal: Input signal array (unused)
        analysis: Signal analysis results from analyze_signal()
        edf_dynamic_range: Usable dynamic range of EDF (16-bit) in dB. The
            default of 90 is slightly below the theoretical 96 dB for safety.
        bdf_dynamic_range: Usable dynamic range of BDF (24-bit) in dB. The
            default of 140 is slightly below the theoretical 144 dB for safety.
        safety_margin: Headroom in dB subtracted from each format limit
            before comparing against the signal's dynamic range.
    Returns:
        tuple: (use_bdf, reason, snr_db)
    """
    # Handle zero signal case
    if analysis.get("is_zero", False):
        return False, "Zero signal, using EDF format", 0.0
    signal_dr = analysis["dynamic_range_db"]
    signal_snr = analysis.get("snr_db", 0)
    if signal_dr <= (edf_dynamic_range - safety_margin):
        return False, f"EDF dynamic range ({edf_dynamic_range} dB) is sufficient", signal_snr
    if signal_dr <= (bdf_dynamic_range - safety_margin):
        return True, f"Signal requires BDF format (DR: {signal_dr:.1f} dB)", signal_snr
    # Beyond the BDF limit: BDF is still the best available choice
    return (
        True,
        f"Signal may require higher resolution than BDF (DR: {signal_dr:.1f} dB)",
        signal_snr,
    )
def quantization_analysis(signal: np.ndarray, bits: int) -> dict:
    """
    Perform detailed quantization error analysis.

    The signal's peak-to-peak range is divided into 2**bits steps, each sample
    is rounded to the nearest multiple of the step size, and the resulting
    error is summarized.

    Args:
        signal: Input signal array
        bits: Number of bits (16 for EDF, 24 for BDF)
    Returns:
        dict: 'step_size', 'max_error' (largest absolute error), 'rmse' and
            'snr' (in dB; the noise power is floored at machine epsilon so
            the value stays finite). A constant signal has a step size of 0
            and is treated as quantized without error.
    """
    signal = np.asarray(signal, dtype=float)
    signal_range = np.max(signal) - np.min(signal)
    step_size = signal_range / (2**bits)
    if step_size > 0:
        # Simulate quantization
        quantized = np.round(signal / step_size) * step_size
    else:
        # Zero range: dividing by the step would give NaN for every sample.
        quantized = signal
    error = signal - quantized
    noise_power = np.mean(error**2)
    rmse = np.sqrt(noise_power)
    # Calculate SNR
    signal_power = np.mean(signal**2)
    eps = np.finfo(float).eps
    if noise_power < eps:
        noise_power = eps
    snr = 10 * np.log10(signal_power / noise_power)
    return {
        "step_size": step_size,
        "max_error": np.max(np.abs(error)),
        "rmse": rmse,
        "snr": snr,
    }

emgio/analysis/verification.py ADDED Viewed

@@ -0,0 +1,205 @@
+"""
+Functions for verifying signal integrity after operations like export/import.
+"""
+import logging
+from typing import TYPE_CHECKING, Dict, Optional
+import numpy as np
+# Use TYPE_CHECKING to avoid circular import at runtime
+if TYPE_CHECKING:
+    from ..core.emg import EMG
def compare_signals(
    emg_original: "EMG",
    emg_reloaded: "EMG",
    tolerance: float = 0.01,  # Default tolerance 1% for NRMSE and Max Norm Abs Diff
    channel_map: Optional[Dict[str, str]] = None,
) -> dict:
    """
    Compare signals between two EMG objects using normalized metrics.
    Returns a dictionary with comparison results per channel and a summary.
    Does NOT perform logging/printing.

    Args:
        emg_original: The original EMG object before export.
        emg_reloaded: The EMG object reloaded from the exported file.
        tolerance: Relative tolerance for comparisons (default: 0.01, i.e. 1%).
                   A channel counts as identical when both NRMSE and
                   Max Norm Abs Diff are strictly below this value.
        channel_map: Optional dictionary mapping original channel names (keys)
                    to reloaded channel names (values). If None, tries exact name
                    match first, then falls back to order-based matching, which
                    pairs channels by their column position in each object.
    Returns:
        dict: A dictionary containing normalized comparison metrics for each compared
              channel, keyed by original channel name. Metrics include 'nrmse'
              (Normalized RMSE), 'max_norm_abs_diff', 'original_range',
              'reloaded_channel' and 'is_identical'.
              Also includes 'channel_summary' with comparison mode and unmatched channels.
              Channels and unmatched lists follow the column order of the inputs.
    Raises:
        ValueError: If channel_map names an original channel that is not in
            emg_original.
    """
    results = {}
    # dict.fromkeys drops duplicate names while keeping column order, so the
    # pairing below is deterministic (plain sets iterate in arbitrary order).
    original_order = list(dict.fromkeys(emg_original.signals.columns))
    reloaded_order = list(dict.fromkeys(emg_reloaded.signals.columns))
    original_channels = set(original_order)
    reloaded_channels = set(reloaded_order)
    channel_summary = {
        "comparison_mode": "unknown",
        "unmatched_original": [],
        "unmatched_reloaded": [],
    }
    if channel_map is not None:
        channel_summary["comparison_mode"] = "mapped"
        # Validate all original channels in map exist
        missing_original = [ch for ch in channel_map if ch not in original_channels]
        if missing_original:
            raise ValueError(
                f"Channel map contains original channels not found in data: {missing_original}"
            )
        # Only mappings whose target exists in the reloaded data are compared
        channel_pairs = [
            (orig, mapped) for orig, mapped in channel_map.items() if mapped in reloaded_channels
        ]
        mapped_targets = set(channel_map.values())
        channel_summary["unmatched_original"] = [
            ch for ch in original_order if ch not in channel_map
        ]
        channel_summary["unmatched_reloaded"] = [
            ch for ch in reloaded_order if ch not in mapped_targets
        ]
    else:
        common_channels = [ch for ch in original_order if ch in reloaded_channels]
        if common_channels:
            # Try exact name matching first
            channel_summary["comparison_mode"] = "exact_name"
            channel_pairs = [(ch, ch) for ch in common_channels]
            channel_summary["unmatched_original"] = [
                ch for ch in original_order if ch not in reloaded_channels
            ]
            channel_summary["unmatched_reloaded"] = [
                ch for ch in reloaded_order if ch not in original_channels
            ]
        else:
            # Fall back to order-based matching: pair by column position, not
            # by alphabetical order (which would pair "EMG10" before "EMG2").
            channel_summary["comparison_mode"] = "order_based"
            min_len = min(len(original_order), len(reloaded_order))
            channel_pairs = list(zip(original_order, reloaded_order))
            channel_summary["unmatched_original"] = original_order[min_len:]
            channel_summary["unmatched_reloaded"] = reloaded_order[min_len:]
    results["channel_summary"] = channel_summary
    if not channel_pairs:
        return results
    eps = np.finfo(float).eps
    for orig_channel, reloaded_channel in channel_pairs:
        sig_orig = emg_original.signals[orig_channel].values
        sig_reloaded = emg_reloaded.signals[reloaded_channel].values
        # On length mismatch, compare only the overlapping leading samples
        if len(sig_orig) != len(sig_reloaded):
            min_len = min(len(sig_orig), len(sig_reloaded))
            sig_orig = sig_orig[:min_len]
            sig_reloaded = sig_reloaded[:min_len]
        # Normalize by the peak-to-peak range of the original signal; constant
        # signals use 1.0, which leaves their metrics as absolute errors.
        sig_orig_range = np.ptp(sig_orig)
        norm_factor = sig_orig_range if sig_orig_range > eps else 1.0
        diff = sig_orig - sig_reloaded
        rmse = np.sqrt(np.mean(diff**2))
        max_abs_diff = np.max(np.abs(diff))
        nrmse = rmse / (norm_factor + eps)
        max_norm_abs_diff = max_abs_diff / (norm_factor + eps)
        # Both metrics must be below tolerance; stored as a plain bool
        is_identical = bool(nrmse < tolerance and max_norm_abs_diff < tolerance)
        results[orig_channel] = {
            "reloaded_channel": reloaded_channel,
            "original_range": sig_orig_range,  # Store original range for context
            "nrmse": nrmse,
            "max_norm_abs_diff": max_norm_abs_diff,
            "is_identical": is_identical,
        }
    return results
def report_verification_results(verification_results: dict, verify_tolerance: float) -> bool:
    """
    Logs a detailed report based on the results from compare_signals.

    Messages go to the root logger. Per-channel differences and the final
    summary are logged at CRITICAL level; unmatched channels are logged at
    WARNING level and the remaining lines at INFO level.

    Args:
        verification_results: The dictionary output from compare_signals.
        verify_tolerance: The tolerance used during comparison (for reporting).
    Returns:
        bool: True if all compared channels were identical within tolerance, False otherwise
            (including when no channel was compared at all).
    """
    summary = verification_results.get("channel_summary", {})
    logging.info("--- Verification Report ---")
    logging.info("Comparison mode: %s", summary.get("comparison_mode", "unknown"))
    if summary.get("unmatched_original"):
        logging.warning("Unmatched original channels: %s", summary["unmatched_original"])
    if summary.get("unmatched_reloaded"):
        logging.warning("Unmatched reloaded channels: %s", summary["unmatched_reloaded"])
    compared_count = 0
    differing_count = 0
    for orig_channel, metrics in verification_results.items():
        # The summary entry shares the dict with the per-channel entries
        if orig_channel == "channel_summary":
            continue
        compared_count += 1
        reloaded_channel = metrics["reloaded_channel"]
        # Show the mapping only when the two names actually differ
        channel_label = (
            f"'{orig_channel}' -> '{reloaded_channel}'"
            if orig_channel != reloaded_channel
            else f"'{orig_channel}'"
        )
        if not metrics["is_identical"]:
            differing_count += 1
            logging.critical(
                "Channel %s: Signals differ (nRMSE: %.2e, MaxNormDiff: %.2e)",
                channel_label,
                metrics["nrmse"],
                metrics["max_norm_abs_diff"],
            )
        else:
            logging.info(
                "Channel %s: Signals are identical (within tolerance %.1e).",
                channel_label,
                verify_tolerance,
            )
    if compared_count == 0:
        logging.critical("No channels were actually compared.")
    # Nothing compared counts as a failed verification, not a successful one
    all_identical = compared_count > 0 and differing_count == 0
    if all_identical:
        logging.critical(
            "Verification successful: All %d compared channel pairs are identical within tolerance.",
            compared_count,
        )
    elif compared_count == 0 or summary.get("comparison_mode") == "failed":
        # No pairs to report on, so do not claim that differences were found
        logging.error("Verification failed: Could not compare channels.")
    else:
        logging.critical(
            "Verification finished: Differences found in %d of %d compared pairs.",
            differing_count,
            compared_count,
        )
    logging.info("---------------------------")
    return all_identical

emgio/core/__init__.py ADDED Viewed

File without changes