PyPI - brainsets - Versions diffs - 0.1.0__py3-none-any.whl - Mend

brainsets 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

brainsets/__init__.py +3 -0
brainsets/cli.py +128 -0
brainsets/core.py +135 -0
brainsets/descriptions.py +124 -0
brainsets/processing/__init__.py +1 -0
brainsets/processing/signal.py +169 -0
brainsets/taxonomy/__init__.py +17 -0
brainsets/taxonomy/allen.py +28 -0
brainsets/taxonomy/drifting_gratings.py +12 -0
brainsets/taxonomy/homosapiens.py +188 -0
brainsets/taxonomy/macaque.py +253 -0
brainsets/taxonomy/mice.py +26 -0
brainsets/taxonomy/recording_tech.py +65 -0
brainsets/taxonomy/speech.py +176 -0
brainsets/taxonomy/subject.py +21 -0
brainsets/taxonomy/task.py +35 -0
brainsets/taxonomy/writing.py +89 -0
brainsets-0.1.0.dist-info/LICENSE +201 -0
brainsets-0.1.0.dist-info/METADATA +127 -0
brainsets-0.1.0.dist-info/RECORD +23 -0
brainsets-0.1.0.dist-info/WHEEL +5 -0
brainsets-0.1.0.dist-info/entry_points.txt +2 -0
brainsets-0.1.0.dist-info/top_level.txt +1 -0

brainsets/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+__version__ = "0.1.0"
+from .core import serialize_fn_map

brainsets/cli.py ADDED Viewed

@@ -0,0 +1,128 @@
+import click
+import json
+from pathlib import Path
+import subprocess
+CONFIG_FILE = Path.home() / ".brainsets_config.json"
+# TODO: Implement a function to dynamically generate this list
+DATASETS = ["perich_miller_population_2018", "pei_pandarinath_nlb_2021"]
+def load_config():
+    """Load the CLI configuration stored in ``CONFIG_FILE``.
+    Returns:
+        dict: The stored settings layered over the defaults, so the
+        ``"raw_dir"`` and ``"processed_dir"`` keys are always present
+        (``None`` when unset).
+    Raises:
+        json.JSONDecodeError: If the file exists but does not contain valid JSON.
+    """
+    config = {"raw_dir": None, "processed_dir": None}
+    if CONFIG_FILE.exists():
+        with open(CONFIG_FILE, "r") as f:
+            stored = json.load(f)
+        # A hand-edited or partially written file may lack a key; keeping the
+        # defaults underneath lets callers index both keys without a KeyError.
+        if isinstance(stored, dict):
+            config.update(stored)
+    return config
+def save_config(config):
+    """Write ``config`` to ``CONFIG_FILE`` as JSON indented by two spaces."""
+    with CONFIG_FILE.open("w") as handle:
+        json.dump(config, handle, indent=2)
+@click.group()
+def cli():
+    """Brainsets CLI tool."""
+    # Intentionally empty: the group only acts as the parent of the
+    # sub-commands registered through ``@cli.command()``.
+@cli.command()
+@click.argument("dataset", type=click.Choice(DATASETS, case_sensitive=False))
+@click.option("-c", "--cores", default=4, help="Number of cores to use")
+def prepare(dataset, cores):
+    """Download and process a specific dataset."""
+    # Every failure path raises click.ClickException, which prints
+    # "Error: <message>" and ends the process with a non-zero exit status, so
+    # shell scripts and CI jobs can detect that the dataset was not prepared.
+    click.echo(f"Preparing {dataset}...")
+    # Both directories must have been set beforehand with 'brainsets config'.
+    config = load_config()
+    raw_dir = config.get("raw_dir")
+    processed_dir = config.get("processed_dir")
+    if not raw_dir or not processed_dir:
+        raise click.ClickException(
+            "Please set raw and processed directories first using 'brainsets config'"
+        )
+    # An argument list (no shell) keeps directory names with spaces intact.
+    command = [
+        "snakemake",
+        "--config",
+        f"raw_dir={raw_dir}",
+        f"processed_dir={processed_dir}",
+        f"-c{cores}",
+        f"{dataset}",
+    ]
+    try:
+        # Output is not captured, so snakemake writes straight to the terminal.
+        # check=True turns a non-zero exit status into CalledProcessError.
+        subprocess.run(command, check=True)
+    except subprocess.CalledProcessError as e:
+        raise click.ClickException(
+            f"Command failed with return code {e.returncode}"
+        ) from e
+    except OSError as e:
+        # Raised when the process cannot be started at all, e.g. the
+        # snakemake executable is not installed or not on PATH.
+        raise click.ClickException(str(e)) from e
+    click.echo(f"Successfully downloaded {dataset}")
+@cli.command()
+def list():
+    """List available datasets."""
+    # NOTE(review): this function name shadows the builtin ``list`` for the
+    # rest of the module; it is kept because click derives the command name
+    # from it.
+    click.echo("Available datasets:")
+    for line in (f"- {name}" for name in DATASETS):
+        click.echo(line)
+@cli.command()
+@click.option(
+    "--raw",
+    prompt="Enter raw data directory",
+    type=click.Path(file_okay=False, dir_okay=True),
+    required=False,
+)
+@click.option(
+    "--processed",
+    prompt="Enter processed data directory",
+    type=click.Path(file_okay=False, dir_okay=True),
+    required=False,
+)
+def config(raw, processed):
+    """Set raw and processed data directories."""
+    import os
+    # Defensive fallback: the options above already carry ``prompt=``, but ask
+    # again should either value still arrive as None.
+    if raw is None:
+        raw = click.prompt(
+            "Enter raw data directory",
+            type=click.Path(file_okay=False, dir_okay=True),
+        )
+    if processed is None:
+        processed = click.prompt(
+            "Enter processed data directory",
+            type=click.Path(file_okay=False, dir_okay=True),
+        )
+    # Expand a leading "~" first: neither click.Path nor os.makedirs does it,
+    # so "~/data" would otherwise create a literal "~" directory under the
+    # current working directory. Then store absolute paths.
+    raw = os.path.abspath(os.path.expanduser(raw))
+    processed = os.path.abspath(os.path.expanduser(processed))
+    # Create the directories if they don't exist
+    os.makedirs(raw, exist_ok=True)
+    os.makedirs(processed, exist_ok=True)
+    # Update the stored settings in place so unrelated keys are preserved.
+    settings = load_config()
+    settings["raw_dir"] = raw
+    settings["processed_dir"] = processed
+    save_config(settings)
+    click.echo("Configuration updated successfully.")
+    click.echo(f"Raw data directory: {raw}")
+    click.echo(f"Processed data directory: {processed}")
+if __name__ == "__main__":
+    cli()

brainsets/core.py ADDED Viewed

@@ -0,0 +1,135 @@
+from enum import Enum
+import datetime
+class NestedEnumType(type(Enum)):
+    # Metaclass derived from Enum's own metaclass. It lets an enum class be
+    # declared with a ``parent=`` keyword and makes ``in`` checks follow that
+    # parent link.
+    def __new__(cls, clsname, bases, clsdict, parent=None):
+        # ``parent`` is None for a top-level enum; otherwise it is the object
+        # this enum is nested under (presumably a member of another enum —
+        # confirm against the modules that pass ``parent=``).
+        new_cls = super().__new__(cls, clsname, bases, clsdict)
+        new_cls._parent = parent
+        if parent is not None:
+            # Back-reference that StringIntEnum.from_string follows to descend
+            # from the parent into this nested enum.
+            parent._parent_cls = new_cls
+            # Expose every nested member as an attribute of the parent too.
+            for name, member in new_cls.__members__.items():
+                parent.__setattr__(name, member)
+        return new_cls
+    def __contains__(cls, member):
+        # True when ``member`` is one of this enum's own members, or when the
+        # object it is nested under (``member._parent``) is itself contained
+        # in this enum.
+        # NOTE(review): ``member._parent`` is read whenever the first test
+        # fails, so an object without that attribute raises AttributeError
+        # instead of yielding False.
+        return (isinstance(member, cls) and (member._name_ in cls._member_map_)) or (
+            member._parent is not None and member._parent in cls
+        )
+class StringIntEnum(Enum, metaclass=NestedEnumType):
+    r"""Enum base class whose members convert to both ``str`` and ``int``.
+    On top of Python's built-in Enum it provides:
+        - ``str(member)``: the member name, prefixed with the parent's string
+          and a dot when the enum is nested under a parent
+        - ``int(member)``: the member value
+        - ``from_string()``: case-insensitive lookup of a member by name
+        - ``max_value()``: the largest member value
+    .. code-block:: python
+        >>> class Color(StringIntEnum):
+        ...     RED = 1
+        ...     BLUE = 2
+        >>> str(Color.RED)
+        'RED'
+        >>> int(Color.RED)
+        1
+        >>> Color.from_string("red")
+        <Color.RED: 1>
+        >>> Color.max_value()
+        2
+    """
+    def __str__(self):
+        # Top-level enums print the bare name; nested ones are qualified with
+        # the string form of the parent they hang under.
+        if self._parent is None:
+            return self.name
+        return f"{str(self._parent)}.{self.name}"
+    def __int__(self):
+        return self.value
+    @classmethod
+    def from_string(cls, string: str) -> "StringIntEnum":
+        r"""Look up an enum member by name.
+        Matching ignores case and surrounding whitespace, and treats spaces as
+        underscores. A dotted string is resolved in two steps: the part before
+        the first dot in this enum, the remainder in the enum nested under
+        that member.
+        Args:
+            string: The name to convert to an enum member.
+        Raises:
+            ValueError: If no member of this enum matches the normalized name.
+        """
+        head, dot, tail = string.partition(".")
+        if dot:
+            # Dotted path: descend through the parent member's nested enum.
+            return cls.from_string(head)._parent_cls.from_string(tail)
+        key = string.strip().upper().replace(" ", "_")
+        # Upper-cased member names (aliases included) mapped to their members.
+        by_upper_name = {
+            name.upper(): member for name, member in cls.__members__.items()
+        }
+        if key not in by_upper_name:
+            raise ValueError(
+                f"{key} does not exist in {cls.__name__}, "
+                "consider adding it to the enum."
+            )
+        return by_upper_name[key]
+    @classmethod
+    def max_value(cls):
+        r"""Return the maximum value in the enum class."""
+        return max(member.value for member in cls.__members__.values())
+class Dictable:
+    r"""Mixin giving a dataclass a ``to_dict`` export method."""
+    def to_dict(self):
+        r"""Return the fields of this dataclass instance as a dictionary.
+        Returns:
+            dict: One entry per dataclass field, keyed by field name.
+        .. code-block:: python
+            >>> from dataclasses import dataclass
+            >>> @dataclass
+            ... class Person(Dictable):
+            ...     name: str
+            ...     age: int
+            >>> p = Person("Alice", 30)
+            >>> p.to_dict()
+            {'name': 'Alice', 'age': 30}
+        """
+        import dataclasses
+        # dict() takes the same shallow copy of asdict's result as a
+        # key/value comprehension over its items would.
+        return dict(dataclasses.asdict(self))  # type: ignore
+def string_int_enum_serialize_fn(obj, serialize_fn_map=None):
+    r"""Convert a StringIntEnum object to a string.
+    Args:
+        obj: The value to serialize; it is passed to ``str()``.
+        serialize_fn_map: Accepted but not used by this function.
+    Returns:
+        str: ``str(obj)``.
+    """
+    return str(obj)
+def datetime_serialize_fn(obj, serialize_fn_map=None):
+    r"""Convert a datetime object to a string.
+    Args:
+        obj: The value to serialize; it is passed to ``str()``.
+        serialize_fn_map: Accepted but not used by this function.
+    Returns:
+        str: ``str(obj)``.
+    """
+    return str(obj)
+# Maps a type to the function that turns instances of that type into strings.
+serialize_fn_map = {
+    StringIntEnum: string_int_enum_serialize_fn,
+    datetime.datetime: datetime_serialize_fn,
+}

brainsets/descriptions.py ADDED Viewed

@@ -0,0 +1,124 @@
+import datetime
+from typing import Dict, List, Tuple, Optional, Union
+from pydantic.dataclasses import dataclass
+import temporaldata
+import brainsets
+from brainsets.taxonomy import *
+from brainsets.taxonomy.mice import *
+# ``dataclass`` here is pydantic's decorator (pydantic.dataclasses), not the
+# standard-library one.
+@dataclass
+class BrainsetDescription(temporaldata.Data):
+    r"""A class for describing a brainset.
+    Parameters
+    ----------
+    id : str
+        Unique identifier for the brainset
+    origin_version : str
+        Version identifier for the original data source
+    derived_version : str
+        Version identifier for the derived/processed data
+    source : str
+        Original data source (usually a URL, or a short description otherwise)
+    description : str
+        Text description of the brainset
+    brainsets_version : str, optional
+        Version of brainsets package used, defaults to current version
+    temporaldata_version : str, optional
+        Version of temporaldata package used, defaults to current version
+    """
+    id: str
+    origin_version: str
+    derived_version: str
+    source: str
+    description: str
+    # Both defaults are read from the imported packages once, when this class
+    # body is executed.
+    brainsets_version: str = brainsets.__version__
+    temporaldata_version: str = temporaldata.__version__
+# ``dataclass`` here is pydantic's decorator (pydantic.dataclasses), not the
+# standard-library one.
+@dataclass
+class SubjectDescription(temporaldata.Data):
+    r"""A class for describing a subject.
+    Only ``id`` and ``species`` are required; every other field has a default.
+    Parameters
+    ----------
+    id : str
+        Unique identifier for the subject
+    species : Species
+        Species of the subject
+    age : float, optional
+        Age of the subject in days, defaults to 0.0
+    sex : Sex, optional
+        Sex of the subject, defaults to UNKNOWN
+    genotype : str, optional
+        Genotype of the subject, defaults to "unknown"
+    cre_line : Cre_line, optional
+        Cre line of the subject, defaults to None
+    """
+    id: str
+    species: Species
+    age: float = 0.0  # in days
+    sex: Sex = Sex.UNKNOWN
+    # Free-form string rather than an enum.
+    genotype: str = "unknown"  # no idea how many there will be for now.
+    cre_line: Optional[Cre_line] = None
+# ``dataclass`` here is pydantic's decorator (pydantic.dataclasses), not the
+# standard-library one.
+@dataclass
+class SessionDescription(temporaldata.Data):
+    r"""A class for describing an experimental session.
+    Parameters
+    ----------
+    id : str
+        Unique identifier for the session
+    recording_date : datetime.datetime
+        Date and time when the recording was made
+    task : Task, optional
+        Task performed during the session, defaults to None
+    """
+    id: str
+    recording_date: datetime.datetime
+    task: Optional[Task] = None
+# ``dataclass`` here is pydantic's decorator (pydantic.dataclasses), not the
+# standard-library one, so the annotations below drive input validation.
+@dataclass
+class DeviceDescription(temporaldata.Data):
+    r"""A class for describing a recording device.
+    Parameters
+    ----------
+    id : str
+        Unique identifier for the device
+    recording_tech : RecordingTech or List[RecordingTech], optional
+        Recording technology used, defaults to None
+    processing : str, optional
+        Processing applied to the recording, defaults to None
+    chronic : bool, optional
+        Whether the device was chronically implanted, defaults to False
+    start_date : datetime.datetime, optional
+        Date when device was implanted/first used, defaults to None
+    end_date : datetime.datetime, optional
+        Date when device was removed/last used, defaults to None
+    imaging_depth : float, optional
+        Depth of imaging in micrometers, defaults to None
+    target_area : BrainRegion, optional
+        Target brain region for recording, defaults to None
+    """
+    id: str
+    # units: List[str]
+    # areas: Union[List[StringIntEnum], List[Macaque]]
+    # Optional[...] so that None is a valid value and not only the unchecked
+    # default: without it, passing recording_tech=None explicitly fails
+    # validation.
+    recording_tech: Optional[Union[RecordingTech, List[RecordingTech]]] = None
+    processing: Optional[str] = None
+    chronic: bool = False
+    start_date: Optional[datetime.datetime] = None
+    end_date: Optional[datetime.datetime] = None
+    # Ophys
+    imaging_depth: Optional[float] = None  # in um
+    # NOTE(review): BrainRegion has to be supplied by one of the star imports
+    # at the top of the module — confirm which one exports it.
+    target_area: Optional[BrainRegion] = None

brainsets/processing/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .signal import downsample_wideband, extract_bands, cube_to_long

brainsets/processing/signal.py ADDED Viewed

@@ -0,0 +1,169 @@
+"""Signal processing functions. Inspired by Stavisky et al. (2015).
+https://dx.doi.org/10.1088/1741-2560/12/3/036009
+"""
+from typing import List, Tuple
+import numpy as np
+import tqdm
+from scipy import signal
+from temporaldata import Data, IrregularTimeSeries, ArrayDict
+from brainsets.taxonomy import RecordingTech
+def downsample_wideband(
+    wideband: np.ndarray,
+    timestamps: np.ndarray,
+    wideband_Fs: float,
+    lfp_Fs: float = 1000,
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Reduce a wideband recording to an LFP signal sampled at ``lfp_Fs``.
+    The samples are averaged in groups of four, low-pass filtered with a
+    4th-order Butterworth filter applied forward and backward (``filtfilt``),
+    and linearly interpolated onto a regular time grid.
+    Args:
+        wideband: Samples with time on axis 0 and channels on axis 1.
+        timestamps: One timestamp per row of ``wideband``.
+        wideband_Fs: Sampling rate of ``wideband``.
+        lfp_Fs: Sampling rate of the returned signal.
+    Returns:
+        The LFP array (time on axis 0, one column per channel) and the
+        timestamps of its rows.
+    """
+    assert wideband.shape[0] == timestamps.shape[0], "Time should be first dimension."
+    group = 4  # number of consecutive samples averaged together
+    if wideband.shape[0] % group != 0:
+        # Drop the trailing samples that do not fill a complete group.
+        wideband = wideband[: -(wideband.shape[0] % group), :]
+        timestamps = timestamps[: -(timestamps.shape[0] % group)]
+    n_channels = wideband.shape[1]
+    wideband = wideband.reshape(-1, group, n_channels).mean(axis=1)
+    # Each averaged sample keeps the timestamp of the first sample in its group.
+    timestamps = timestamps[::group]
+    nyquist = 0.5 * wideband_Fs / group  # Nyquist frequency after averaging
+    # The cutoff is 0.333 * lfp_Fs (333 when lfp_Fs is 1000); butter expects it
+    # as a fraction of the Nyquist frequency.
+    b, a = signal.butter(
+        4, 0.333 * lfp_Fs / nyquist, btype="low", analog=False, output="ba"
+    )
+    # Regular grid at the requested rate; the end point is excluded by arange.
+    t_new = np.arange(timestamps[0], timestamps[-1], 1 / lfp_Fs)
+    lfp = np.zeros((len(t_new), n_channels))
+    # One channel at a time keeps the filter's temporaries small.
+    for ch in range(n_channels):
+        smoothed = signal.filtfilt(b, a, wideband[:, ch], axis=0)
+        lfp[:, ch] = np.interp(t_new, timestamps, smoothed)
+    return lfp, t_new
+def extract_bands(
+    lfps: np.ndarray, ts: np.ndarray, Fs: float = 1000, notch: float = 60
+) -> Tuple[np.ndarray, np.ndarray, List]:
+    """Extract bands from LFP
+    We prefer to extract bands from the LFP upstream rather than downstream, because
+    it can be difficult to estimate e.g. the phase of low-frequency LFPs from
+    short segments.
+    We use the proposed bands from Stavisky et al. (2015), but we use the MNE toolbox
+    rather than straight scipy signal.
+    Args:
+        lfps: LFP samples with time on axis 0 and channels on axis 1.
+        ts: One timestamp per row of ``lfps``.
+        Fs: Sampling rate of ``lfps``; must be a multiple of 50.
+        notch: Base frequency of the notch filter; its multiples up to five
+            times this value are notched as well.
+    Returns:
+        A ``(time, channel, band)`` array resampled to 50 samples per second,
+        the matching subsampled timestamps, and the list of band names.
+    Raises:
+        ImportError: If the MNE library is not installed.
+    """
+    try:
+        import mne
+    except ImportError:
+        raise ImportError(
+            "This function requires the MNE library which you can install with "
+            "`pip install mne`"
+        )
+    # Output rate shared by every band.
+    target_Fs = 50
+    assert (
+        Fs % target_Fs == 0
+    ), "Sampling rate must be a multiple of the target frequency"
+    assert lfps.shape[0] == ts.shape[0], "Time should be first dimension."
+    info = mne.create_info(
+        ch_names=lfps.shape[1], sfreq=Fs, ch_types=["eeg"] * lfps.shape[1]
+    )
+    # The array is transposed so that channels come first for RawArray.
+    data = mne.io.RawArray(lfps.T, info)
+    # Notch at ``notch`` and its multiples up to 5 * notch.
+    data = data.notch_filter(np.arange(notch, notch * 5 + 1, notch), n_jobs=4)
+    filtered = []
+    # The first five names pair with ``bands`` below; "lmp" is appended after
+    # the loop.
+    band_names = ["delta", "theta", "alpha", "beta", "gamma", "lmp"]
+    bands = [(1, 4), (3, 10), (12, 23), (27, 38), (50, 300)]
+    for band_low, band_hi in bands:
+        # Band-pass a copy, square the samples, filter again, then resample.
+        band = data.copy().filter(band_low, band_hi, fir_design="firwin", n_jobs=4)
+        band = band.apply_function(lambda x: x**2, n_jobs=4)
+        # NOTE(review): with l_freq=18 and h_freq=None this call reads as a
+        # high-pass in MNE — confirm that a low-pass on the squared signal was
+        # not intended.
+        band = band.filter(18, None, fir_design="firwin", n_jobs=4)
+        # It seems resample overwrites the original data, so we copy it first.
+        # NOTE(review): no copy is made at this point; ``band`` already derives
+        # from ``data.copy()`` above.
+        band = band.resample(target_Fs, npad="auto", n_jobs=4)
+        filtered.append(band.get_data().T)
+    # "lmp": the notch-filtered signal band-passed between 0.1 and 20, not squared.
+    lmp = data.copy().filter(0.1, 20, fir_design="firwin", n_jobs=4)
+    lmp = lmp.resample(target_Fs, npad="auto", n_jobs=4)
+    filtered.append(lmp.get_data().T)
+    # Keep one timestamp per output sample, starting half a step in.
+    ts = ts[int(Fs / target_Fs / 2) :: int(Fs / target_Fs)]
+    stacked = np.stack(filtered, axis=2)
+    # There can be off by one errors.
+    if stacked.shape[0] != len(ts):
+        stacked = stacked[: len(ts), :, :]
+    return stacked, ts, band_names
+def cube_to_long(
+    ts: np.ndarray, cube: np.ndarray, channel_prefix="chan"
+) -> Tuple[List[IrregularTimeSeries], Data]:
+    """Convert a cube of threshold crossings to a list of trials and units.
+    Args:
+        ts: One timestamp per time bin (length ``cube.shape[1]``).
+        cube: Non-negative integer counts shaped ``(batch, time, channel)``.
+        channel_prefix: Prefix used to build channel names and unit ids.
+    Returns:
+        A list with one IrregularTimeSeries per entry along the first axis of
+        ``cube``, and an ArrayDict describing one unit per channel.
+    """
+    # NOTE(review): the return annotation names ``Data`` but the second value
+    # built below is an ``ArrayDict`` — confirm which is intended.
+    assert cube.shape[1] == len(ts)
+    assert cube.ndim == 3
+    # Channel index of every (time, channel) cell.
+    channels = np.arange(cube.shape[2])
+    channels = np.tile(channels, [cube.shape[1], 1])
+    # First dim is batch, second is time, third is channel.
+    assert np.issubdtype(cube.dtype, np.integer)
+    assert cube.min() >= 0
+    # Timestamp of every (time, channel) cell, same shape as ``channels``.
+    ts = np.tile(ts.reshape((-1, 1)), [1, cube.shape[2]])
+    assert ts.shape == channels.shape
+    # The first dimension we map to a single trial.
+    trials = []
+    for b in tqdm.tqdm(range(cube.shape[0])):
+        cube_ = cube[b, :, :]
+        ts_ = []
+        channels_ = []
+        # This data is binned, so we create N identical timestamps when there are N
+        # spikes in a bin.
+        for n in range(1, cube_.max() + 1):
+            ts_.append(ts[cube_ >= n])
+            channels_.append(channels[cube_ >= n])
+        # NOTE(review): when a trial holds no counts at all the loop above does
+        # not run, both lists stay empty and np.concatenate raises ValueError.
+        ts_ = np.concatenate(ts_)
+        channels_ = np.concatenate(channels_)
+        # Order the events of this trial by time.
+        tidx = np.argsort(ts_)
+        ts_ = ts_[tidx]
+        channels_ = channels_[tidx]
+        trials.append(
+            IrregularTimeSeries(
+                timestamps=ts_,
+                unit_index=channels_,
+                types=np.ones(len(ts_))
+                * int(RecordingTech.UTAH_ARRAY_THRESHOLD_CROSSINGS),
+                domain="auto",
+            )
+        )
+    # Total count per channel, summed over batch and time.
+    counts = cube.sum(axis=0).sum(axis=0)
+    units = ArrayDict(
+        count=np.array(counts.astype(int)),
+        # channel_name is zero-padded to three digits; id below is not.
+        channel_name=np.array(
+            [f"{channel_prefix}{c:03}" for c in range(cube.shape[2])]
+        ),
+        unit_number=np.zeros(cube.shape[2]),
+        id=np.array([f"{channel_prefix}{c}" for c in range(cube.shape[2])]),
+        channel_number=np.arange(cube.shape[2]),
+        type=np.ones(cube.shape[2]) * int(RecordingTech.UTAH_ARRAY_THRESHOLD_CROSSINGS),
+    )
+    return trials, units

brainsets/taxonomy/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+from .subject import (
+    Species,
+    Sex,
+)
+from .task import (
+    Task,
+)
+from .drifting_gratings import Orientation_8_Classes
+from .macaque import Macaque
+from .mice import Cre_line
+from .recording_tech import (
+    RecordingTech,
+    Hemisphere,
+)

brainsets/taxonomy/allen.py ADDED Viewed

@@ -0,0 +1,28 @@
+# Lookup tables from a stimulus parameter value (float key) to its class
+# index, numbered from 0 in ascending order of the value.
+# Orientation in 45-unit steps: 0.0, 45.0, ..., 315.0.
+ORIENTATION_8_CLASSES_map = {
+    float(angle): index for index, angle in enumerate(range(0, 360, 45))
+}
+# Orientation in 30-unit steps: 0.0, 30.0, ..., 330.0.
+ORIENTATION_12_CLASSES_map = {
+    float(angle): index for index, angle in enumerate(range(0, 360, 30))
+}
+TEMPORAL_FREQ_5_map = {
+    freq: index for index, freq in enumerate((1.0, 2.0, 4.0, 8.0, 15.0))
+}
+SPATIAL_FREQ_5_map = {
+    freq: index for index, freq in enumerate((0.02, 0.04, 0.08, 0.16, 0.32))
+}
+# Phase in 90-unit steps: 0.0, 90.0, 180.0, 270.0.
+PHASE_4_map = {float(phase): index for index, phase in enumerate(range(0, 360, 90))}

brainsets/taxonomy/drifting_gratings.py ADDED Viewed

@@ -0,0 +1,12 @@
+from brainsets.core import StringIntEnum
+class Orientation_8_Classes(StringIntEnum):
+    # Eight orientation classes numbered 0-7. The numeric suffix of each name
+    # and its value pair up the same way as the keys and values of
+    # ORIENTATION_8_CLASSES_map in brainsets/taxonomy/allen.py (presumably
+    # angles in degrees — confirm).
+    angle_0 = 0
+    angle_45 = 1
+    angle_90 = 2
+    angle_135 = 3
+    angle_180 = 4
+    angle_225 = 5
+    angle_270 = 6
+    angle_315 = 7