UnitMatchPy-1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
+ import numpy as np
+
+ def check_is_in(TestArray, ParentArray):
+     """
+     Test to see if a row in TestArray is in ParentArray
+     Arguments:
+     TestArray -- ndarray (N, 2)
+     ParentArray -- ndarray (M, 2)
+
+     Returns:
+     IsIn -- ndarray (N) dtype - bool
+     """
+     IsIn = (TestArray[:, None] == ParentArray).all(-1).any(-1)
+     return IsIn
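For reference, a minimal illustration (not part of the package) of what check_is_in computes: one boolean per row of TestArray, True where that row appears anywhere in ParentArray. The array values are made up for the example.

import numpy as np

test = np.array([[0, 1], [2, 3]])    # rows to look up
parent = np.array([[2, 3], [4, 5]])  # rows to search in
# broadcasting compares every test row against every parent row
is_in = (test[:, None] == parent).all(-1).any(-1)
print(is_in)  # -> [False  True]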
+
+ def AssignUID(Output, param, ClusInfo):
+
+     AllClusterIDs = ClusInfo['OriginalID'] # each unit has a unique ID
+
+     # create arrays for the unique IDs
+     UniqueIDLiberal = np.arange(AllClusterIDs.shape[0])
+     OriUniqueID = np.arange(AllClusterIDs.shape[0])
+     UniqueIDConservative = np.arange(AllClusterIDs.shape[0])
+     UniqueID = np.arange(AllClusterIDs.shape[0])
+
+     GoodRecSesID = ClusInfo['SessionID']
+     RecOpt = np.unique(ClusInfo['SessionID'])
+     nRec = RecOpt.shape[0]
+
+     # data-driven threshold?
+     if param.get('UseDataDrivenProbThrs', False):
+         stepsz = 0.1
+         binedges = np.arange(0, 1 + stepsz, stepsz)
+         plotvec = np.arange(stepsz / 2, 1, stepsz)
+
+         hw, __ = np.histogram(np.diag(Output), bins = len(binedges), density = True)
+
+         Threshold = plotvec[np.diff(hw) > 0.1]
+     else:
+         Threshold = param['MatchThreshold']
+
+     Pairs = np.argwhere(Output > Threshold)
+     Pairs = np.delete(Pairs, np.argwhere(Pairs[:,0] == Pairs[:,1]), axis = 0) # delete self-matches
+     Pairs = np.sort(Pairs, axis = 1) # arrange so the smaller pair ID is in the first column
+     # only keep one copy of each pair, and only if both CVs agree it is a match
+     PairsUnique, Count = np.unique(Pairs, axis = 0, return_counts = True)
+     PairsUniqueFilt = np.delete(PairsUnique, Count == 1, axis = 0) # if Count == 1, only one CV flagged that pair
+
+     # get the mean probability for each match
+     ProbMean = np.nanmean(np.vstack((Output[PairsUniqueFilt[:,0], PairsUniqueFilt[:,1]], Output[PairsUniqueFilt[:,1], PairsUniqueFilt[:,0]])), axis = 0)
+     # sort by the mean probability
+     PairsProb = np.hstack((PairsUniqueFilt, ProbMean[:, np.newaxis]))
+     SortIdxs = np.argsort(-PairsProb[:,2], axis = 0) # go in descending order
+     PairsProbSorted = PairsProb[SortIdxs,:]
+
+     # create an array which has both copies of each match, e.g. (1,2) and (2,1), for easier comparison
+     PairsAll = np.zeros((PairsUniqueFilt.shape[0]*2, 2))
+     PairsAll[:PairsUniqueFilt.shape[0],:] = PairsUniqueFilt
+     PairsAll[PairsUniqueFilt.shape[0]:,:] = PairsUniqueFilt[:, (1,0)]
+
+     nMatchesConservative = 0
+     nMatchesLiberal = 0
+     nMatches = 0
+     # go through each pair and assign it to a group
+     for pair in PairsProbSorted[:,:2]:
+         pair = pair.astype(np.int16)
+
+         # get the conservative group ID for the current two units
+         UnitAConservativeID = UniqueIDConservative[pair[0]]
+         UnitBConservativeID = UniqueIDConservative[pair[1]]
+         # get all units which have the same ID
+         SameGroupIdA = np.argwhere(UniqueIDConservative == UnitAConservativeID).squeeze()
+         SameGroupIdB = np.argwhere(UniqueIDConservative == UnitBConservativeID).squeeze()
+         # reshape to a 1-d array if needed
+         if len(SameGroupIdA.shape) == 0:
+             SameGroupIdA = SameGroupIdA[np.newaxis]
+         if len(SameGroupIdB.shape) == 0:
+             SameGroupIdB = SameGroupIdB[np.newaxis]
+
+         # need to check whether pair[0] has a match with SameGroupIdB and vice versa
+         CheckPairsA = np.stack((SameGroupIdB, np.broadcast_to(np.array(pair[0]), SameGroupIdB.shape)), axis = -1)
+         CheckPairsB = np.stack((SameGroupIdA, np.broadcast_to(np.array(pair[1]), SameGroupIdA.shape)), axis = -1)
+         # delete the potential self-matches
+         CheckPairsA = np.delete(CheckPairsA, np.argwhere(CheckPairsA[:,0] == CheckPairsA[:,1]), axis = 0)
+         CheckPairsB = np.delete(CheckPairsB, np.argwhere(CheckPairsB[:,0] == CheckPairsB[:,1]), axis = 0)
+
+         if (np.logical_and(np.all(check_is_in(CheckPairsA, PairsAll)), np.all(check_is_in(CheckPairsB, PairsAll)))):
+             # if each unit matches with every unit in the other unit's group,
+             # the pair can be added as a match for all classes
+             UniqueIDConservative[pair[0]] = min(UniqueIDConservative[pair])
+             UniqueIDConservative[pair[1]] = min(UniqueIDConservative[pair])
+             nMatchesConservative += 1
+
+             UniqueID[pair[0]] = min(UniqueID[pair])
+             UniqueID[pair[1]] = min(UniqueID[pair])
+             nMatches += 1
+
+             UniqueIDLiberal[pair[0]] = min(UniqueIDLiberal[pair])
+             UniqueIDLiberal[pair[1]] = min(UniqueIDLiberal[pair])
+             nMatchesLiberal += 1
+         else:
+             # now test whether each unit matches with every unit in the other group
+             # IF they are in the same/adjacent sessions
+             UnitAID = UniqueID[pair[0]]
+             UnitBID = UniqueID[pair[1]]
+
+             SameGroupIdA = np.argwhere(UniqueID == UnitAID).squeeze()
+             SameGroupIdB = np.argwhere(UniqueID == UnitBID).squeeze()
+             if len(SameGroupIdA.shape) == 0:
+                 SameGroupIdA = SameGroupIdA[np.newaxis]
+             if len(SameGroupIdB.shape) == 0:
+                 SameGroupIdB = SameGroupIdB[np.newaxis]
+
+             CheckPairsA = np.stack((SameGroupIdB, np.broadcast_to(np.array(pair[0]), SameGroupIdB.shape)), axis = -1)
+             CheckPairsB = np.stack((SameGroupIdA, np.broadcast_to(np.array(pair[1]), SameGroupIdA.shape)), axis = -1)
+             # delete potential self-matches
+             CheckPairsA = np.delete(CheckPairsA, np.argwhere(CheckPairsA[:,0] == CheckPairsA[:,1]), axis = 0)
+             CheckPairsB = np.delete(CheckPairsB, np.argwhere(CheckPairsB[:,0] == CheckPairsB[:,1]), axis = 0)
+
+             # check whether they are in the same or adjacent sessions
+             InNearSessionA = np.abs(np.diff(ClusInfo['SessionID'][CheckPairsA])) <= 1
+             InNearSessionB = np.abs(np.diff(ClusInfo['SessionID'][CheckPairsB])) <= 1
+
+             CheckPairsNearA = CheckPairsA[InNearSessionA.squeeze()]
+             CheckPairsNearB = CheckPairsB[InNearSessionB.squeeze()]
+
+             if (np.logical_and(np.all(check_is_in(CheckPairsNearA, PairsAll)), np.all(check_is_in(CheckPairsNearB, PairsAll)))):
+                 UniqueID[pair[0]] = min(UniqueID[pair])
+                 UniqueID[pair[1]] = min(UniqueID[pair])
+                 nMatches += 1
+
+                 UniqueIDLiberal[pair[0]] = min(UniqueIDLiberal[pair])
+                 UniqueIDLiberal[pair[1]] = min(UniqueIDLiberal[pair])
+                 nMatchesLiberal += 1
+             else:
+                 UniqueIDLiberal[pair[0]] = min(UniqueIDLiberal[pair])
+                 UniqueIDLiberal[pair[1]] = min(UniqueIDLiberal[pair])
+                 nMatchesLiberal += 1
+
+     print(f'Number of Liberal Matches: {nMatchesLiberal}')
+     print(f'Number of Intermediate Matches: {nMatches}')
+     print(f'Number of Conservative Matches: {nMatchesConservative}')
+     return [UniqueIDLiberal, UniqueID, UniqueIDConservative, OriUniqueID]
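To orient readers, a hedged usage sketch of AssignUID. The shapes and dictionary keys below are inferred from the code above; the values are illustrative only, not documented defaults.

import numpy as np

nUnits = 4
rng = np.random.default_rng(0)
Output = rng.uniform(size = (nUnits, nUnits))      # pairwise match probabilities; (i, j) and (j, i) hold the two CV halves
ClusInfo = {'OriginalID': np.arange(nUnits),       # original cluster ID per unit
            'SessionID': np.array([0, 0, 1, 1])}   # recording session per unit
param = {'MatchThreshold': 0.5}                    # fixed threshold; 'UseDataDrivenProbThrs' omitted

UniqueIDLiberal, UniqueID, UniqueIDConservative, OriUniqueID = AssignUID(Output, param, ClusInfo)
# units that share a value in UniqueID are treated as the same neuron tracked across sessions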
@@ -0,0 +1,66 @@
+ import numpy as np
+ import UnitMatchPy.Param_fun as pf
+
+ def get_ParameterKernels(Scores2Include, labels, Cond, param, addone = 1):
+     """
+     Requires Scores2Include, a dictionary where the keys are the metrics used and the values are
+     (nUnits, nUnits) arrays with the score for each pair of units.
+
+     Smoothing and add-one are applied to compensate for the fact that the histogram used to estimate
+     the probability distribution has few values; smoothing nearby peaks and troughs makes the estimate
+     more similar to the true distribution and reduces shot noise.
+     """
+
+     ScoreVector = param['ScoreVector']
+     Bins = param['Bins']
+     SmoothProb = param['SmoothProb']
+
+     ParameterKernels = np.full((len(ScoreVector), len(Scores2Include), len(Cond)), np.nan)
+
+     ScoreID = 0
+     for sc in Scores2Include:
+         Scorestmp = Scores2Include[sc]
+
+         SmoothTmp = SmoothProb # not using different smoothing per score for now (default is the same)
+
+         for Ck in range(len(Cond)):
+
+             HistTmp, __ = np.histogram(Scorestmp[labels == Ck], Bins)
+             ParameterKernels[:, ScoreID, Ck] = pf.smooth(HistTmp, SmoothTmp)
+             ParameterKernels[:, ScoreID, Ck] /= np.sum(ParameterKernels[:, ScoreID, Ck])
+
+             ParameterKernels[:, ScoreID, Ck] += addone * np.min(ParameterKernels[ParameterKernels[:, ScoreID, Ck] != 0, ScoreID, Ck], axis = 0)
+
+         ScoreID += 1
+
+     return ParameterKernels
+
+ def apply_naive_bayes(ParameterKernels, Priors, Predictors, param, Cond):
+     """
+     Using the parameter kernels, priors and predictors, calculate the probability that each pair of
+     units is a match.
+     """
+     ScoreVector = param['ScoreVector']
+
+     nPairs = Predictors.shape[0] ** 2
+
+     unravel = np.reshape(Predictors, (Predictors.shape[0] * Predictors.shape[1], Predictors.shape[2], 1))
+     x1 = np.tile(unravel, (1, 1, len(ScoreVector)))
+     tmp = np.expand_dims(ScoreVector, axis = (0, 1))
+     x2 = np.tile(tmp, (x1.shape[0], x1.shape[1], 1))
+     minidx = np.argmin(np.abs(x1 - x2), axis = 2)
+
+     likelihood = np.full((nPairs, len(Cond)), np.nan)
+     for Ck in range(len(Cond)):
+         tmpp = np.zeros_like(minidx, np.float64)
+         for yy in range(minidx.shape[1]):
+             tmpp[:, yy] = ParameterKernels[minidx[:, yy], yy, Ck]
+         likelihood[:, Ck] = np.prod(tmpp, axis = 1)
+
+     Probability = np.full((nPairs, len(Cond)), np.nan)
+     for Ck in range(len(Cond)):
+         Probability[:, Ck] = Priors[Ck] * likelihood[:, Ck] / np.nansum((Priors * likelihood), axis = 1)
+
+     return Probability
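A hedged sketch of how the two functions above might be chained, assuming they are in scope. Every name and value below is illustrative; only the shape relations are taken from the code (ParameterKernels has len(ScoreVector) rows, so Bins needs one more edge than ScoreVector has points).

import numpy as np

nUnits, nScores = 10, 3
ScoreVector = np.linspace(0, 1, 101)
param = {'ScoreVector': ScoreVector,
         'Bins': np.linspace(0, 1, 102),   # len(ScoreVector) + 1 edges -> len(ScoreVector) histogram counts
         'SmoothProb': 9}                  # illustrative smoothing window for pf.smooth
Cond = [0, 1]                              # e.g. non-match / match classes
Priors = np.array([0.9, 0.1])              # prior probability of each class

Scores2Include = {f'score{k}': np.random.rand(nUnits, nUnits) for k in range(nScores)}
labels = np.random.randint(0, 2, (nUnits, nUnits))               # candidate class label per pair
Predictors = np.stack(list(Scores2Include.values()), axis = 2)   # (nUnits, nUnits, nScores)

kernels = get_ParameterKernels(Scores2Include, labels, Cond, param, addone = 1)
prob = apply_naive_bayes(kernels, Priors, Predictors, param, Cond)
match_prob = prob[:, 1].reshape(nUnits, nUnits)                  # probability of the second class per pair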
@@ -0,0 +1,216 @@
+ # Functions for extracting and averaging raw data
+
+ import os
+ import numpy as np
+ from pathlib import Path
+ from scipy.ndimage import gaussian_filter
+ from mtscomp import decompress
+ from joblib import Parallel, delayed
+ import UnitMatchPy.utils as util
+
+ # Decompressed data functions
+ def Read_Meta(metaPath):
+     "Read in meta data as a dictionary"
+     metaDict = {}
+     with metaPath.open() as f:
+         mdatList = f.read().splitlines()
+         # convert the list entries into key-value pairs
+         for m in mdatList:
+             csList = m.split(sep='=')
+             if csList[0][0] == '~':
+                 currKey = csList[0][1:len(csList[0])]
+             else:
+                 currKey = csList[0]
+             metaDict.update({currKey: csList[1]})
+
+     return metaDict
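A small usage sketch for Read_Meta; the path is a placeholder, and the key shown follows the SpikeGLX .meta convention (all values come back as strings).

from pathlib import Path

meta = Read_Meta(Path('/data/session1/raw/recording_g0_t0.imec0.ap.meta'))  # placeholder path
nChannels = int(meta['nSavedChans'])   # number of saved channels, parsed from its string value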
+
+ def get_sample_idx(SpikeTimes, UnitIDs, SampleAmount, units):
+     """
+     Needs spike times, unit IDs (from the KiloSort directory) and the maximum number of samples per unit.
+     Returns a (nUnits, SampleAmount) array with the spike times to sample for every unit; the selected spikes
+     are evenly spaced over time, and the row is filled with NaN if the unit has fewer spikes than SampleAmount.
+     """
+
+     UniqueUnitIDs = np.unique(UnitIDs)
+     nUnitsALL = len(UniqueUnitIDs)
+
+     SampleIdx = np.zeros((nUnitsALL, SampleAmount))
+     # process every requested unit
+     for i, idx in enumerate(units):
+         UnitTimes = SpikeTimes[UnitIDs == idx]
+         if SampleAmount < len(UnitTimes):
+             ChooseIdx = np.linspace(0, len(UnitTimes) - 1, SampleAmount, dtype = int) # -1 so we cannot index out of range
+             SampleIdx[i,:] = UnitTimes[ChooseIdx]
+         else:
+             SampleIdx[i,:len(UnitTimes)] = UnitTimes
+             SampleIdx[i,len(UnitTimes):] = np.nan
+
+     return SampleIdx
+
+ def Extract_A_Unit(SampleIdx, Data, HalfWidth, SpikeWidth, nChannels, SampleAmount):
+     """
+     This function extracts and averages the raw data for a unit, and splits the unit's spikes into two
+     halves for cross-verification. Returns AvgWaveforms with shape (SpikeWidth, nChannels, 2).
+
+     NOTE - here SampleIdx is an array of shape (SampleAmount,), i.e. use SampleIdx[UnitIdx] to get the
+     average waveform for that unit.
+     """
+
+     Channels = np.arange(0, nChannels)
+
+     AllSampleWaveforms = np.zeros((SampleAmount, SpikeWidth, nChannels))
+     for i, idx in enumerate(SampleIdx[:]):
+         if np.isnan(idx):
+             continue
+         tmp = Data[int(idx - HalfWidth - 1):int(idx + HalfWidth - 1), Channels] # -1, to better fit with the MATLAB version
+         tmp = tmp.astype(np.float32)
+         # Gaussian smooth over time, window = 5, sigma = window size / 5
+         tmp = gaussian_filter(tmp, 1, radius = 2, axes = 0) # edges are handled differently to the MATLAB version
+         # window ~ radius * 2 + 1
+         tmp = tmp - np.mean(tmp[:20,:], axis = 0)
+         AllSampleWaveforms[i] = tmp
+
+     # take the median and split into CVs
+     nWavs = np.sum(~np.isnan(SampleIdx[:]))
+     CVlim = np.floor(nWavs / 2).astype(int)
+
+     # find the median over samples
+     AvgWaveforms = np.zeros((SpikeWidth, nChannels, 2))
+     AvgWaveforms[:, :, 0] = np.median(AllSampleWaveforms[:CVlim, :, :], axis = 0) # median over samples
+     AvgWaveforms[:, :, 1] = np.median(AllSampleWaveforms[CVlim:nWavs, :, :], axis = 0) # median over samples
+     return AvgWaveforms
+
+
+ def Extract_A_UnitKS4(SampleIdx, Data, SamplesBefore, SamplesAfter, SpikeWidth, nChannels, SampleAmount):
+     """
+     This function extracts and averages the raw data for a unit, and splits the unit's spikes into two
+     halves for cross-verification. Returns AvgWaveforms with shape (SpikeWidth, nChannels, 2).
+
+     NOTE - here SampleIdx is an array of shape (SampleAmount,), i.e. use SampleIdx[UnitIdx] to get the
+     average waveform for that unit.
+     """
+
+     Channels = np.arange(0, nChannels)
+
+     AllSampleWaveforms = np.zeros((SampleAmount, SpikeWidth, nChannels))
+     for i, idx in enumerate(SampleIdx[:]):
+         if np.isnan(idx):
+             continue
+         tmp = Data[int(idx - SamplesBefore - 1):int(idx + SamplesAfter - 1), Channels] # -1, to better fit with the MATLAB version
+         tmp = tmp.astype(np.float32)
+         # Gaussian smooth over time, window = 5, sigma = window size / 5
+         tmp = gaussian_filter(tmp, 1, radius = 2, axes = 0) # edges are handled differently to the MATLAB version
+         # window ~ radius * 2 + 1
+         tmp = tmp - np.mean(tmp[:20,:], axis = 0)
+         AllSampleWaveforms[i] = tmp
+
+     # take the median and split into CVs
+     nWavs = np.sum(~np.isnan(SampleIdx[:]))
+     CVlim = np.floor(nWavs / 2).astype(int)
+
+     # find the median over samples
+     AvgWaveforms = np.zeros((SpikeWidth, nChannels, 2))
+     AvgWaveforms[:, :, 0] = np.median(AllSampleWaveforms[:CVlim, :, :], axis = 0) # median over samples
+     AvgWaveforms[:, :, 1] = np.median(AllSampleWaveforms[CVlim:nWavs, :, :], axis = 0) # median over samples
+     return AvgWaveforms
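A hedged sketch of how the extraction helpers above might be used for one session. The window sizes, the memory-mapped .bin path and the joblib parallelisation are assumptions made for illustration, not documented usage; get_sample_idx and Extract_A_Unit are the functions defined above.

import numpy as np
from joblib import Parallel, delayed

SampleAmount = 1000                    # max spikes sampled per unit (illustrative)
SpikeWidth = 82                        # samples per snippet (illustrative)
HalfWidth = SpikeWidth // 2
nChannels = 384

# hypothetical decompressed recording, shape (nSamples, nChannels)
Data = np.memmap('/data/session1/raw/recording.ap.bin', dtype = np.int16, mode = 'r').reshape(-1, nChannels)

SpikeTimes = np.load('/data/session1/kilosort/spike_times.npy').squeeze()    # hypothetical KiloSort outputs
SpikeIDs = np.load('/data/session1/kilosort/spike_clusters.npy').squeeze()
units = np.unique(SpikeIDs)

SampleIdx = get_sample_idx(SpikeTimes, SpikeIDs, SampleAmount, units)        # (nUnits, SampleAmount)
# one job per unit; each returns an array of shape (SpikeWidth, nChannels, 2)
AvgWaveforms = Parallel(n_jobs = -1)(
    delayed(Extract_A_Unit)(SampleIdx[u], Data, HalfWidth, SpikeWidth, nChannels, SampleAmount)
    for u in range(SampleIdx.shape[0]))
AvgWaveforms = np.stack(AvgWaveforms)                                        # (nUnits, SpikeWidth, nChannels, 2)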
+
+
+ def Save_AvgWaveforms(AvgWaveforms, SaveDir, GoodUnits, ExtractGoodUnitsOnly = False):
+     """
+     Will save the extracted average waveforms in a folder called 'RawWaveforms' in the given SaveDir.
+     Each waveform will be saved in a unique .npy file called 'UnitX_RawSpikes.npy'.
+     Supply GoodUnits, an array of which indices are included, if you are not extracting all units
+     from the recording session.
+     """
+     CurrentDir = os.getcwd()
+     os.chdir(SaveDir)
+     DirList = os.listdir()
+     if 'RawWaveforms' not in DirList:
+         os.mkdir('RawWaveforms')
+     TmpPath = os.path.join(SaveDir, 'RawWaveforms')
+
+     os.chdir(TmpPath)
+
+     # first axis is each unit
+
+     # ALL waveforms from 0 -> nUnits
+     if not ExtractGoodUnitsOnly:
+         for i in range(AvgWaveforms.shape[0]):
+             np.save(f'Unit{i}_RawSpikes.npy', AvgWaveforms[i,:,:,:])
+
+     # if only extracting good units
+     else:
+         for i, idx in enumerate(GoodUnits):
+             # need idx[0] to select the value so it saves with the correct name
+             np.save(f'Unit{idx[0]}_RawSpikes.npy', AvgWaveforms[i,:,:,:])
+
+     os.chdir(CurrentDir)
+
+
+ # Load in necessary files from the KiloSort directories and raw data directories
+ # for the n sessions being extracted
+ def get_raw_data_paths(RawDataDirPaths):
+     """
+     This function requires RawDataDirPaths, a list of paths to the raw data directories, e.g. where the
+     .cbin, .ch and .meta files are.
+     It returns lists of paths to the .cbin, .ch and .meta files.
+     """
+     cbinPaths = []
+     chPaths = []
+     metaPaths = []
+
+     for i in range(len(RawDataDirPaths)):
+         for f in os.listdir(RawDataDirPaths[i]):
+             name, ext = os.path.splitext(f)
+
+             if ext == '.cbin':
+                 cbinPaths.append(os.path.join(RawDataDirPaths[i], name + ext))
+
+             if ext == '.ch':
+                 chPaths.append(os.path.join(RawDataDirPaths[i], name + ext))
+
+             if ext == '.meta':
+                 metaPaths.append(os.path.join(RawDataDirPaths[i], name + ext))
+
+     return cbinPaths, chPaths, metaPaths
+
+
+ def extract_KSdata(KSdirs, ExtractGoodUnitsOnly = False):
+     """
+     This function requires KSdirs, a list of KiloSort directories, one per session.
+     It will load in the spike times, spike IDs and, if requested, the good units.
+     """
+     nSessions = len(KSdirs)
+
+     # load spike times
+     SpikeTimes = []
+     for i in range(nSessions):
+         PathTmp = os.path.join(KSdirs[i], 'spike_times.npy')
+         SpikeTimestmp = np.load(PathTmp)
+         SpikeTimes.append(SpikeTimestmp)
+
+     # load spike IDs
+     SpikeIDs = []
+     for i in range(nSessions):
+         PathTmp = os.path.join(KSdirs[i], 'spike_clusters.npy')
+         SpikeIDstmp = np.load(PathTmp)
+         SpikeIDs.append(SpikeIDstmp)
+
+     if ExtractGoodUnitsOnly:
+         # good unit IDs
+         UnitLabelPaths = []
+
+         # load good unit paths
+         for i in range(nSessions):
+             UnitLabelPaths.append(os.path.join(KSdirs[i], 'cluster_group.tsv'))
+
+         GoodUnits = util.get_good_units(UnitLabelPaths)
+
+         return SpikeIDs, SpikeTimes, GoodUnits
+
+     else:
+         return SpikeIDs, SpikeTimes, [None for s in range(nSessions)]
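Finally, a hedged sketch of how the session-level helpers in this file chain together. The paths are placeholders, and the waveform-extraction step is summarised in comments (see the sketch after Extract_A_UnitKS4 above).

from pathlib import Path

KSdirs = ['/data/session1/kilosort', '/data/session2/kilosort']   # placeholder paths
RawDataDirPaths = ['/data/session1/raw', '/data/session2/raw']

cbinPaths, chPaths, metaPaths = get_raw_data_paths(RawDataDirPaths)
SpikeIDs, SpikeTimes, GoodUnits = extract_KSdata(KSdirs, ExtractGoodUnitsOnly = True)

for s in range(len(KSdirs)):
    meta = Read_Meta(Path(metaPaths[s]))
    nChannels = int(meta['nSavedChans'])      # SpikeGLX meta key, stored as a string
    # 1. open/decompress the raw recording for session s (e.g. the .cbin/.ch pair) as Data
    # 2. build SampleIdx with get_sample_idx and extract per-unit AvgWaveforms (see sketch above)
    # 3. save one .npy per good unit into KSdirs[s]/RawWaveforms:
    # Save_AvgWaveforms(AvgWaveforms, KSdirs[s], GoodUnits[s], ExtractGoodUnitsOnly = True)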