antspymm 1.2.8__py3-none-any.whl → 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
antspymm/mm.py CHANGED
@@ -161,6 +161,23 @@ def version( ):
161
161
  'antspymm': pkg_resources.require("antspymm")[0].version
162
162
  }
163
163
 
164
+ def nrg_filename_to_subjectvisit(s, separator='-'):
165
+ """
166
+ Extracts a pattern from the input string.
167
+
168
+ Parameters:
169
+ - s: The input string from which to extract the pattern.
170
+ - separator: The separator used in the string (default is '-').
171
+
172
+ Returns:
173
+ - A string in the format of 'PREFIX-Number-Date'
174
+ """
175
+ parts = os.path.basename(s).split(separator)
176
+ # Assuming the pattern is always in the form of PREFIX-Number-Date-...
177
+ # and PREFIX is always "PPMI", extract the first three parts
178
+ extracted = separator.join(parts[:3])
179
+ return extracted
180
+
164
181
 
165
182
  def validate_nrg_file_format(path, separator):
166
183
  """
@@ -436,11 +453,11 @@ def docsamson(locmod, studycsv, outputdir, projid, sid, dtid, mysep, t1iid=None,
436
453
  imfns.append('nmid' + str(i))
437
454
  elif locmod == 'rsfMRI':
438
455
  imfns=[]
439
- for i in range(3):
456
+ for i in range(4):
440
457
  imfns.append('rsfid' + str(i))
441
458
  elif locmod == 'DTI':
442
459
  imfns=[]
443
- for i in range(3):
460
+ for i in range(4):
444
461
  imfns.append('dtid' + str(i))
445
462
 
446
463
  # Process each file name
@@ -489,7 +506,7 @@ def get_valid_modalities( long=False, asString=False, qc=False ):
489
506
  if long:
490
507
  mymod = ["T1w", "NM2DMT", "rsfMRI", "rsfMRI_LR", "rsfMRI_RL", "rsfMRILR", "rsfMRIRL", "DTI", "DTI_LR","DTI_RL", "DTILR","DTIRL","T2Flair", "dwi", "dwi_ap", "dwi_pa", "func", "func_ap", "func_pa", "perf"]
491
508
  elif qc:
492
- mymod = [ 'T1w', 'T2Flair', 'NM2DMT','DTIdwi','DTIb0', 'rsfMRI', "perf" ]
509
+ mymod = [ 'T1w', 'T2Flair', 'NM2DMT', 'DTI', 'DTIdwi','DTIb0', 'rsfMRI', "perf" ]
493
510
  else:
494
511
  mymod = ["T1w", "NM2DMT", "DTI","T2Flair", "rsfMRI", "perf" ]
495
512
  if not asString:
@@ -565,15 +582,15 @@ def generate_mm_dataframe(
565
582
  raise ValueError("source_image_directory does not exist")
566
583
  if len( rsf_filenames ) > 2:
567
584
  raise ValueError("len( rsf_filenames ) > 2")
568
- if len( dti_filenames ) > 2:
569
- raise ValueError("len( dti_filenames ) > 2")
585
+ if len( dti_filenames ) > 3:
586
+ raise ValueError("len( dti_filenames ) > 3")
570
587
  if len( nm_filenames ) > 11:
571
588
  raise ValueError("len( nm_filenames ) > 11")
572
589
  if len( rsf_filenames ) < 2:
573
590
  for k in range(len(rsf_filenames),2):
574
591
  rsf_filenames.append(None)
575
- if len( dti_filenames ) < 2:
576
- for k in range(len(dti_filenames),2):
592
+ if len( dti_filenames ) < 3:
593
+ for k in range(len(dti_filenames),3):
577
594
  dti_filenames.append(None)
578
595
  if len( nm_filenames ) < 10:
579
596
  for k in range(len(nm_filenames),10):
@@ -649,7 +666,7 @@ def generate_mm_dataframe(
649
666
  'perfid']
650
667
  mycols0 = corecols + [
651
668
  'rsfid1', 'rsfid2',
652
- 'dtid1', 'dtid2']
669
+ 'dtid1', 'dtid2','dtid3']
653
670
  nmext = [
654
671
  'nmid1', 'nmid2' 'nmid3', 'nmid4', 'nmid5',
655
672
  'nmid6', 'nmid7','nmid8', 'nmid9', 'nmid10', 'nmid11'
@@ -981,25 +998,28 @@ def outlierness_by_modality( qcdf, uid='filename', outlier_columns = ['noise', '
981
998
  from PyNomaly import loop
982
999
  from sklearn.neighbors import LocalOutlierFactor
983
1000
  qcdfout = qcdf.copy()
1001
+ pd.set_option('future.no_silent_downcasting', True)
1002
+ qcdfout.replace([np.inf, -np.inf], np.nan, inplace=True)
984
1003
  if uid not in qcdfout.keys():
985
- raise ValueError(uid + " not in dataframe")
1004
+ raise ValueError( str(uid) + " not in dataframe")
986
1005
  if 'ol_loop' not in qcdfout.keys():
987
1006
  qcdfout['ol_loop']=math.nan
988
1007
  if 'ol_lof' not in qcdfout.keys():
989
1008
  qcdfout['ol_lof']=math.nan
1009
+ didit=False
990
1010
  for mod in get_valid_modalities( qc=True ):
1011
+ didit=True
991
1012
  lof = LocalOutlierFactor()
992
1013
  locsel = qcdfout["modality"] == mod
993
1014
  rr = qcdfout[locsel][outlier_columns]
994
- with pd.option_context('mode.use_inf_as_na', True):
995
- for myolcol in outlier_columns:
996
- rr[myolcol].fillna(rr[myolcol].mean(), inplace=True)
1015
+ column_means = rr.mean()
1016
+ rr.fillna(column_means, inplace=True)
997
1017
  if rr.shape[0] > 1:
998
1018
  if verbose:
999
- print(mod)
1019
+ print("calc: " + mod + " outlierness " )
1000
1020
  myneigh = np.min( [24, int(np.round(rr.shape[0]*0.5)) ] )
1001
1021
  temp = antspyt1w.loop_outlierness(rr.astype(float), standardize=True, extent=3, n_neighbors=myneigh, cluster_labels=None)
1002
- qcdfout.loc[locsel,'ol_loop']=temp
1022
+ qcdfout.loc[locsel,'ol_loop']=temp.astype('float64')
1003
1023
  yhat = lof.fit_predict(rr)
1004
1024
  temp = lof.negative_outlier_factor_*(-1.0)
1005
1025
  temp = temp - temp.min()
@@ -1007,6 +1027,8 @@ def outlierness_by_modality( qcdf, uid='filename', outlier_columns = ['noise', '
1007
1027
  yhat[ yhat == -1] = 1 # these are outliers
1008
1028
  qcdfout.loc[locsel,'ol_lof_decision']=yhat
1009
1029
  qcdfout.loc[locsel,'ol_lof']=temp/temp.max()
1030
+ if verbose:
1031
+ print( didit )
1010
1032
  return qcdfout
1011
1033
 
1012
1034
 
@@ -1084,6 +1106,11 @@ def study_dataframe_from_matched_dataframe( matched_dataframe, rootdir, outputdi
1084
1106
  dtfn2=glob.glob(os.path.join(rootdir, pid, sid, dt, 'DTI*', dtid, str(csvrow['dtfn2'].iloc[0]+iext) ))[0]
1085
1107
  if exists( dtfn2 ):
1086
1108
  dtList.append( dtfn2 )
1109
+ if 'dtfn3' in csvrow.keys():
1110
+ dtid=str(int(csvrow['dtid3'].iloc[0]))
1111
+ dtfn3=glob.glob(os.path.join(rootdir, pid, sid, dt, 'DTI*', dtid, str(csvrow['dtfn3'].iloc[0]+iext) ))[0]
1112
+ if exists( dtfn3 ):
1113
+ dtList.append( dtfn3 )
1087
1114
  if 'rsffn1' in csvrow.keys():
1088
1115
  rsid=str(int(csvrow['rsfid1'].iloc[0]))
1089
1116
  rsfn1=glob.glob(os.path.join( rootdir, pid, sid, dt, 'rsfMRI*', rsid, str(csvrow['rsffn1'].iloc[0]+iext) ))[0]
@@ -1159,6 +1186,8 @@ def highest_quality_repeat(mxdfin, idvar, visitvar, qualityvar):
1159
1186
  if qualityvar not in mxdfin.columns:
1160
1187
  raise ValueError("qualityvar not in dataframe")
1161
1188
 
1189
+ mxdfin[qualityvar] = mxdfin[qualityvar].astype(float)
1190
+
1162
1191
  vizzes = mxdfin[visitvar].unique()
1163
1192
  uids = mxdfin[idvar].unique()
1164
1193
  useit = np.zeros(mxdfin.shape[0], dtype=bool)
@@ -1195,40 +1224,48 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1195
1224
  """
1196
1225
  import pandas as pd
1197
1226
  import numpy as np
1227
+ qc_dataframe['filename']=qc_dataframe['filename'].astype(str)
1228
+ qc_dataframe['ol_loop']=qc_dataframe['ol_loop'].astype(float)
1229
+ qc_dataframe['ol_lof']=qc_dataframe['ol_lof'].astype(float)
1230
+ qc_dataframe['ol_lof_decision']=qc_dataframe['ol_lof_decision'].astype(float)
1198
1231
  mmdf = best_mmm( qc_dataframe, 'T1w', outlier_column=outlier_column )['filt']
1199
1232
  fldf = best_mmm( qc_dataframe, 'T2Flair', outlier_column=outlier_column )['filt']
1200
1233
  nmdf = best_mmm( qc_dataframe, 'NM2DMT', outlier_column=outlier_column )['filt']
1201
1234
  rsdf = best_mmm( qc_dataframe, 'rsfMRI', outlier_column=outlier_column )['filt']
1202
1235
  dtdf = best_mmm( qc_dataframe, 'DTI', outlier_column=outlier_column )['filt']
1203
- mmdf['flairid'] = np.nan
1204
- mmdf['flairfn'] = np.nan
1205
- mmdf['flairloop'] = np.nan
1206
- mmdf['flairlof'] = np.nan
1207
- mmdf['dtid1'] = np.nan
1208
- mmdf['dtfn1'] = np.nan
1209
- mmdf['dtloop1'] = np.nan
1210
- mmdf['dtlof1'] = np.nan
1211
- mmdf['dtid2'] = np.nan
1212
- mmdf['dtfn2'] = np.nan
1213
- mmdf['dtloop2'] = np.nan
1214
- mmdf['dtlof2'] = np.nan
1215
- mmdf['rsfid1'] = np.nan
1216
- mmdf['rsffn1'] = np.nan
1217
- mmdf['rsfloop1'] = np.nan
1218
- mmdf['rsflof1'] = np.nan
1219
- mmdf['rsfid2'] = np.nan
1220
- mmdf['rsffn2'] = np.nan
1221
- mmdf['rsfloop2'] = np.nan
1222
- mmdf['rsflof2'] = np.nan
1236
+ mmdf['flairid'] = None
1237
+ mmdf['flairfn'] = None
1238
+ mmdf['flairloop'] = None
1239
+ mmdf['flairlof'] = None
1240
+ mmdf['dtid1'] = None
1241
+ mmdf['dtfn1'] = None
1242
+ mmdf['dtntimepoints1'] = 0
1243
+ mmdf['dtloop1'] = math.nan
1244
+ mmdf['dtlof1'] = math.nan
1245
+ mmdf['dtid2'] = None
1246
+ mmdf['dtfn2'] = None
1247
+ mmdf['dtntimepoints2'] = 0
1248
+ mmdf['dtloop2'] = math.nan
1249
+ mmdf['dtlof2'] = math.nan
1250
+ mmdf['rsfid1'] = None
1251
+ mmdf['rsffn1'] = None
1252
+ mmdf['rsfntimepoints1'] = 0
1253
+ mmdf['rsfloop1'] = math.nan
1254
+ mmdf['rsflof1'] = math.nan
1255
+ mmdf['rsfid2'] = None
1256
+ mmdf['rsffn2'] = None
1257
+ mmdf['rsfntimepoints2'] = 0
1258
+ mmdf['rsfloop2'] = math.nan
1259
+ mmdf['rsflof2'] = math.nan
1223
1260
  for k in range(1,11):
1224
1261
  myid='nmid'+str(k)
1225
- mmdf[myid] = np.nan
1262
+ mmdf[myid] = None
1226
1263
  myid='nmfn'+str(k)
1227
- mmdf[myid] = np.nan
1264
+ mmdf[myid] = None
1228
1265
  myid='nmloop'+str(k)
1229
- mmdf[myid] = np.nan
1266
+ mmdf[myid] = math.nan
1230
1267
  myid='nmlof'+str(k)
1231
- mmdf[myid] = np.nan
1268
+ mmdf[myid] = math.nan
1232
1269
  if verbose:
1233
1270
  print( mmdf.shape )
1234
1271
  for k in range(mmdf.shape[0]):
@@ -1237,12 +1274,13 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1237
1274
  progger = str( k ) # np.round( k / mmdf.shape[0] * 100 ) )
1238
1275
  print( progger, end ="...", flush=True)
1239
1276
  if dtdf is not None:
1240
- locsel = (dtdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k]) & (dtdf[outlier_column] < 0.5)
1277
+ locsel = (dtdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k])
1241
1278
  if sum(locsel) == 1:
1242
1279
  mmdf.iloc[k, mmdf.columns.get_loc("dtid1")] = dtdf["imageID"][locsel].values[0]
1243
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = dtdf["fn"][locsel].values[0]
1280
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = dtdf[unique_identifier][locsel].values[0]
1244
1281
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop1")] = dtdf[outlier_column][locsel].values[0]
1245
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = dtdf['ol_lof_decision'][locsel].values[0]
1282
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = float(dtdf['ol_lof_decision'][locsel].values[0])
1283
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints1")] = float(dtdf['dimt'][locsel].values[0])
1246
1284
  elif sum(locsel) > 1:
1247
1285
  locdf = dtdf[locsel]
1248
1286
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1250,21 +1288,24 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1250
1288
  if locdf.shape[0] > 1:
1251
1289
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1252
1290
  mmdf.iloc[k, mmdf.columns.get_loc("dtid1")] = locdf["imageID"].values[0]
1253
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = locdf["fn"].values[0]
1291
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = locdf[unique_identifier].values[0]
1254
1292
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop1")] = locdf[outlier_column].values[0]
1255
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = locdf['ol_lof_decision'][locsel].values[0]
1293
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = float(locdf['ol_lof_decision'][locsel].values[0])
1294
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints1")] = float(dtdf['dimt'][locsel].values[0])
1256
1295
  if locdf.shape[0] > 1:
1257
1296
  mmdf.iloc[k, mmdf.columns.get_loc("dtid2")] = locdf["imageID"].values[1]
1258
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn2")] = locdf["fn"].values[1]
1297
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn2")] = locdf[unique_identifier].values[1]
1259
1298
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop2")] = locdf[outlier_column].values[1]
1260
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof2")] = locdf['ol_lof_decision'][locsel].values[1]
1299
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof2")] = float(locdf['ol_lof_decision'][locsel].values[1])
1300
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints2")] = float(dtdf['dimt'][locsel].values[1])
1261
1301
  if rsdf is not None:
1262
- locsel = (rsdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k]) & (rsdf[outlier_column] < 0.5)
1302
+ locsel = (rsdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k])
1263
1303
  if sum(locsel) == 1:
1264
1304
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid1")] = rsdf["imageID"][locsel].values[0]
1265
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = rsdf["fn"][locsel].values[0]
1305
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = rsdf[unique_identifier][locsel].values[0]
1266
1306
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop1")] = rsdf[outlier_column][locsel].values[0]
1267
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = rsdf['ol_lof_decision'][locsel].values[0]
1307
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = float(rsdf['ol_lof_decision'].values[0])
1308
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints1")] = float(rsdf['dimt'][locsel].values[0])
1268
1309
  elif sum(locsel) > 1:
1269
1310
  locdf = rsdf[locsel]
1270
1311
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1272,22 +1313,24 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1272
1313
  if locdf.shape[0] > 1:
1273
1314
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1274
1315
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid1")] = locdf["imageID"].values[0]
1275
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = locdf["fn"].values[0]
1316
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = locdf[unique_identifier].values[0]
1276
1317
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop1")] = locdf[outlier_column].values[0]
1277
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = locdf['ol_lof_decision'].values[0]
1318
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = float(locdf['ol_lof_decision'].values[0])
1319
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints1")] = float(locdf['dimt'][locsel].values[0])
1278
1320
  if locdf.shape[0] > 1:
1279
1321
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid2")] = locdf["imageID"].values[1]
1280
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn2")] = locdf["fn"].values[1]
1322
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn2")] = locdf[unique_identifier].values[1]
1281
1323
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop2")] = locdf[outlier_column].values[1]
1282
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof2")] = locdf['ol_lof_decision'].values[1]
1324
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof2")] = float(locdf['ol_lof_decision'].values[1])
1325
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints2")] = float(locdf['dimt'][locsel].values[1])
1283
1326
 
1284
1327
  if fldf is not None:
1285
1328
  locsel = fldf['subjectIDdate'] == mmdf['subjectIDdate'].iloc[k]
1286
1329
  if locsel.sum() == 1:
1287
1330
  mmdf.iloc[k, mmdf.columns.get_loc("flairid")] = fldf['imageID'][locsel].values[0]
1288
- mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = fldf['filename'][locsel].values[0]
1331
+ mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = fldf[unique_identifier][locsel].values[0]
1289
1332
  mmdf.iloc[k, mmdf.columns.get_loc("flairloop")] = fldf[outlier_column][locsel].values[0]
1290
- mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = fldf['ol_lof_decision'][locsel].values[0]
1333
+ mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = float(fldf['ol_lof_decision'][locsel].values[0])
1291
1334
  elif sum(locsel) > 1:
1292
1335
  locdf = fldf[locsel]
1293
1336
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1295,9 +1338,9 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1295
1338
  if locdf.shape[0] > 1:
1296
1339
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1297
1340
  mmdf.iloc[k, mmdf.columns.get_loc("flairid")] = locdf["imageID"].values[0]
1298
- mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = locdf["fn"].values[0]
1341
+ mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = locdf[unique_identifier].values[0]
1299
1342
  mmdf.iloc[k, mmdf.columns.get_loc("flairloop")] = locdf[outlier_column].values[0]
1300
- mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = locdf['ol_lof_decision'].values[0]
1343
+ mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = float(locdf['ol_lof_decision'].values[0])
1301
1344
 
1302
1345
  if nmdf is not None:
1303
1346
  locsel = nmdf['subjectIDdate'] == mmdf['subjectIDdate'].iloc[k]
@@ -1305,16 +1348,40 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1305
1348
  locdf = nmdf[locsel]
1306
1349
  for i in range(np.min( [10,locdf.shape[0]])):
1307
1350
  nmid = "nmid"+str(i+1)
1308
- mmdf[nmid].iloc[k] = locdf['imageID'].iloc[i]
1351
+ mmdf.loc[k,nmid] = locdf['imageID'].iloc[i]
1309
1352
  nmfn = "nmfn"+str(i+1)
1310
- mmdf[nmfn].iloc[k] = locdf['imageID'].iloc[i]
1353
+ mmdf.loc[k,nmfn] = locdf['imageID'].iloc[i]
1311
1354
  nmloop = "nmloop"+str(i+1)
1312
- mmdf[nmloop].iloc[k] = locdf[outlier_column].iloc[i]
1355
+ mmdf.loc[k,nmloop] = locdf[outlier_column].iloc[i]
1313
1356
  nmloop = "nmlof"+str(i+1)
1314
- mmdf[nmloop].iloc[k] = locdf['ol_lof_decision'].iloc[i]
1357
+ mmdf.loc[k,nmloop] = float(locdf['ol_lof_decision'].iloc[i])
1315
1358
 
1359
+ mmdf['rsf_total_timepoints']=mmdf['rsfntimepoints1']+mmdf['rsfntimepoints2']
1360
+ mmdf['dt_total_timepoints']=mmdf['dtntimepoints1']+mmdf['dtntimepoints2']
1316
1361
  return mmdf
1317
1362
 
1363
+
1364
+ def add_repeat_column(df, groupby_column):
1365
+ """
1366
+ Adds a 'repeat' column to the DataFrame that counts occurrences of each unique value
1367
+ in the specified 'groupby_column'. The count increments from 1 for each identical entry.
1368
+
1369
+ Parameters:
1370
+ - df: pandas DataFrame.
1371
+ - groupby_column: The name of the column to group by and count repeats.
1372
+
1373
+ Returns:
1374
+ - Modified pandas DataFrame with an added 'repeat' column.
1375
+ """
1376
+ # Validate if the groupby_column exists in the DataFrame
1377
+ if groupby_column not in df.columns:
1378
+ raise ValueError(f"Column '{groupby_column}' does not exist in the DataFrame.")
1379
+
1380
+ # Count the occurrences of each unique value in the specified column and increment from 1
1381
+ df['repeat'] = df.groupby(groupby_column).cumcount() + 1
1382
+
1383
+ return df
1384
+
1318
1385
  def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1319
1386
  """
1320
1387
  Selects the best repeats per modality.
@@ -1333,6 +1400,8 @@ def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1333
1400
  list: a list containing two metadata dataframes - raw and filt. raw contains all the metadata for the selected modality and filt contains the metadata filtered for highest quality repeats.
1334
1401
 
1335
1402
  """
1403
+ # mmdf = mmdf.astype(str)
1404
+ mmdf[outlier_column]=mmdf[outlier_column].astype(float)
1336
1405
  msel = mmdf['modality'] == wmod
1337
1406
  if wmod == 'rsfMRI':
1338
1407
  msel1 = mmdf['modality'] == 'rsfMRI'
@@ -1345,34 +1414,44 @@ def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1345
1414
  msel2 = mmdf['modality'] == 'DTI_LR'
1346
1415
  msel3 = mmdf['modality'] == 'DTI_RL'
1347
1416
  msel4 = mmdf['modality'] == 'DTIdwi'
1348
- msel = msel1 | msel2 | msel3 | msel4
1417
+ msel5 = mmdf['modality'] == 'DTIb0'
1418
+ msel = msel1 | msel2 | msel3 | msel4 | msel5
1349
1419
  if sum(msel) == 0:
1350
1420
  return {'raw': None, 'filt': None}
1351
- uids = list(mmdf['filename'][msel])
1352
- metasub = mmdf[msel]
1421
+ metasub = mmdf[msel].copy()
1353
1422
 
1354
1423
  if verbose:
1355
1424
  print(f"{wmod} {(metasub.shape[0])} pre")
1356
1425
 
1357
- metasub['subjectID']=math.nan
1358
- metasub['date']=math.nan
1359
- metasub['subjectIDdate']=math.nan
1360
- metasub['imageID']=math.nan
1361
- for k in range(len(uids)):
1362
- temp = uids[k].split( mysep )
1363
- metasub['subjectID'].iloc[k] = temp[1]
1364
- metasub['date'].iloc[k] = temp[2]
1365
- metasub['subjectIDdate'].iloc[k] = temp[1] + mysep + temp[2]
1366
- metasub['imageID'].iloc[k] = temp[4]
1367
-
1368
- metasub['negol'] = metasub[outlier_column].max() - metasub[outlier_column]
1426
+ metasub['subjectID']=None
1427
+ metasub['date']=None
1428
+ metasub['subjectIDdate']=None
1429
+ metasub['imageID']=None
1430
+ metasub['negol']=math.nan
1431
+ for k in metasub.index:
1432
+ temp = metasub.loc[k, 'filename'].split( mysep )
1433
+ metasub.loc[k,'subjectID'] = str( temp[1] )
1434
+ metasub.loc[k,'date'] = str( temp[2] )
1435
+ metasub.loc[k,'subjectIDdate'] = str( temp[1] + mysep + temp[2] )
1436
+ metasub.loc[k,'imageID'] = str( temp[4])
1437
+
1438
+
1439
+ if 'ol_' in outlier_column:
1440
+ metasub['negol'] = metasub[outlier_column].max() - metasub[outlier_column]
1441
+ else:
1442
+ metasub['negol'] = metasub[outlier_column]
1369
1443
  if 'date' not in metasub.keys():
1370
- metasub['date']='NA'
1371
- metasubq = highest_quality_repeat(metasub, 'filename', 'date', 'negol')
1444
+ metasub['date']=None
1445
+ metasubq = add_repeat_column( metasub, 'subjectIDdate' )
1446
+ metasubq = highest_quality_repeat(metasubq, 'filename', 'date', 'negol')
1372
1447
 
1373
1448
  if verbose:
1374
1449
  print(f"{wmod} {metasubq.shape[0]} post")
1375
1450
 
1451
+ # metasub = metasub.astype(str)
1452
+ # metasubq = metasubq.astype(str)
1453
+ metasub[outlier_column]=metasub[outlier_column].astype(float)
1454
+ metasubq[outlier_column]=metasubq[outlier_column].astype(float)
1376
1455
  return {'raw': metasub, 'filt': metasubq}
1377
1456
 
1378
1457
  def mm_read( x, standardize_intensity=False, modality='' ):
@@ -1783,10 +1862,12 @@ def merge_dwi_data( img_LRdwp, bval_LR, bvec_LR, img_RLdwp, bval_RL, bvec_RL ):
1783
1862
  bvec_RL : array
1784
1863
 
1785
1864
  """
1865
+ import warnings
1786
1866
  insamespace = ants.image_physical_space_consistency( img_LRdwp, img_RLdwp )
1787
1867
  if not insamespace :
1788
- raise ValueError('not insamespace ... corrected image pair should occupy the same physical space')
1789
-
1868
+ warnings.warn('not insamespace ... corrected image pair should occupy the same physical space; returning only the 1st set and wont join these data.')
1869
+ return img_LRdwp, bval_LR, bvec_LR
1870
+
1790
1871
  bval_LR = np.concatenate([bval_LR,bval_RL])
1791
1872
  bvec_LR = np.concatenate([bvec_LR,bvec_RL])
1792
1873
  # concatenate the images
@@ -2555,7 +2636,7 @@ def template_figure_with_overlay(scalar_label_df, prefix, outputfilename=None, t
2555
2636
  toviz = temp['overlay']
2556
2637
  return { "underlay": seggm, 'overlay': toviz, 'seg': tcrop }
2557
2638
 
2558
- def get_data( name=None, force_download=False, version=21, target_extension='.csv' ):
2639
+ def get_data( name=None, force_download=False, version=23, target_extension='.csv' ):
2559
2640
  """
2560
2641
  Get ANTsPyMM data filename
2561
2642
 
@@ -2839,6 +2920,34 @@ def super_res_mcimage( image,
2839
2920
  return ants.list_to_ndimage( imageup, mcsr )
2840
2921
 
2841
2922
 
2923
+ def segment_timeseries_by_bvalue(bvals):
2924
+ """
2925
+ Segments a time series based on a threshold applied to b-values.
2926
+
2927
+ This function categorizes indices of the given b-values array into two groups:
2928
+ one for indices where b-values are above a near-zero threshold, and another
2929
+ where b-values are at or below this threshold. The threshold is set to 1e-12.
2930
+
2931
+ Parameters:
2932
+ - bvals (numpy.ndarray): An array of b-values.
2933
+
2934
+ Returns:
2935
+ - dict: A dictionary with two keys, 'lowermeans' and 'highermeans', each containing
2936
+ the indices of bvals where the b-values are above and at/below the threshold, respectively.
2937
+ """
2938
+ # Define the threshold
2939
+ threshold = 1e-12
2940
+
2941
+ # Get indices where b-values are greater than the threshold
2942
+ lowermeans = list(np.where(bvals > threshold)[0])
2943
+
2944
+ # Get indices where b-values are less than or equal to the threshold
2945
+ highermeans = list(np.where(bvals <= threshold)[0])
2946
+
2947
+ return {
2948
+ 'lowermeans': lowermeans,
2949
+ 'highermeans': highermeans
2950
+ }
2842
2951
 
2843
2952
  def segment_timeseries_by_meanvalue( image, quantile = 0.995 ):
2844
2953
  """
@@ -3290,7 +3399,7 @@ def dipy_dti_recon(
3290
3399
  space as the image, we will resample directly to the image space. This
3291
3400
  could lead to problems if the inputs are really incorrect.
3292
3401
 
3293
- b0_idx : the indices of the B0; if None, use segment_timeseries_by_meanvalue to guess
3402
+ b0_idx : the indices of the B0; if None, use segment_timeseries_by_bvalue
3294
3403
 
3295
3404
  mask_dilation : integer zero or more dilates the brain mask
3296
3405
 
@@ -3321,8 +3430,7 @@ def dipy_dti_recon(
3321
3430
  bvals = bvalsfn.copy()
3322
3431
  bvecs = bvecsfn.copy()
3323
3432
 
3324
- if b0_idx is None:
3325
- b0_idx = segment_timeseries_by_meanvalue( image )['highermeans']
3433
+ b0_idx = segment_timeseries_by_bvalue( bvals )['highermeans']
3326
3434
 
3327
3435
  b0 = ants.slice_image( image, axis=3, idx=b0_idx[0] )
3328
3436
  bxtmod='bold'
@@ -3532,6 +3640,9 @@ def joint_dti_recon(
3532
3640
  def fix_dwi_shape( img, bvalfn, bvecfn ):
3533
3641
  if isinstance(bvecfn, str):
3534
3642
  bvals, bvecs = read_bvals_bvecs( bvalfn , bvecfn )
3643
+ else:
3644
+ bvals = bvalfn
3645
+ bvecs = bvecfn
3535
3646
  if bvecs.shape[0] < img.shape[3]:
3536
3647
  imgout = ants.from_numpy( img[:,:,:,0:bvecs.shape[0]] )
3537
3648
  imgout = ants.copy_image_info( img, imgout )
@@ -4586,7 +4697,7 @@ def get_rsf_outputs( coords ):
4586
4697
  return list( yeo['SystemName'].unique() )
4587
4698
 
4588
4699
  def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4589
- transform=['rigid'], verbose=False ):
4700
+ transform=['rigid'], compreg=None, verbose=False ):
4590
4701
  """
4591
4702
  multi-start multi-transform registration solution - based on ants.registration
4592
4703
 
@@ -4600,6 +4711,8 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4600
4711
 
4601
4712
  transform : list of transforms to loop through
4602
4713
 
4714
+ compreg : registration results against which to compare
4715
+
4603
4716
  verbose : boolean
4604
4717
 
4605
4718
  """
@@ -4608,15 +4721,20 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4608
4721
  output_directory_w = output_directory + "/tra_reg/"
4609
4722
  os.makedirs(output_directory_w,exist_ok=True)
4610
4723
  bestmi = math.inf
4724
+ bestvar = 0.0
4611
4725
  myorig = list(ants.get_origin( fixed ))
4612
4726
  mymax = 0;
4613
4727
  for k in range(len( myorig ) ):
4614
4728
  if abs(myorig[k]) > mymax:
4615
4729
  mymax = abs(myorig[k])
4616
4730
  maxtrans = mymax * 0.05
4617
- bestreg=ants.registration( fixed,moving,'Translation',
4618
- outprefix=output_directory_w+"trans")
4619
- initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4731
+ if compreg is None:
4732
+ bestreg=ants.registration( fixed,moving,'Translation',
4733
+ outprefix=output_directory_w+"trans")
4734
+ initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4735
+ else :
4736
+ bestreg=compreg
4737
+ initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4620
4738
  for mytx in transform:
4621
4739
  regtx = 'Rigid'
4622
4740
  with tempfile.NamedTemporaryFile(suffix='.h5') as tp:
@@ -4653,6 +4771,9 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4653
4771
  print( "mi @ " + str(k) + " : " + str(mymi), flush=True)
4654
4772
  bestmi = mymi
4655
4773
  bestreg = reg
4774
+ bestvar = myvar
4775
+ if bestvar == 0.0 and compreg is not None:
4776
+ return compreg
4656
4777
  return bestreg
4657
4778
 
4658
4779
  def neuromelanin( list_nm_images, t1, t1_head, t1lab, brain_stem_dilation=8,
@@ -4834,7 +4955,7 @@ def neuromelanin( list_nm_images, t1, t1_head, t1lab, brain_stem_dilation=8,
4834
4955
  nm_avg_cropped_new = nm_avg_cropped_new + warpednext
4835
4956
  nm_avg_cropped = nm_avg_cropped_new / len( crop_nm_list )
4836
4957
 
4837
- slabregUpdated = tra_initializer( nm_avg_cropped, t1c, verbose=verbose )
4958
+ slabregUpdated = tra_initializer( nm_avg_cropped, t1c, compreg=slabreg,verbose=verbose )
4838
4959
  tempOrig = ants.apply_transforms( nm_avg_cropped_new, t1c, slabreg['fwdtransforms'] )
4839
4960
  tempUpdate = ants.apply_transforms( nm_avg_cropped_new, t1c, slabregUpdated['fwdtransforms'] )
4840
4961
  miUpdate = ants.image_mutual_information(
@@ -5409,7 +5530,8 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5409
5530
  ptImage=ants.threshold_image( ptImg, pts2bold.loc[i,'ROI'], pts2bold.loc[i,'ROI'] )
5410
5531
  if debug:
5411
5532
  ptImgAll = ptImgAll + ptImage
5412
- meanROI[:,i] = ants.timeseries_to_matrix( simg, ptImage).mean(axis=1)
5533
+ if ptImage.sum() > 0 :
5534
+ meanROI[:,i] = ants.timeseries_to_matrix( simg, ptImage).mean(axis=1)
5413
5535
 
5414
5536
  if debug:
5415
5537
  ants.image_write( simg, '/tmp/simg.nii.gz' )
@@ -5527,9 +5649,15 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5527
5649
  fname='falffPoint'+kk+anatname
5528
5650
  aname='alffPoint'+kk+anatname
5529
5651
  pname='perafPoint'+kk+anatname
5530
- outdict[fname]=(outdict['falff'][ptImg==k]).mean()
5531
- outdict[aname]=(outdict['alff'][ptImg==k]).mean()
5532
- outdict[pname]=(perafimg[ptImg==k]).mean()
5652
+ localsel = ptImg == k
5653
+ if localsel.sum() > 0 : # check if non-empty
5654
+ outdict[fname]=(outdict['falff'][localsel]).mean()
5655
+ outdict[aname]=(outdict['alff'][localsel]).mean()
5656
+ outdict[pname]=(perafimg[localsel]).mean()
5657
+ else:
5658
+ outdict[fname]=math.nan
5659
+ outdict[aname]=math.nan
5660
+ outdict[pname]=math.nan
5533
5661
 
5534
5662
  rsfNuisance = pd.DataFrame( nuisance )
5535
5663
  if remove_it:
@@ -5562,6 +5690,7 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5562
5690
  outdict['despiking_count_summary'] = despiking_count_summary
5563
5691
  outdict['FD_max'] = corrmo['FD'].max()
5564
5692
  outdict['FD_mean'] = corrmo['FD'].mean()
5693
+ outdict['FD_sd'] = corrmo['FD'].std()
5565
5694
  outdict['bold_evr'] = antspyt1w.patch_eigenvalue_ratio( und, 512, [16,16,16], evdepth = 0.9, mask = bmask )
5566
5695
  outdict['n_outliers'] = len(hlinds)
5567
5696
  outdict['nc_wm'] = int(nc_wm)
@@ -5638,11 +5767,18 @@ def despike_time_series_afni(image, c1=2.5, c2=4):
5638
5767
  residuals = data - curve
5639
5768
  mad = np.median(np.abs(residuals - np.median(residuals, axis=-1, keepdims=True)), axis=-1, keepdims=True)
5640
5769
  sigma = np.sqrt(np.pi / 2) * mad
5770
+ # Ensure sigma is not zero to avoid division by zero
5771
+ sigma_safe = np.where(sigma == 0, 1e-10, sigma)
5772
+
5773
+ # Optionally, handle NaN or inf values in data, curve, or sigma
5774
+ data = np.nan_to_num(data, nan=0.0, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5775
+ curve = np.nan_to_num(curve, nan=0.0, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5776
+ sigma_safe = np.nan_to_num(sigma_safe, nan=1e-10, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5641
5777
 
5642
5778
  # Despike algorithm
5643
5779
  spike_counts = np.zeros( image.shape[3] )
5644
5780
  for i in range(data.shape[-1]):
5645
- s = (data[..., i] - curve[..., i]) / sigma[..., 0]
5781
+ s = (data[..., i] - curve[..., i]) / sigma_safe[..., 0]
5646
5782
  ww = s > c1
5647
5783
  s_prime = np.where( ww, c1 + (c2 - c1) * np.tanh((s - c1) / (c2 - c1)), s)
5648
5784
  spike_counts[i] = ww.sum()
@@ -5907,6 +6043,7 @@ def bold_perfusion_minimal(
5907
6043
  outdict['dvars'] = dvars( corrmo['motion_corrected'], gmseg )
5908
6044
  outdict['FD_max'] = rsfNuisance['FD'].max()
5909
6045
  outdict['FD_mean'] = rsfNuisance['FD'].mean()
6046
+ outdict['FD_sd'] = rsfNuisance['FD'].std()
5910
6047
  outdict['outlier_volumes']=hlinds
5911
6048
  outdict['negative_voxels']=negative_voxels
5912
6049
  return convert_np_in_dict( outdict )
@@ -6313,6 +6450,7 @@ Where:
6313
6450
  outdict['high_motion_pct'] = (rsfNuisance['FD'] > FD_threshold ).sum() / rsfNuisance.shape[0]
6314
6451
  outdict['FD_max'] = rsfNuisance['FD'].max()
6315
6452
  outdict['FD_mean'] = rsfNuisance['FD'].mean()
6453
+ outdict['FD_sd'] = rsfNuisance['FD'].std()
6316
6454
  outdict['bold_evr'] = antspyt1w.patch_eigenvalue_ratio( und, 512, [16,16,16], evdepth = 0.9, mask = bmask )
6317
6455
  outdict['t1reg'] = t1reg
6318
6456
  outdict['outlier_volumes']=hlinds
@@ -6877,11 +7015,14 @@ def mm(
6877
7015
  normalization_dict['MD_norm'] = ants.apply_transforms( group_template, mydti['recon_md'],group_transform+dtirig['fwdtransforms'] )
6878
7016
  normalization_dict['FA_norm'] = ants.apply_transforms( group_template, mydti['recon_fa'],group_transform+dtirig['fwdtransforms'] )
6879
7017
  output_directory = tempfile.mkdtemp()
6880
- comptx = ants.apply_transforms( group_template, group_template,
6881
- group_transform+dtirig['fwdtransforms'],
6882
- compose = output_directory + '/xxx' )
6883
- normalization_dict['DTI_norm'] = transform_and_reorient_dti(
6884
- group_template, mydti['dti'], comptx, py_based=True, verbose=True )
7018
+ do_dti_norm=False
7019
+ if do_dti_norm:
7020
+ comptx = ants.apply_transforms( group_template, group_template, group_transform+dtirig['fwdtransforms'], compose = output_directory + '/xxx' )
7021
+ tspc=[2.,2.,2.]
7022
+ if srmodel is not None:
7023
+ tspc=[1.,1.,1.]
7024
+ group_template2mm = ants.resample_image( group_template, tspc )
7025
+ normalization_dict['DTI_norm'] = transform_and_reorient_dti( group_template2mm, mydti['dti'], comptx, py_based=True, verbose=True )
6885
7026
  import shutil
6886
7027
  shutil.rmtree(output_directory, ignore_errors=True )
6887
7028
  if output_dict['rsf'] is not None:
@@ -7019,6 +7160,8 @@ def write_mm( output_prefix, mm, mm_norm=None, t1wide=None, separator='_', verbo
7019
7160
  if 'flair' in mm:
7020
7161
  if mm['flair'] is not None:
7021
7162
  myop = output_prefix + separator + 'wmh.nii.gz'
7163
+ pngfnb = output_prefix + separator + 'wmh_seg.png'
7164
+ ants.plot( mm['flair']['flair'], mm['flair']['WMH_posterior_probability_map'], axis=2, nslices=21, ncol=7, filename=pngfnb, crop=True )
7022
7165
  if mm['flair']['WMH_probability_map'] is not None:
7023
7166
  image_write_with_thumbnail( mm['flair']['WMH_probability_map'], myop, thumb=False )
7024
7167
  flwide = dict_to_dataframe( mm['flair'] )
@@ -7066,9 +7209,10 @@ def write_mm( output_prefix, mm, mm_norm=None, t1wide=None, separator='_', verbo
7066
7209
  mm_wide['dti_high_motion_count'] = mydti['high_motion_count']
7067
7210
  mm_wide['dti_FD_mean'] = mydti['framewise_displacement'].mean()
7068
7211
  mm_wide['dti_FD_max'] = mydti['framewise_displacement'].max()
7212
+ mm_wide['dti_FD_sd'] = mydti['framewise_displacement'].std()
7069
7213
  fdfn = output_prefix + separator + '_fd.csv'
7070
7214
  else:
7071
- mm_wide['dti_FD_mean'] = mm_wide['dti_FD_max'] = 'NA'
7215
+ mm_wide['dti_FD_mean'] = mm_wide['dti_FD_max'] = mm_wide['dti_FD_sd'] = 'NA'
7072
7216
 
7073
7217
  if 'perf' in mm:
7074
7218
  if mm['perf'] is not None:
@@ -7708,6 +7852,7 @@ def mm_csv(
7708
7852
  writes output to disk and produces figures
7709
7853
 
7710
7854
  """
7855
+ import traceback
7711
7856
  visualize = True
7712
7857
  verbose = True
7713
7858
  if verbose:
@@ -7965,9 +8110,11 @@ def mm_csv(
7965
8110
  test_run=test_run,
7966
8111
  verbose=True )
7967
8112
  except Exception as e:
8113
+ error_info = traceback.format_exc()
8114
+ print(error_info)
7968
8115
  visualize=False
7969
8116
  dowrite=False
7970
- print(f"An error occurred while processing {overmodX}: {e}")
8117
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
7971
8118
  pass
7972
8119
  if not test_run:
7973
8120
  write_mm( output_prefix=mymm, mm=tabPro, mm_norm=normPro, t1wide=None, separator=mysep )
@@ -8021,9 +8168,11 @@ def mm_csv(
8021
8168
  test_run=test_run,
8022
8169
  verbose=True )
8023
8170
  except Exception as e:
8171
+ error_info = traceback.format_exc()
8172
+ print(error_info)
8024
8173
  visualize=False
8025
8174
  dowrite=False
8026
- print(f"An error occurred while processing {overmodX}: {e}")
8175
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8027
8176
  pass
8028
8177
  if visualize:
8029
8178
  maxslice = np.min( [21, hier['brain_n4_dnz'].shape[2] ] )
@@ -8044,9 +8193,11 @@ def mm_csv(
8044
8193
  test_run=test_run,
8045
8194
  verbose=True )
8046
8195
  except Exception as e:
8196
+ error_info = traceback.format_exc()
8197
+ print(error_info)
8047
8198
  visualize=False
8048
8199
  dowrite=False
8049
- print(f"An error occurred while processing {overmodX}: {e}")
8200
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8050
8201
  pass
8051
8202
  if visualize:
8052
8203
  maxslice = np.min( [21, img.shape[2] ] )
@@ -8081,11 +8232,13 @@ def mm_csv(
8081
8232
  test_run=test_run,
8082
8233
  verbose=True )
8083
8234
  except Exception as e:
8235
+ error_info = traceback.format_exc()
8236
+ print(error_info)
8084
8237
  visualize=False
8085
8238
  dowrite=False
8086
8239
  tabPro={'rsf':None}
8087
8240
  normPro={'rsf':None}
8088
- print(f"An error occurred while processing {overmodX}: {e}")
8241
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8089
8242
  pass
8090
8243
  if tabPro['rsf'] is not None and visualize:
8091
8244
  for tpro in tabPro['rsf']: # FIXMERSF
@@ -8117,10 +8270,12 @@ def mm_csv(
8117
8270
  perfusion_m0=perfusion_m0,
8118
8271
  verbose=True )
8119
8272
  except Exception as e:
8273
+ error_info = traceback.format_exc()
8274
+ print(error_info)
8120
8275
  visualize=False
8121
8276
  dowrite=False
8122
8277
  tabPro={'perf':None}
8123
- print(f"An error occurred while processing {overmodX}: {e}")
8278
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8124
8279
  pass
8125
8280
  if tabPro['perf'] is not None and visualize:
8126
8281
  maxslice = np.min( [21, tabPro['perf']['meanBold'].shape[2] ] )
@@ -8137,7 +8292,7 @@ def mm_csv(
8137
8292
  bvalfnList = [ bvalfn ]
8138
8293
  bvecfnList = [ bvecfn ]
8139
8294
  missing_dti_data=False # bval, bvec or images
8140
- if len( myimgsr ) > 1: # find DTI_RL
8295
+ if len( myimgsr ) == 2: # find DTI_RL
8141
8296
  dtilrfn = myimgsr[myimgcount+1]
8142
8297
  if exists( dtilrfn ):
8143
8298
  bvalfnRL = re.sub( '.nii.gz', '.bval' , dtilrfn )
@@ -8146,6 +8301,28 @@ def mm_csv(
8146
8301
  imgList.append( imgRL )
8147
8302
  bvalfnList.append( bvalfnRL )
8148
8303
  bvecfnList.append( bvecfnRL )
8304
+ elif len( myimgsr ) == 3: # find DTI_RL
8305
+ print("DTI trinity")
8306
+ dtilrfn = myimgsr[myimgcount+1]
8307
+ dtilrfn2 = myimgsr[myimgcount+2]
8308
+ if exists( dtilrfn ) and exists( dtilrfn2 ):
8309
+ bvalfnRL = re.sub( '.nii.gz', '.bval' , dtilrfn )
8310
+ bvecfnRL = re.sub( '.nii.gz', '.bvec' , dtilrfn )
8311
+ bvalfnRL2 = re.sub( '.nii.gz', '.bval' , dtilrfn2 )
8312
+ bvecfnRL2 = re.sub( '.nii.gz', '.bvec' , dtilrfn2 )
8313
+ imgRL = ants.image_read( dtilrfn )
8314
+ imgRL2 = ants.image_read( dtilrfn2 )
8315
+ bvals, bvecs = read_bvals_bvecs( bvalfnRL , bvecfnRL )
8316
+ print( bvals.max() )
8317
+ bvals2, bvecs2 = read_bvals_bvecs( bvalfnRL2 , bvecfnRL2 )
8318
+ print( bvals2.max() )
8319
+ temp = merge_dwi_data( imgRL, bvals, bvecs, imgRL2, bvals2, bvecs2 )
8320
+ imgList.append( temp[0] )
8321
+ bvalfnList.append( mymm+mysep+'joined.bval' )
8322
+ bvecfnList.append( mymm+mysep+'joined.bvec' )
8323
+ write_bvals_bvecs( temp[1], temp[2], mymm+mysep+'joined' )
8324
+ bvalsX, bvecsX = read_bvals_bvecs( bvalfnRL2 , bvecfnRL2 )
8325
+ print( bvalsX.max() )
8149
8326
  # check existence of all files expected ...
8150
8327
  for dtiex in bvalfnList+bvecfnList+myimgsr:
8151
8328
  if not exists(dtiex):
@@ -8175,20 +8352,22 @@ def mm_csv(
8175
8352
  bvals = bvalfnList,
8176
8353
  bvecs = bvecfnList,
8177
8354
  srmodel=srmodel_DTI_mdl,
8178
- do_tractography=False, #not test_run,
8355
+ do_tractography=not test_run,
8179
8356
  do_kk=False,
8180
8357
  do_normalization=templateTx,
8181
- group_template = ants.resample_image(normalization_template,(2,2,2)),
8358
+ group_template = normalization_template,
8182
8359
  group_transform = groupTx,
8183
8360
  dti_motion_correct = dti_motion_correct,
8184
8361
  dti_denoise = dti_denoise,
8185
8362
  test_run=test_run,
8186
8363
  verbose=True )
8187
8364
  except Exception as e:
8365
+ error_info = traceback.format_exc()
8366
+ print(error_info)
8188
8367
  visualize=False
8189
8368
  dowrite=False
8190
8369
  tabPro={'DTI':None}
8191
- print(f"An error occurred while processing {overmodX}: {e}")
8370
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8192
8371
  pass
8193
8372
  mydti = tabPro['DTI']
8194
8373
  if visualize and tabPro['DTI'] is not None:
@@ -8835,6 +9014,7 @@ def boot_wmh( flair, t1, t1seg, mmfromconvexhull = 0.0, strict=True,
8835
9014
  wmh_sum_aug = wmh_sum_aug / float( n_simulations )
8836
9015
  wmh_sum_prior_aug = wmh_sum_prior_aug / float( n_simulations )
8837
9016
  return{
9017
+ 'flair' : ants.iMath(flair,"Normalize"),
8838
9018
  'WMH_probability_map' : augprob,
8839
9019
  'WMH_posterior_probability_map' : augprob_prior,
8840
9020
  'wmh_mass': wmh_sum_aug,
@@ -9267,6 +9447,7 @@ def blind_image_assessment(
9267
9447
  title=False,
9268
9448
  pull_rank=False,
9269
9449
  resample=None,
9450
+ n_to_skip = 10,
9270
9451
  verbose=False
9271
9452
  ):
9272
9453
  """
@@ -9296,6 +9477,8 @@ def blind_image_assessment(
9296
9477
 
9297
9478
  resample : None, numeric max or min, resamples image to isotropy
9298
9479
 
9480
+ n_to_skip : 10 by default; samples time series every n_to_skip volume
9481
+
9299
9482
  verbose : boolean
9300
9483
 
9301
9484
  """
@@ -9307,6 +9490,7 @@ def blind_image_assessment(
9307
9490
  from pathlib import Path
9308
9491
  import json
9309
9492
  import re
9493
+ from dipy.io.gradients import read_bvals_bvecs
9310
9494
  mystem=''
9311
9495
  if isinstance(image,list):
9312
9496
  isfilename=isinstance( image[0], str)
@@ -9315,6 +9499,7 @@ def blind_image_assessment(
9315
9499
  isfilename=isinstance( image, str)
9316
9500
  outdf = pd.DataFrame()
9317
9501
  mymeta = None
9502
+ MagneticFieldStrength = None
9318
9503
  image_filename=''
9319
9504
  if isfilename:
9320
9505
  image_filename = image
@@ -9322,10 +9507,14 @@ def blind_image_assessment(
9322
9507
  image_filename=image[0]
9323
9508
  json_name = re.sub(".nii.gz",".json",image_filename)
9324
9509
  if exists( json_name ):
9325
- with open(json_name, 'r') as fcc_file:
9326
- mymeta = json.load(fcc_file, strict=False)
9327
- if verbose:
9328
- print(json.dumps(mymeta, indent=4))
9510
+ try:
9511
+ with open(json_name, 'r') as fcc_file:
9512
+ mymeta = json.load(fcc_file)
9513
+ if verbose:
9514
+ print(json.dumps(mymeta, indent=4))
9515
+ fcc_file.close()
9516
+ except:
9517
+ pass
9329
9518
  mystem=Path( image ).stem
9330
9519
  mystem=Path( mystem ).stem
9331
9520
  image_reference = ants.image_read( image )
@@ -9333,6 +9522,7 @@ def blind_image_assessment(
9333
9522
  else:
9334
9523
  image_reference = ants.image_clone( image )
9335
9524
  ntimepoints = 1
9525
+ bvalueMax=None
9336
9526
  if image_reference.dimension == 4:
9337
9527
  ntimepoints = image_reference.shape[3]
9338
9528
  if "DTI" in image_filename:
@@ -9340,11 +9530,16 @@ def blind_image_assessment(
9340
9530
  image_b0, image_dwi = get_average_dwi_b0( image_reference, fast=True )
9341
9531
  image_b0 = ants.iMath( image_b0, 'Normalize' )
9342
9532
  image_dwi = ants.iMath( image_dwi, 'Normalize' )
9533
+ bval_name = re.sub(".nii.gz",".bval",image_filename)
9534
+ bvec_name = re.sub(".nii.gz",".bvec",image_filename)
9535
+ if exists( bval_name ) and exists( bvec_name ):
9536
+ bvals, bvecs = read_bvals_bvecs( bval_name , bvec_name )
9537
+ bvalueMax = bvals.max()
9343
9538
  else:
9344
9539
  image_b0 = ants.get_average_of_timeseries( image_reference ).iMath("Normalize")
9345
9540
  else:
9346
9541
  image_compare = ants.smooth_image( image_reference, 3, sigma_in_physical_coordinates=False )
9347
- for jjj in range(ntimepoints):
9542
+ for jjj in range(0,ntimepoints,n_to_skip):
9348
9543
  modality='unknown'
9349
9544
  if "rsfMRI" in image_filename:
9350
9545
  modality='rsfMRI'
@@ -9365,7 +9560,7 @@ def blind_image_assessment(
9365
9560
  modality='DTIdwi'
9366
9561
  else:
9367
9562
  image_compare = ants.image_clone( image_b0 )
9368
- image = ants.iMath( image, 'TruncateIntensity',0.01,0.995)
9563
+ # image = ants.iMath( image, 'TruncateIntensity',0.01,0.995)
9369
9564
  minspc = np.min(ants.get_spacing(image))
9370
9565
  maxspc = np.max(ants.get_spacing(image))
9371
9566
  if resample is not None:
@@ -9397,69 +9592,76 @@ def blind_image_assessment(
9397
9592
  bgmsk = msk*0+1-msk
9398
9593
  mskdil = ants.iMath(msk, "MD", 4 )
9399
9594
  # ants.plot_ortho( image, msk, crop=False )
9400
- image = ants.crop_image( image, mskdil ).iMath("Normalize")
9401
- msk = ants.crop_image( msk, mskdil ).iMath("Normalize")
9402
- bgmsk = ants.crop_image( bgmsk, mskdil ).iMath("Normalize")
9403
- image_compare = ants.crop_image( image_compare, mskdil ).iMath("Normalize")
9404
9595
  nvox = int( msk.sum() )
9405
- minshp = np.min( image.shape )
9406
- npatch = int( np.round( 0.1 * nvox ) )
9407
- npatch = np.min( [512,npatch ] )
9408
- patch_shape = []
9409
- for k in range( 3 ):
9410
- p = int( 32.0 / ants.get_spacing( image )[k] )
9411
- if p > int( np.round( image.shape[k] * 0.5 ) ):
9412
- p = int( np.round( image.shape[k] * 0.5 ) )
9413
- patch_shape.append( p )
9414
- if verbose:
9415
- print(image)
9416
- print( patch_shape )
9417
- print( npatch )
9418
- myevr = math.nan # dont want to fail if something odd happens in patch extraction
9419
- try:
9420
- myevr = antspyt1w.patch_eigenvalue_ratio( image, npatch, patch_shape,
9421
- evdepth = 0.9, mask=msk )
9422
- except:
9423
- pass
9424
- if pull_rank:
9425
- image = ants.rank_intensity(image)
9426
- imagereflect = ants.reflect_image(image, axis=0)
9427
- asym_err = ( image - imagereflect ).abs().mean()
9428
- # estimate noise by center cropping, denoizing and taking magnitude of difference
9429
- nocrop=False
9430
- if image.dimension == 3:
9431
- if image.shape[2] == 1:
9432
- nocrop=True
9433
- if maxspc/minspc > 10:
9434
- nocrop=True
9435
- if nocrop:
9436
- mycc = ants.image_clone( image )
9437
- else:
9438
- mycc = antspyt1w.special_crop( image,
9439
- ants.get_center_of_mass( msk *0 + 1 ), patch_shape )
9440
- myccd = ants.denoise_image( mycc, p=2,r=2,noise_model='Gaussian' )
9441
- noizlevel = ( mycc - myccd ).abs().mean()
9442
- # ants.plot_ortho( image, crop=False, filename=viz_filename, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0 )
9443
- # from brisque import BRISQUE
9444
- # obj = BRISQUE(url=False)
9445
- # mybrisq = obj.score( np.array( Image.open( viz_filename )) )
9446
9596
  spc = ants.get_spacing( image )
9447
9597
  org = ants.get_origin( image )
9448
- msk_vol = msk.sum() * np.prod( spc )
9449
- bgstd = image[ bgmsk == 1 ].std()
9450
- fgmean = image[ msk == 1 ].mean()
9451
- bgmean = image[ bgmsk == 1 ].mean()
9452
- snrref = fgmean / bgstd
9453
- cnrref = ( fgmean - bgmean ) / bgstd
9454
- psnrref = antspynet.psnr( image_compare, image )
9455
- ssimref = antspynet.ssim( image_compare, image )
9456
- if nocrop:
9457
- mymi = math.inf
9598
+ if ( nvox > 0 ):
9599
+ image = ants.crop_image( image, mskdil ).iMath("Normalize")
9600
+ msk = ants.crop_image( msk, mskdil ).iMath("Normalize")
9601
+ bgmsk = ants.crop_image( bgmsk, mskdil ).iMath("Normalize")
9602
+ image_compare = ants.crop_image( image_compare, mskdil ).iMath("Normalize")
9603
+ npatch = int( np.round( 0.1 * nvox ) )
9604
+ npatch = np.min( [512,npatch ] )
9605
+ patch_shape = []
9606
+ for k in range( 3 ):
9607
+ p = int( 32.0 / ants.get_spacing( image )[k] )
9608
+ if p > int( np.round( image.shape[k] * 0.5 ) ):
9609
+ p = int( np.round( image.shape[k] * 0.5 ) )
9610
+ patch_shape.append( p )
9611
+ if verbose:
9612
+ print(image)
9613
+ print( patch_shape )
9614
+ print( npatch )
9615
+ myevr = math.nan # dont want to fail if something odd happens in patch extraction
9616
+ try:
9617
+ myevr = antspyt1w.patch_eigenvalue_ratio( image, npatch, patch_shape,
9618
+ evdepth = 0.9, mask=msk )
9619
+ except:
9620
+ pass
9621
+ if pull_rank:
9622
+ image = ants.rank_intensity(image)
9623
+ imagereflect = ants.reflect_image(image, axis=0)
9624
+ asym_err = ( image - imagereflect ).abs().mean()
9625
+ # estimate noise by center cropping, denoizing and taking magnitude of difference
9626
+ nocrop=False
9627
+ if image.dimension == 3:
9628
+ if image.shape[2] == 1:
9629
+ nocrop=True
9630
+ if maxspc/minspc > 10:
9631
+ nocrop=True
9632
+ if nocrop:
9633
+ mycc = ants.image_clone( image )
9634
+ else:
9635
+ mycc = antspyt1w.special_crop( image,
9636
+ ants.get_center_of_mass( msk *0 + 1 ), patch_shape )
9637
+ myccd = ants.denoise_image( mycc, p=2,r=2,noise_model='Gaussian' )
9638
+ noizlevel = ( mycc - myccd ).abs().mean()
9639
+ # ants.plot_ortho( image, crop=False, filename=viz_filename, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0 )
9640
+ # from brisque import BRISQUE
9641
+ # obj = BRISQUE(url=False)
9642
+ # mybrisq = obj.score( np.array( Image.open( viz_filename )) )
9643
+ msk_vol = msk.sum() * np.prod( spc )
9644
+ bgstd = image[ bgmsk == 1 ].std()
9645
+ fgmean = image[ msk == 1 ].mean()
9646
+ bgmean = image[ bgmsk == 1 ].mean()
9647
+ snrref = fgmean / bgstd
9648
+ cnrref = ( fgmean - bgmean ) / bgstd
9649
+ psnrref = antspynet.psnr( image_compare, image )
9650
+ ssimref = antspynet.ssim( image_compare, image )
9651
+ if nocrop:
9652
+ mymi = math.inf
9653
+ else:
9654
+ mymi = ants.image_mutual_information( image_compare, image )
9458
9655
  else:
9459
- mymi = ants.image_mutual_information( image_compare, image )
9460
- mriseries='NA'
9461
- mrimfg='NA'
9462
- mrimodel='NA'
9656
+ msk_vol = 0
9657
+ myevr = mymi = ssimref = psnrref = cnrref = asym_err = noizlevel = math.nan
9658
+
9659
+ mriseries=None
9660
+ mrimfg=None
9661
+ mrimodel=None
9662
+ mriSAR=None
9663
+ BandwidthPerPixelPhaseEncode=None
9664
+ PixelBandwidth=None
9463
9665
  if mymeta is not None:
9464
9666
  # mriseries=mymeta['']
9465
9667
  try:
@@ -9470,13 +9672,39 @@ def blind_image_assessment(
9470
9672
  mrimodel=mymeta['ManufacturersModelName']
9471
9673
  except:
9472
9674
  pass
9675
+ try:
9676
+ MagneticFieldStrength=mymeta['MagneticFieldStrength']
9677
+ except:
9678
+ pass
9679
+ try:
9680
+ PixelBandwidth=mymeta['PixelBandwidth']
9681
+ except:
9682
+ pass
9683
+ try:
9684
+ BandwidthPerPixelPhaseEncode=mymeta['BandwidthPerPixelPhaseEncode']
9685
+ except:
9686
+ pass
9687
+ try:
9688
+ mriSAR=mymeta['SAR']
9689
+ except:
9690
+ pass
9473
9691
  ttl=mystem + ' '
9474
9692
  ttl=''
9475
9693
  ttl=ttl + "NZ: " + "{:0.4f}".format(noizlevel) + " SNR: " + "{:0.4f}".format(snrref) + " CNR: " + "{:0.4f}".format(cnrref) + " PS: " + "{:0.4f}".format(psnrref)+ " SS: " + "{:0.4f}".format(ssimref) + " EVR: " + "{:0.4f}".format(myevr)+ " MI: " + "{:0.4f}".format(mymi)
9476
- if viz_filename is not None and ( jjj == 0 or (jjj % 30 == 0) ):
9694
+ if viz_filename is not None and ( jjj == 0 or (jjj % 30 == 0) ) and image.shape[2] < 685:
9477
9695
  viz_filename_use = re.sub( ".png", "_slice"+str(jjj).zfill(4)+".png", viz_filename )
9478
9696
  ants.plot_ortho( image, crop=False, filename=viz_filename_use, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0, title=ttl, titlefontsize=12, title_dy=-0.02,textfontcolor='red' )
9479
- df = pd.DataFrame([[ mystem, noizlevel, snrref, cnrref, psnrref, ssimref, mymi, asym_err, myevr, msk_vol, spc[0], spc[1], spc[2],org[0], org[1], org[2], image.shape[0], image.shape[1], image.shape[2], jjj, modality, mriseries, mrimfg, mrimodel ]], columns=['filename', 'noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi', 'reflection_err', 'EVR', 'msk_vol', 'spc0','spc1','spc2','org0','org1','org2','dimx','dimy','dimz','slice','modality', 'mriseries', 'mrimfg', 'mrimodel' ])
9697
+ df = pd.DataFrame([[
9698
+ mystem,
9699
+ image_reference.dimension,
9700
+ noizlevel, snrref, cnrref, psnrref, ssimref, mymi, asym_err, myevr, msk_vol,
9701
+ spc[0], spc[1], spc[2],org[0], org[1], org[2],
9702
+ image.shape[0], image.shape[1], image.shape[2], ntimepoints,
9703
+ jjj, modality, mriseries, mrimfg, mrimodel, MagneticFieldStrength, mriSAR, PixelBandwidth, BandwidthPerPixelPhaseEncode, bvalueMax ]],
9704
+ columns=[
9705
+ 'filename',
9706
+ 'dimensionality',
9707
+ 'noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi', 'reflection_err', 'EVR', 'msk_vol', 'spc0','spc1','spc2','org0','org1','org2','dimx','dimy','dimz','dimt','slice','modality', 'mriseries', 'mrimfg', 'mrimodel', 'mriMagneticFieldStrength', 'mriSAR', 'mriPixelBandwidth', 'mriPixelBandwidthPE', 'dti_bvalueMax' ])
9480
9708
  outdf = pd.concat( [outdf, df ], axis=0, ignore_index=False )
9481
9709
  if verbose:
9482
9710
  print( outdf )
@@ -9485,6 +9713,29 @@ def blind_image_assessment(
9485
9713
  outdf.to_csv( csvfn )
9486
9714
  return outdf
9487
9715
 
9716
+ def remove_unwanted_columns(df):
9717
+ # Identify columns to drop: those named 'X' or starting with 'Unnamed'
9718
+ cols_to_drop = [col for col in df.columns if col == 'X' or col.startswith('Unnamed')]
9719
+
9720
+ # Drop the identified columns from the DataFrame, if any
9721
+ df_cleaned = df.drop(columns=cols_to_drop, errors='ignore')
9722
+
9723
+ return df_cleaned
9724
+
9725
+ def process_dataframe_generalized(df, group_by_column):
9726
+ # Make sure the group_by_column is excluded from both numeric and other columns calculations
9727
+ numeric_cols = df.select_dtypes(include='number').columns.difference([group_by_column])
9728
+ other_cols = df.columns.difference(numeric_cols).difference([group_by_column])
9729
+
9730
+ # Define aggregation functions: mean for numeric cols, mode for other cols
9731
+ # Update to handle empty mode results safely
9732
+ agg_dict = {col: 'mean' for col in numeric_cols}
9733
+ agg_dict.update({
9734
+ col: lambda x: pd.Series.mode(x).iloc[0] if not pd.Series.mode(x).empty else None for col in other_cols
9735
+ })
9736
+ # Group by the specified column, applying different aggregation functions to different columns
9737
+ processed_df = df.groupby(group_by_column, as_index=False).agg(agg_dict)
9738
+ return processed_df
9488
9739
 
9489
9740
  def average_blind_qc_by_modality(qc_full,verbose=False):
9490
9741
  """
@@ -9496,21 +9747,14 @@ def average_blind_qc_by_modality(qc_full,verbose=False):
9496
9747
  Returns:
9497
9748
  pandas dataframe containing the processed qc data.
9498
9749
  """
9750
+ qc_full = remove_unwanted_columns( qc_full )
9499
9751
  # Get unique modalities
9500
9752
  modalities = qc_full['modality'].unique()
9501
9753
  modalities = modalities[modalities != 'unknown']
9502
- # Get modalities to select
9503
- m0sel = qc_full['modality'].isin(modalities)
9504
9754
  # Get unique ids
9505
- uid = qc_full['filename'] + "_" + qc_full['modality'].astype(str)
9755
+ uid = qc_full['filename']
9506
9756
  to_average = uid.unique()
9507
- # Define column indices
9508
- contcols = ['noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi','reflection_err', 'EVR', 'msk_vol', 'spc0', 'spc1', 'spc2', 'org0','org1','org2', 'dimx', 'dimy', 'dimz', 'slice']
9509
- ocols = ['filename','modality', 'mriseries', 'mrimfg', 'mrimodel']
9510
- # restrict to columns we "know"
9511
- qc_full = qc_full[ocols+contcols]
9512
- # Create empty meta dataframe
9513
- meta = pd.DataFrame(columns=ocols+contcols)
9757
+ meta = pd.DataFrame(columns=qc_full.columns )
9514
9758
  # Process each unique id
9515
9759
  n = len(to_average)
9516
9760
  for k in range(n):
@@ -9522,15 +9766,11 @@ def average_blind_qc_by_modality(qc_full,verbose=False):
9522
9766
  if sum(m1sel) > 1:
9523
9767
  # If more than one entry for id, take the average of continuous columns,
9524
9768
  # maximum of the slice column, and the first entry of the other columns
9769
+ mfsub = process_dataframe_generalized(qc_full[m1sel],'filename')
9770
+ else:
9525
9771
  mfsub = qc_full[m1sel]
9526
- if mfsub.shape[0] > 1:
9527
- meta.loc[k, contcols] = mfsub.loc[:, contcols].mean(numeric_only=True)
9528
- meta.loc[k, 'slice'] = mfsub['slice'].max()
9529
- meta.loc[k, ocols] = mfsub[ocols].iloc[0]
9530
- elif sum(m1sel) == 1:
9531
- # If only one entry for id, just copy the entry
9532
- mfsub = qc_full[m1sel]
9533
- meta.loc[k] = mfsub.iloc[0]
9772
+ meta.loc[k] = mfsub.iloc[0]
9773
+ meta['modality'] = meta['modality'].replace(['DTIdwi', 'DTIb0'], 'DTI', regex=True)
9534
9774
  return meta
9535
9775
 
9536
9776
  def wmh( flair, t1, t1seg,
@@ -10163,15 +10403,14 @@ def novelty_detection_quantile(df_train, df_test):
10163
10403
  myqs[mykey] = abs( temp - 0.5 ) / 0.5
10164
10404
  return myqs
10165
10405
 
10166
-
10167
- def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_image, overlay_cmap='bwr', nslices=21, ncol=7, edge_image_dilation = 0, black_bg=True, axes = [0,1,2], fixed_overlay_range=None, crop=True, verbose=False ):
10406
+ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_image, overlay_cmap='bwr', nslices=21, ncol=7, edge_image_dilation = 0, black_bg=True, axes = [0,1,2], fixed_overlay_range=None, crop=5, verbose=False ):
10168
10407
  """
10169
10408
  Create figures based on statistical data and an underlying brain image.
10170
10409
 
10171
10410
  Assumes both ~/.antspyt1w and ~/.antspymm data is available
10172
10411
 
10173
10412
  Parameters:
10174
- - statistical_df (pandas dataframe): with 2 columns named anat and value
10413
+ - statistical_df (pandas dataframe): with 2 columns named anat and values
10175
10414
  the anat column should have names that meet *partial matching* criterion
10176
10415
  with respect to regions that are measured in antspymm. value will be
10177
10416
  the value to be displayed. if two examples of a given region exist in
@@ -10186,12 +10425,13 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10186
10425
  - black_bg (bool): boolean
10187
10426
  - axes (list): integer list typically [0,1,2] sagittal coronal axial
10188
10427
  - fixed_overlay_range (list): scalar pair will try to keep a constant cbar and will truncate the overlay at these min/max values
10189
- - crop (bool): crops the image to display by the extent of the overlay
10428
+ - crop (int): crops the image to display by the extent of the overlay; larger values dilate the masks more.
10190
10429
  - verbose (bool): boolean
10191
10430
 
10192
10431
  Returns:
10193
10432
  an image with values mapped to the associated regions
10194
10433
  """
10434
+ import re
10195
10435
 
10196
10436
  # Read the statistical file
10197
10437
  zz = statistical_df
@@ -10200,19 +10440,21 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10200
10440
  mydict = pd.read_csv(data_dictionary_path)
10201
10441
  mydict = mydict[~mydict['Measurement'].str.contains("tractography-based connectivity", na=False)]
10202
10442
 
10443
+ statistical_df['anat'] = statistical_df['anat'].str.replace("_", ".", regex=True)
10444
+
10203
10445
  # Load image and process it
10204
10446
  edgeimg = ants.iMath(brain_image,"Normalize")
10205
10447
  if edge_image_dilation > 0:
10206
10448
  edgeimg = ants.iMath( edgeimg, "MD", edge_image_dilation)
10207
10449
 
10208
10450
  # Define lists and data frames
10209
- postfix = ['bf', 'deep_cit168lab', 'mtl', 'cerebellum', 'dkt_cortex','brainstem']
10210
- atlas = ['BF', 'CIT168', 'MTL', 'TustisonCobra', 'desikan-killiany-tourville','brainstem']
10211
- postdesc = ['nbm3CH13', 'CIT168_Reinf_Learn_v1_label_descriptions_pad', 'mtl_description', 'cerebellum', 'dkt','CIT168_T1w_700um_pad_adni_brainstem']
10451
+ postfix = ['bf', 'cit168lab', 'mtl', 'cerebellum', 'dkt_cortex','brainstem','JHU_wm','yeo']
10452
+ atlas = ['BF', 'CIT168', 'MTL', 'TustisonCobra', 'desikan-killiany-tourville','brainstem','JHU_wm','yeo']
10453
+ postdesc = ['nbm3CH13', 'CIT168_Reinf_Learn_v1_label_descriptions_pad', 'mtl_description', 'cerebellum', 'dkt','CIT168_T1w_700um_pad_adni_brainstem','FA_JHU_labels_edited','ppmi_template_500Parcels_Yeo2011_17Networks_2023_homotopic']
10212
10454
  statdf = pd.DataFrame({'img': postfix, 'atlas': atlas, 'csvdescript': postdesc})
10213
10455
  templateprefix = '~/.antspymm/PPMI_template0_'
10214
10456
  # Iterate through columns and create figures
10215
- col2viz = 'value'
10457
+ col2viz = 'values'
10216
10458
  if True:
10217
10459
  anattoshow = zz['anat'].unique()
10218
10460
  if verbose:
@@ -10224,21 +10466,74 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10224
10466
  if verbose:
10225
10467
  print(str(k) + " " + anattoshow[k] )
10226
10468
  mysub = zz[zz['anat'].str.contains(anattoshow[k])]
10469
+ anatsear=re.sub("dti.fa","",anattoshow[k])
10470
+ anatsear=re.sub("t1.volasym","",anatsear)
10471
+ anatsear=re.sub("t1.thkasym","",anatsear)
10472
+ anatsear=re.sub("t1.areaasym","",anatsear)
10473
+ anatsear=re.sub("t1.vol.","",anatsear)
10474
+ anatsear=re.sub("t1.thk.","",anatsear)
10475
+ anatsear=re.sub("t1.area.","",anatsear)
10476
+ anatsear=re.sub("asymdp.","",anatsear)
10477
+ anatsear=re.sub("asym.","",anatsear)
10478
+ anatsear=re.sub("dti.md.","",anatsear)
10479
+ anatsear=re.sub("dti.fa.","",anatsear)
10480
+ anatsear=re.sub("dti.md","",anatsear)
10481
+ anatsear=re.sub("dti.mean.md.","",anatsear)
10482
+ anatsear=re.sub("dti.mean.fa.","",anatsear)
10483
+ anatsear=re.sub("lravg","",anatsear)
10484
+ atlassearch = mydict['tidynames'].str.contains(anatsear)
10485
+ if atlassearch.sum() > 0:
10486
+ whichatlas = mydict[atlassearch]['Atlas'].iloc[0]
10487
+ oglabelname = mydict[atlassearch]['Label'].iloc[0]
10488
+ else:
10489
+ print(anatsear)
10490
+ oglabelname='unknown'
10491
+ whichatlas=None
10492
+ if verbose:
10493
+ print("oglabelname " + oglabelname )
10227
10494
  vals2viz = mysub[col2viz].agg(['min', 'max'])
10228
10495
  vals2viz = vals2viz[abs(vals2viz).idxmax()]
10229
10496
  myext = None
10230
10497
  if 'dktcortex' in anattoshow[k]:
10231
10498
  myext = 'dkt_cortex'
10232
10499
  elif 'cit168' in anattoshow[k]:
10233
- myext = 'deep_cit168lab'
10500
+ myext = 'cit168lab'
10234
10501
  elif 'mtl' in anattoshow[k]:
10235
10502
  myext = 'mtl'
10503
+ oglabelname=re.sub('mtl', '',anatsear)
10236
10504
  elif 'cerebellum' in anattoshow[k]:
10237
10505
  myext = 'cerebellum'
10506
+ oglabelname=re.sub('cerebellum', '',anatsear)
10507
+ # oglabelname=oglabelname[2:]
10238
10508
  elif 'brainstem' in anattoshow[k]:
10239
10509
  myext = 'brainstem'
10240
10510
  elif any(item in anattoshow[k] for item in ['nbm', 'bf']):
10241
10511
  myext = 'bf'
10512
+ oglabelname=re.sub(r'\.', '_',anatsear)
10513
+ elif whichatlas == 'johns hopkins white matter':
10514
+ myext = 'JHU_wm'
10515
+ elif whichatlas == 'desikan-killiany-tourville':
10516
+ myext = 'dkt_cortex'
10517
+ elif whichatlas == 'CIT168':
10518
+ myext = 'cit168lab'
10519
+ elif whichatlas == 'BF':
10520
+ myext = 'bf'
10521
+ oglabelname=re.sub('bf', '',oglabelname)
10522
+ elif whichatlas == 'yeo_homotopic':
10523
+ myext = 'yeo'
10524
+ if myext is None and verbose:
10525
+ if whichatlas is None:
10526
+ whichatlas='None'
10527
+ if anattoshow[k] is None:
10528
+ anattoshow[k]='None'
10529
+ print( "MYEXT " + anattoshow[k] + ' unfound ' + whichatlas )
10530
+ else:
10531
+ if verbose:
10532
+ print( "MYEXT " + myext )
10533
+
10534
+ if myext == 'cit168lab':
10535
+ oglabelname=re.sub("cit168","",oglabelname)
10536
+
10242
10537
  for j in postfix:
10243
10538
  if j == "dkt_cortex":
10244
10539
  j = 'dktcortex'
@@ -10252,30 +10547,86 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10252
10547
  locfilename = templateprefix + myext + '.nii.gz'
10253
10548
  if verbose:
10254
10549
  print( locfilename )
10255
- myatlas = ants.image_read(locfilename)
10256
- atlasDescript = pd.read_csv(f"~/.antspyt1w/{correctdescript}.csv")
10257
- atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10258
- atlasDescript['Description'] = atlasDescript['Description'].str.replace(" ", "_")
10259
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left_", "_")
10260
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right_", "_")
10261
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left", "")
10262
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right", "")
10550
+ if myext == 'yeo':
10551
+ oglabelname=oglabelname.lower()
10552
+ oglabelname=re.sub("rsfmri_fcnxpro122_","",oglabelname)
10553
+ oglabelname=re.sub("rsfmri_fcnxpro129_","",oglabelname)
10554
+ oglabelname=re.sub("rsfmri_fcnxpro134_","",oglabelname)
10555
+ locfilename = "~/.antspymm/ppmi_template_500Parcels_Yeo2011_17Networks_2023_homotopic.nii.gz"
10556
+ atlasDescript = pd.read_csv(f"~/.antspymm/{correctdescript}.csv")
10557
+ atlasDescript.rename(columns={'SystemName': 'Description'}, inplace=True)
10558
+ atlasDescript.rename(columns={'ROI': 'Label'}, inplace=True)
10559
+ atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10560
+ else:
10561
+ atlasDescript = pd.read_csv(f"~/.antspyt1w/{correctdescript}.csv")
10562
+ atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10563
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace(" ", "_")
10564
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left_", "_")
10565
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right_", "_")
10566
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left", "")
10567
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right", "")
10568
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("left_", "")
10569
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("right_", "")
10570
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("/",".")
10571
+ if myext == 'JHU_wm':
10572
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("fa-", "")
10573
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("-left-", "")
10574
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("-right-", "")
10575
+ if myext == 'cerebellum':
10576
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("l_", "")
10577
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("r_", "")
10578
+
10579
+ if verbose:
10580
+ print( atlasDescript )
10581
+ oglabelname = oglabelname.lower()
10582
+ oglabelname = re.sub(" ", "_",oglabelname)
10583
+ oglabelname = re.sub("_left_", "_",oglabelname)
10584
+ oglabelname = re.sub("_right_", "_",oglabelname)
10585
+ oglabelname = re.sub("_left", "",oglabelname)
10586
+ oglabelname = re.sub("_right", "",oglabelname)
10587
+ oglabelname = re.sub("t1hier_vol_", "",oglabelname)
10588
+ oglabelname = re.sub("t1hier_area_", "",oglabelname)
10589
+ oglabelname = re.sub("t1hier_thk_", "",oglabelname)
10590
+ oglabelname = re.sub("dktregions", "",oglabelname)
10591
+ oglabelname = re.sub("dktcortex", "",oglabelname)
10592
+ if myext == 'JHU_wm':
10593
+ oglabelname = re.sub("dti_mean_fa.", "",oglabelname)
10594
+ oglabelname = re.sub("dti_mean_md.", "",oglabelname)
10595
+ oglabelname = re.sub(".left.", "",oglabelname)
10596
+ oglabelname = re.sub(".right.", "",oglabelname)
10597
+ oglabelname = re.sub(".lravg.", "",oglabelname)
10598
+ oglabelname = re.sub(".asym.", "",oglabelname)
10599
+
10600
+ if verbose:
10601
+ print("oglabelname " + oglabelname )
10602
+
10263
10603
  if myext == 'cerebellum':
10264
10604
  atlasDescript['Description'] = atlasDescript['Description'].str.replace("l_", "")
10265
10605
  atlasDescript['Description'] = atlasDescript['Description'].str.replace("r_", "")
10266
- whichindex = atlasDescript.index[atlasDescript['Description'] == anattoshow[k]].values[0]
10606
+ whichindex = atlasDescript.index[atlasDescript['Description'] == oglabelname].values[0]
10267
10607
  else:
10268
- whichindex = atlasDescript.index[atlasDescript['Description'].str.contains(anattoshow[k])]
10608
+ whichindex = atlasDescript.index[atlasDescript['Description'].str.contains(oglabelname)]
10269
10609
 
10270
10610
  if type(whichindex) is np.int64:
10271
10611
  labelnums = atlasDescript.loc[whichindex, 'Label']
10272
10612
  else:
10273
10613
  labelnums = list(atlasDescript.loc[whichindex, 'Label'])
10614
+
10615
+ if myext == 'yeo':
10616
+ parts = re.findall(r'\D+', oglabelname)
10617
+ oglabelname = [part.replace('_', '') for part in parts if part.replace('_', '')]
10618
+ filtered_df = atlasDescript[atlasDescript['Description'].isin(oglabelname)]
10619
+ labelnums = filtered_df['Label'].tolist()
10620
+
10274
10621
  if not isinstance(labelnums, list):
10275
10622
  labelnums=[labelnums]
10276
10623
  addemiszero = ants.threshold_image(addem, 0, 0)
10277
10624
  temp = ants.image_read(locfilename)
10278
10625
  temp = ants.mask_image(temp, temp, level=labelnums, binarize=True)
10626
+ if verbose:
10627
+ print("DEBUG")
10628
+ print( temp.sum() )
10629
+ print( labelnums )
10279
10630
  temp[temp == 1] = (vals2viz)
10280
10631
  temp[addemiszero == 0] = 0
10281
10632
  addem = addem + temp
@@ -10284,8 +10635,8 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10284
10635
  print('Done Adding')
10285
10636
  for axx in axes:
10286
10637
  figfn=output_prefix+f"fig{col2viz}ax{axx}_py.jpg"
10287
- if crop:
10288
- cmask = ants.threshold_image( addem,1e-5, 1e9 ).iMath("MD",3) + ants.threshold_image( addem,-1e9, -1e-5 ).iMath("MD",3)
10638
+ if crop > 0:
10639
+ cmask = ants.threshold_image( addem,1e-5, 1e9 ).iMath("MD",crop) + ants.threshold_image( addem,-1e9, -1e-5 ).iMath("MD",crop)
10289
10640
  addemC = ants.crop_image( addem, cmask )
10290
10641
  edgeimgC = ants.crop_image( edgeimg, cmask )
10291
10642
  else:
@@ -10305,7 +10656,6 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10305
10656
  print("DONE brain map figures")
10306
10657
  return addem
10307
10658
 
10308
-
10309
10659
  def filter_df(indf, myprefix):
10310
10660
  """
10311
10661
  Process and filter a pandas DataFrame, removing certain columns,
@@ -10479,6 +10829,27 @@ def aggregate_antspymm_results(input_csv, subject_col='subjectID', date_col='dat
10479
10829
  df=df.drop(badnames, axis=1)
10480
10830
  return( df )
10481
10831
 
10832
def find_most_recent_file(file_list):
    """
    Find the most recently modified file from a list of file paths.

    Parameters:
    - file_list: A list of strings, where each string is a path to a file.

    Returns:
    - A single-element list containing the path of the most recently
      modified file, or None if the list is empty or contains no valid
      files.  (A list — not a bare path — is returned so callers can use
      the result interchangeably with glob() output, e.g. ``result[0]``.)
    """
    # Keep only entries that exist and are regular files; os.path.isfile
    # is False for missing paths and directories alike.
    valid_files = [f for f in file_list if os.path.isfile(f)]
    if not valid_files:
        return None
    # max() keyed on modification time picks the latest-modified file.
    return [max(valid_files, key=os.path.getmtime)]
10852
+
10482
10853
  def aggregate_antspymm_results_sdf(
10483
10854
  study_df,
10484
10855
  project_col='projectID',
@@ -10685,8 +11056,8 @@ def aggregate_antspymm_results_sdf(
10685
11056
  t1wfn = sorted( glob( modsearch ) )
10686
11057
  if len( t1wfn ) > 1:
10687
11058
  nlarge = len(t1wfn)
10688
- t1wfn = [ t1wfn[ len(t1wfn)-1 ] ]
10689
- warnings.warn("there are " + str( nlarge ) + " number of wide fns with search path " + modsearch + " we take the last of these " + t1wfn[0] )
11059
+ t1wfn = find_most_recent_file( t1wfn )
11060
+ warnings.warn("there are " + str( nlarge ) + " number of wide fns with search path " + modsearch + " we take the most recent of these " + t1wfn[0] )
10690
11061
  # raise ValueError("there are " + str( len( t1wfn ) ) + " number of wide fns with search path " + modsearch )
10691
11062
  if len( t1wfn ) == 1:
10692
11063
  if verbose:
@@ -10802,3 +11173,276 @@ def filter_image_files(image_paths, criteria='largest'):
10802
11173
  raise ValueError("Criteria must be 'smallest', 'largest', or 'brightest'.")
10803
11174
 
10804
11175
  return selected_image_path
11176
+
11177
+
11178
+
11179
def mm_match_by_qc_scoring(df_a, df_b, match_column, criteria, prefix='matched_', exclude_columns=None):
    """
    Pair each row of ``df_a`` with its best-scoring counterpart in ``df_b``.

    Rows are candidates when they share the same value in ``match_column``.
    Each criterion column of ``df_b`` is z-scored (negated first for 'max'
    criteria so that lower is always better), the per-criterion scores are
    summed into ``best_score``, and the candidate with the smallest sum wins.

    Parameters:
    - df_a: DataFrame A (one output row per input row).
    - df_b: DataFrame B (pool of candidate matches; not modified).
    - match_column: column name that must agree between A and B rows.
    - criteria: dict mapping column name -> 'min' or 'max', the direction
      in which that column is considered "best".
    - prefix: prefix applied to df_b-derived columns (except match_column)
      in the combined output, to avoid name collisions.
    - exclude_columns: optional list of df_b columns dropped from the output.

    Returns:
    - (combined, leftovers): ``combined`` is df_a side-by-side with the
      matched (prefixed) df_b rows — unmatched A rows get NaNs; ``leftovers``
      holds the df_b rows that were never selected.
    """
    from scipy.stats import zscore
    # Drop pandas round-trip artifacts ("Unnamed: ...") from both inputs;
    # work on a copy of df_b since we add scoring columns to it.
    left = df_a.loc[:, ~df_a.columns.str.startswith('Unnamed:')]
    right = df_b.loc[:, ~df_b.columns.str.startswith('Unnamed:')].copy()

    # Sign table turns both directions into "smaller is better".
    direction_sign = {'max': -1.0, 'min': 1.0}
    for colname, direction in criteria.items():
        if direction in direction_sign:
            right.loc[right.index, 'score_' + colname] = zscore(direction_sign[direction] * right[colname])

    # Aggregate score: sum of all per-criterion z-scores.
    score_cols = ['score_' + c for c in criteria]
    right['best_score'] = right[score_cols].sum(axis=1)

    picked_idx = []    # df_b indices that were claimed by some df_a row
    picked_rows = []
    for _, left_row in left.iterrows():
        candidates = right[right[match_column] == left_row[match_column]]
        if candidates.empty:
            # No counterpart: placeholder row of NaNs keeps alignment.
            picked_rows.append(pd.Series(dtype='float64'))
            continue
        winner = candidates['best_score'].idxmin()
        picked_idx.append(winner)
        picked_rows.append(candidates.loc[winner])

    best = pd.DataFrame(picked_rows).reset_index(drop=True)
    if exclude_columns is not None:
        best = best.drop(columns=exclude_columns, errors='ignore')
    # Prefix every df_b-derived column except the match key itself.
    best = best.rename(columns=lambda c: c if c == match_column else f"{prefix}{c}")

    combined = pd.concat([left.reset_index(drop=True), best], axis=1)
    leftovers = right.drop(index=picked_idx).reset_index(drop=True)
    return combined, leftovers
11243
+
11244
+
11245
def fix_LR_RL_stuff(df, col1, col2, size_col1, size_col2, id1, id2 ):
    """
    Collapse a pair of per-row image candidates onto the first slot.

    For each row, ``col1``/``col2`` hold two candidate filenames with sizes
    ``size_col1``/``size_col2`` and ids ``id1``/``id2``.  If BOTH filenames
    contain 'LR' or 'RL' the row is left alone (a genuine phase-encoding
    pair).  Otherwise only one candidate survives in slot 1 — the one with
    the larger size — and slot 2 is cleared (filename/id -> None, size -> 0).
    Ties go to slot 1 unless only ``col2`` carries the LR/RL tag, in which
    case ties go to slot 2 (matching the original branch asymmetry).

    Returns a modified copy; the input DataFrame is not changed.
    """
    out = df.copy()
    # Substring tests below require string dtype (NaN becomes 'nan',
    # which contains neither 'LR' nor 'RL').
    for c in (col1, col2, id1, id2):
        out[c] = out[c].astype(str)

    for idx, row in out.iterrows():
        name1, name2 = row[col1], row[col2]
        sz1, sz2 = row[size_col1], row[size_col2]
        tagged1 = 'RL' in name1 or 'LR' in name1
        tagged2 = 'RL' in name2 or 'LR' in name2

        if tagged1 and tagged2:
            # Both directions present: keep the pair untouched.
            continue

        # Decide whether slot 2 replaces slot 1.  Note the deliberate
        # asymmetry: when only col2 is tagged, a size tie promotes col2.
        if tagged2 and not tagged1:
            promote = sz2 >= sz1
        else:
            promote = sz1 < sz2

        if promote:
            out.at[idx, col1] = out.at[idx, col2]
            out.at[idx, size_col1] = out.at[idx, size_col2]
            out.at[idx, id1] = out.at[idx, id2]
        # Slot 2 is always cleared once a single winner is chosen.
        out.at[idx, col2] = None
        out.at[idx, size_col2] = 0
        out.at[idx, id2] = None

    return out
11299
+
11300
+
11301
def renameit(df, old_col_name, new_col_name):
    """
    Rename one column of a DataFrame in place.

    If ``old_col_name`` is absent a ``UserWarning`` is emitted and the
    DataFrame is left untouched — no exception is raised, which lets
    callers attempt a batch of renames where some columns may not exist.

    Parameters:
    - df: pandas.DataFrame whose column is renamed (modified in place).
    - old_col_name: str, current name of the column.
    - new_col_name: str, replacement name.

    Returns:
    None
    """
    import warnings
    if old_col_name in df.columns:
        df.rename(columns={old_col_name: new_col_name}, inplace=True)
    else:
        warnings.warn(f"The column '{old_col_name}' does not exist in the DataFrame.")
11327
+
11328
+
11329
def mm_match_by_qc_scoring_all( qc_dataframe, fix_LRRL=True, verbose=True ):
    """
    Build a wide, modality-matched dataframe from a QC dataframe.

    A best T1w row is selected per subject/date first; rows from the other
    modalities (T2Flair, NM2DMT, rsfMRI, DTI) are then matched to it by QC
    scoring (see ``mm_match_by_qc_scoring``): criterion columns are z-scored
    and summed, and the lowest combined score wins.  Matching minimizes the
    outlier measure (``ol_loop``) and noise while maximizing snr, EVR and
    (for time series) dimt; DTI matching additionally prefers a lower
    ``dti_bvalueMax``.

    Parameters:
    ----------
    qc_dataframe : pandas.DataFrame
        QC metrics per image; must contain 'modality', 'filename',
        'ol_loop', 'ol_lof', 'ol_lof_decision' and 'subjectIDdate' plus the
        criterion columns named above.
    fix_LRRL : bool, optional
        If True, collapse duplicate rsfMRI slots via ``fix_LR_RL_stuff``.
    verbose : bool, optional
        If True, prints the progress and the shape of the DataFrame being
        processed in each step.

    Returns:
    -------
    pandas.DataFrame
        One row per subject/date with prefixed-then-renamed columns for each
        matched modality slot (flairid/flairfn, rsfid*/rsffn*, dtid*/dtfn*,
        nmid*/nmfn*).
    """
    # Unify DTI sub-modalities so they all compete in one DTI pool.
    qc_dataframe['modality'] = qc_dataframe['modality'].replace(['DTIdwi', 'DTIb0'], 'DTI', regex=True)
    # Coerce dtypes up front; CSVs may have loaded these as objects.
    qc_dataframe['filename']=qc_dataframe['filename'].astype(str)
    qc_dataframe['ol_loop']=qc_dataframe['ol_loop'].astype(float)
    qc_dataframe['ol_lof']=qc_dataframe['ol_lof'].astype(float)
    qc_dataframe['ol_lof_decision']=qc_dataframe['ol_lof_decision'].astype(float)
    outlier_column='ol_loop'
    # best_mmm is defined elsewhere in this module; presumably it returns a
    # dict whose 'filt' entry is the per-subject best-row dataframe for the
    # requested modality — TODO confirm against its definition.
    mmdf0 = best_mmm( qc_dataframe, 'T1w', outlier_column=outlier_column )['filt']
    fldf = best_mmm( qc_dataframe, 'T2Flair', outlier_column=outlier_column )['filt']
    nmdf = best_mmm( qc_dataframe, 'NM2DMT', outlier_column=outlier_column )['filt']
    rsdf = best_mmm( qc_dataframe, 'rsfMRI', outlier_column=outlier_column )['filt']
    dtdf = best_mmm( qc_dataframe, 'DTI', outlier_column=outlier_column )['filt']

    # Structural criteria: low outlierness/noise, high snr/EVR, small
    # registration reflection error.
    criteria = {'ol_loop': 'min', 'noise': 'min', 'snr': 'max', 'EVR': 'max', 'reflection_err':'min'}
    # Metadata columns excluded from the prefixed per-modality output.
    xcl = [ 'mrimfg', 'mrimodel','mriMagneticFieldStrength', 'dti_failed', 'rsf_failed', 'subjectID', 'date', 'subjectIDdate','repeat']
    # Assuming df_a and df_b are already loaded
    mmdf, undffl = mm_match_by_qc_scoring(mmdf0, fldf, 'subjectIDdate', criteria,
        prefix='T2Flair_', exclude_columns=xcl )

    # Repeatedly match NM rows, one slot (prefix) per pass; each pass
    # consumes the winners, leaving the remainder for the next slot.
    prefixes = ['NM1_', 'NM2_', 'NM3_', 'NM4_', 'NM5_', 'NM6_']
    undfmod = nmdf # Initialize 'undfmod' with 'nmdf' for the first iteration
    if verbose:
        print('start NM')
        print( undfmod.shape )
    for prefix in prefixes:
        # 50 is a heuristic floor: don't open another slot for a tiny pool.
        if undfmod.shape[0] > 50:
            mmdf, undfmod = mm_match_by_qc_scoring(mmdf, undfmod, 'subjectIDdate', criteria, prefix=prefix, exclude_columns=xcl)
            if verbose:
                print( prefix )
                print( undfmod.shape )

    criteria = {'ol_loop': 'min', 'noise': 'min', 'snr': 'max', 'EVR': 'max', 'dimt':'max'}
    # higher bvalues lead to more noise ...
    # (the line above is immediately overridden: DTI uses the bvalue-aware
    # criteria below, and this second dict is the one rsfMRI inherits too)
    criteria = {'ol_loop': 'min', 'noise': 'min', 'dti_bvalueMax':'min', 'dimt':'max'}
    prefixes = ['DTI1_', 'DTI2_', 'DTI3_'] # List of prefixes for each matching iteration
    undfmod = dtdf
    if verbose:
        print('start DT')
        print( undfmod.shape )
    for prefix in prefixes:
        if undfmod.shape[0] > 50:
            mmdf, undfmod = mm_match_by_qc_scoring(mmdf, undfmod, 'subjectIDdate', criteria, prefix=prefix, exclude_columns=xcl)
            if verbose:
                print( prefix )
                print( undfmod.shape )

    prefixes = ['rsf1_', 'rsf2_', 'rsf3_'] # List of prefixes for each matching iteration
    undfmod = rsdf # Initialize 'undfmod' with 'rsdf' for the first iteration
    if verbose:
        print('start rsf')
        print( undfmod.shape )
    for prefix in prefixes:
        if undfmod.shape[0] > 50:
            mmdf, undfmod = mm_match_by_qc_scoring(mmdf, undfmod, 'subjectIDdate', criteria, prefix=prefix, exclude_columns=xcl)
            if verbose:
                print( prefix )
                print( undfmod.shape )

    if fix_LRRL:
        # mmdf=fix_LR_RL_stuff( mmdf, 'DTI1_filename', 'DTI2_filename', 'DTI1_dimt', 'DTI2_dimt')
        mmdf=fix_LR_RL_stuff( mmdf, 'rsf1_filename', 'rsf2_filename', 'rsf1_dimt', 'rsf2_dimt', 'rsf1_imageID', 'rsf2_imageID' )
    else:
        import warnings
        warnings.warn("FIXME: should fix LR and RL situation for the DTI and rsfMRI")

    # now do the necessary replacements
    # renameit only warns when a column is absent, so these are safe even
    # for columns that were never created (e.g. perf_* — no perf matching
    # happens in this function; presumably those columns arrive via the
    # input dataframe when present — TODO confirm).
    renameit( mmdf, 'perf_imageID', 'perfid' )
    renameit( mmdf, 'perf_filename', 'perffn' )
    renameit( mmdf, 'T2Flair_imageID', 'flairid' )
    renameit( mmdf, 'T2Flair_filename', 'flairfn' )
    renameit( mmdf, 'rsf1_imageID', 'rsfid1' )
    renameit( mmdf, 'rsf2_imageID', 'rsfid2' )
    renameit( mmdf, 'rsf1_filename', 'rsffn1' )
    renameit( mmdf, 'rsf2_filename', 'rsffn2' )
    renameit( mmdf, 'DTI1_imageID', 'dtid1' )
    renameit( mmdf, 'DTI2_imageID', 'dtid2' )
    renameit( mmdf, 'DTI3_imageID', 'dtid3' )
    renameit( mmdf, 'DTI1_filename', 'dtfn1' )
    renameit( mmdf, 'DTI2_filename', 'dtfn2' )
    renameit( mmdf, 'DTI3_filename', 'dtfn3' )
    # NOTE(review): range(1,6) renames NM1_..NM5_ only, yet the NM matching
    # above can create an NM6_ slot — NM6_* columns would keep their raw
    # names.  Confirm whether NM6 is intentionally excluded downstream.
    for x in range(1,6):
        temp0="NM"+str(x)+"_imageID"
        temp1="nmid"+str(x)
        renameit( mmdf, temp0, temp1 )
        temp0="NM"+str(x)+"_filename"
        temp1="nmfn"+str(x)
        renameit( mmdf, temp0, temp1 )
    return mmdf