antspymm 1.2.8__py3-none-any.whl → 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
antspymm/mm.py CHANGED
@@ -161,6 +161,23 @@ def version( ):
161
161
  'antspymm': pkg_resources.require("antspymm")[0].version
162
162
  }
163
163
 
164
+ def nrg_filename_to_subjectvisit(s, separator='-'):
165
+ """
166
+ Extracts a pattern from the input string.
167
+
168
+ Parameters:
169
+ - s: The input string from which to extract the pattern.
170
+ - separator: The separator used in the string (default is '-').
171
+
172
+ Returns:
173
+ - A string in the format of 'PREFIX-Number-Date'
174
+ """
175
+ parts = os.path.basename(s).split(separator)
176
+ # Assuming the pattern is always in the form of PREFIX-Number-Date-...
177
+ # and PREFIX is always "PPMI", extract the first three parts
178
+ extracted = separator.join(parts[:3])
179
+ return extracted
180
+
164
181
 
165
182
  def validate_nrg_file_format(path, separator):
166
183
  """
@@ -436,11 +453,11 @@ def docsamson(locmod, studycsv, outputdir, projid, sid, dtid, mysep, t1iid=None,
436
453
  imfns.append('nmid' + str(i))
437
454
  elif locmod == 'rsfMRI':
438
455
  imfns=[]
439
- for i in range(3):
456
+ for i in range(4):
440
457
  imfns.append('rsfid' + str(i))
441
458
  elif locmod == 'DTI':
442
459
  imfns=[]
443
- for i in range(3):
460
+ for i in range(4):
444
461
  imfns.append('dtid' + str(i))
445
462
 
446
463
  # Process each file name
@@ -489,7 +506,7 @@ def get_valid_modalities( long=False, asString=False, qc=False ):
489
506
  if long:
490
507
  mymod = ["T1w", "NM2DMT", "rsfMRI", "rsfMRI_LR", "rsfMRI_RL", "rsfMRILR", "rsfMRIRL", "DTI", "DTI_LR","DTI_RL", "DTILR","DTIRL","T2Flair", "dwi", "dwi_ap", "dwi_pa", "func", "func_ap", "func_pa", "perf"]
491
508
  elif qc:
492
- mymod = [ 'T1w', 'T2Flair', 'NM2DMT','DTIdwi','DTIb0', 'rsfMRI', "perf" ]
509
+ mymod = [ 'T1w', 'T2Flair', 'NM2DMT', 'DTI', 'DTIdwi','DTIb0', 'rsfMRI', "perf" ]
493
510
  else:
494
511
  mymod = ["T1w", "NM2DMT", "DTI","T2Flair", "rsfMRI", "perf" ]
495
512
  if not asString:
@@ -565,15 +582,15 @@ def generate_mm_dataframe(
565
582
  raise ValueError("source_image_directory does not exist")
566
583
  if len( rsf_filenames ) > 2:
567
584
  raise ValueError("len( rsf_filenames ) > 2")
568
- if len( dti_filenames ) > 2:
569
- raise ValueError("len( dti_filenames ) > 2")
585
+ if len( dti_filenames ) > 3:
586
+ raise ValueError("len( dti_filenames ) > 3")
570
587
  if len( nm_filenames ) > 11:
571
588
  raise ValueError("len( nm_filenames ) > 11")
572
589
  if len( rsf_filenames ) < 2:
573
590
  for k in range(len(rsf_filenames),2):
574
591
  rsf_filenames.append(None)
575
- if len( dti_filenames ) < 2:
576
- for k in range(len(dti_filenames),2):
592
+ if len( dti_filenames ) < 3:
593
+ for k in range(len(dti_filenames),3):
577
594
  dti_filenames.append(None)
578
595
  if len( nm_filenames ) < 10:
579
596
  for k in range(len(nm_filenames),10):
@@ -649,7 +666,7 @@ def generate_mm_dataframe(
649
666
  'perfid']
650
667
  mycols0 = corecols + [
651
668
  'rsfid1', 'rsfid2',
652
- 'dtid1', 'dtid2']
669
+ 'dtid1', 'dtid2','dtid3']
653
670
  nmext = [
654
671
  'nmid1', 'nmid2' 'nmid3', 'nmid4', 'nmid5',
655
672
  'nmid6', 'nmid7','nmid8', 'nmid9', 'nmid10', 'nmid11'
@@ -981,25 +998,28 @@ def outlierness_by_modality( qcdf, uid='filename', outlier_columns = ['noise', '
981
998
  from PyNomaly import loop
982
999
  from sklearn.neighbors import LocalOutlierFactor
983
1000
  qcdfout = qcdf.copy()
1001
+ pd.set_option('future.no_silent_downcasting', True)
1002
+ qcdfout.replace([np.inf, -np.inf], np.nan, inplace=True)
984
1003
  if uid not in qcdfout.keys():
985
- raise ValueError(uid + " not in dataframe")
1004
+ raise ValueError( str(uid) + " not in dataframe")
986
1005
  if 'ol_loop' not in qcdfout.keys():
987
1006
  qcdfout['ol_loop']=math.nan
988
1007
  if 'ol_lof' not in qcdfout.keys():
989
1008
  qcdfout['ol_lof']=math.nan
1009
+ didit=False
990
1010
  for mod in get_valid_modalities( qc=True ):
1011
+ didit=True
991
1012
  lof = LocalOutlierFactor()
992
1013
  locsel = qcdfout["modality"] == mod
993
1014
  rr = qcdfout[locsel][outlier_columns]
994
- with pd.option_context('mode.use_inf_as_na', True):
995
- for myolcol in outlier_columns:
996
- rr[myolcol].fillna(rr[myolcol].mean(), inplace=True)
1015
+ column_means = rr.mean()
1016
+ rr.fillna(column_means, inplace=True)
997
1017
  if rr.shape[0] > 1:
998
1018
  if verbose:
999
- print(mod)
1019
+ print("calc: " + mod + " outlierness " )
1000
1020
  myneigh = np.min( [24, int(np.round(rr.shape[0]*0.5)) ] )
1001
1021
  temp = antspyt1w.loop_outlierness(rr.astype(float), standardize=True, extent=3, n_neighbors=myneigh, cluster_labels=None)
1002
- qcdfout.loc[locsel,'ol_loop']=temp
1022
+ qcdfout.loc[locsel,'ol_loop']=temp.astype('float64')
1003
1023
  yhat = lof.fit_predict(rr)
1004
1024
  temp = lof.negative_outlier_factor_*(-1.0)
1005
1025
  temp = temp - temp.min()
@@ -1007,6 +1027,8 @@ def outlierness_by_modality( qcdf, uid='filename', outlier_columns = ['noise', '
1007
1027
  yhat[ yhat == -1] = 1 # these are outliers
1008
1028
  qcdfout.loc[locsel,'ol_lof_decision']=yhat
1009
1029
  qcdfout.loc[locsel,'ol_lof']=temp/temp.max()
1030
+ if verbose:
1031
+ print( didit )
1010
1032
  return qcdfout
1011
1033
 
1012
1034
 
@@ -1084,6 +1106,11 @@ def study_dataframe_from_matched_dataframe( matched_dataframe, rootdir, outputdi
1084
1106
  dtfn2=glob.glob(os.path.join(rootdir, pid, sid, dt, 'DTI*', dtid, str(csvrow['dtfn2'].iloc[0]+iext) ))[0]
1085
1107
  if exists( dtfn2 ):
1086
1108
  dtList.append( dtfn2 )
1109
+ if 'dtfn3' in csvrow.keys():
1110
+ dtid=str(int(csvrow['dtid3'].iloc[0]))
1111
+ dtfn3=glob.glob(os.path.join(rootdir, pid, sid, dt, 'DTI*', dtid, str(csvrow['dtfn3'].iloc[0]+iext) ))[0]
1112
+ if exists( dtfn3 ):
1113
+ dtList.append( dtfn3 )
1087
1114
  if 'rsffn1' in csvrow.keys():
1088
1115
  rsid=str(int(csvrow['rsfid1'].iloc[0]))
1089
1116
  rsfn1=glob.glob(os.path.join( rootdir, pid, sid, dt, 'rsfMRI*', rsid, str(csvrow['rsffn1'].iloc[0]+iext) ))[0]
@@ -1159,6 +1186,8 @@ def highest_quality_repeat(mxdfin, idvar, visitvar, qualityvar):
1159
1186
  if qualityvar not in mxdfin.columns:
1160
1187
  raise ValueError("qualityvar not in dataframe")
1161
1188
 
1189
+ mxdfin[qualityvar] = mxdfin[qualityvar].astype(float)
1190
+
1162
1191
  vizzes = mxdfin[visitvar].unique()
1163
1192
  uids = mxdfin[idvar].unique()
1164
1193
  useit = np.zeros(mxdfin.shape[0], dtype=bool)
@@ -1195,40 +1224,48 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1195
1224
  """
1196
1225
  import pandas as pd
1197
1226
  import numpy as np
1227
+ qc_dataframe['filename']=qc_dataframe['filename'].astype(str)
1228
+ qc_dataframe['ol_loop']=qc_dataframe['ol_loop'].astype(float)
1229
+ qc_dataframe['ol_lof']=qc_dataframe['ol_lof'].astype(float)
1230
+ qc_dataframe['ol_lof_decision']=qc_dataframe['ol_lof_decision'].astype(float)
1198
1231
  mmdf = best_mmm( qc_dataframe, 'T1w', outlier_column=outlier_column )['filt']
1199
1232
  fldf = best_mmm( qc_dataframe, 'T2Flair', outlier_column=outlier_column )['filt']
1200
1233
  nmdf = best_mmm( qc_dataframe, 'NM2DMT', outlier_column=outlier_column )['filt']
1201
1234
  rsdf = best_mmm( qc_dataframe, 'rsfMRI', outlier_column=outlier_column )['filt']
1202
1235
  dtdf = best_mmm( qc_dataframe, 'DTI', outlier_column=outlier_column )['filt']
1203
- mmdf['flairid'] = np.nan
1204
- mmdf['flairfn'] = np.nan
1205
- mmdf['flairloop'] = np.nan
1206
- mmdf['flairlof'] = np.nan
1207
- mmdf['dtid1'] = np.nan
1208
- mmdf['dtfn1'] = np.nan
1209
- mmdf['dtloop1'] = np.nan
1210
- mmdf['dtlof1'] = np.nan
1211
- mmdf['dtid2'] = np.nan
1212
- mmdf['dtfn2'] = np.nan
1213
- mmdf['dtloop2'] = np.nan
1214
- mmdf['dtlof2'] = np.nan
1215
- mmdf['rsfid1'] = np.nan
1216
- mmdf['rsffn1'] = np.nan
1217
- mmdf['rsfloop1'] = np.nan
1218
- mmdf['rsflof1'] = np.nan
1219
- mmdf['rsfid2'] = np.nan
1220
- mmdf['rsffn2'] = np.nan
1221
- mmdf['rsfloop2'] = np.nan
1222
- mmdf['rsflof2'] = np.nan
1236
+ mmdf['flairid'] = None
1237
+ mmdf['flairfn'] = None
1238
+ mmdf['flairloop'] = None
1239
+ mmdf['flairlof'] = None
1240
+ mmdf['dtid1'] = None
1241
+ mmdf['dtfn1'] = None
1242
+ mmdf['dtntimepoints1'] = 0
1243
+ mmdf['dtloop1'] = math.nan
1244
+ mmdf['dtlof1'] = math.nan
1245
+ mmdf['dtid2'] = None
1246
+ mmdf['dtfn2'] = None
1247
+ mmdf['dtntimepoints2'] = 0
1248
+ mmdf['dtloop2'] = math.nan
1249
+ mmdf['dtlof2'] = math.nan
1250
+ mmdf['rsfid1'] = None
1251
+ mmdf['rsffn1'] = None
1252
+ mmdf['rsfntimepoints1'] = 0
1253
+ mmdf['rsfloop1'] = math.nan
1254
+ mmdf['rsflof1'] = math.nan
1255
+ mmdf['rsfid2'] = None
1256
+ mmdf['rsffn2'] = None
1257
+ mmdf['rsfntimepoints2'] = 0
1258
+ mmdf['rsfloop2'] = math.nan
1259
+ mmdf['rsflof2'] = math.nan
1223
1260
  for k in range(1,11):
1224
1261
  myid='nmid'+str(k)
1225
- mmdf[myid] = np.nan
1262
+ mmdf[myid] = None
1226
1263
  myid='nmfn'+str(k)
1227
- mmdf[myid] = np.nan
1264
+ mmdf[myid] = None
1228
1265
  myid='nmloop'+str(k)
1229
- mmdf[myid] = np.nan
1266
+ mmdf[myid] = math.nan
1230
1267
  myid='nmlof'+str(k)
1231
- mmdf[myid] = np.nan
1268
+ mmdf[myid] = math.nan
1232
1269
  if verbose:
1233
1270
  print( mmdf.shape )
1234
1271
  for k in range(mmdf.shape[0]):
@@ -1237,12 +1274,13 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1237
1274
  progger = str( k ) # np.round( k / mmdf.shape[0] * 100 ) )
1238
1275
  print( progger, end ="...", flush=True)
1239
1276
  if dtdf is not None:
1240
- locsel = (dtdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k]) & (dtdf[outlier_column] < 0.5)
1277
+ locsel = (dtdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k])
1241
1278
  if sum(locsel) == 1:
1242
1279
  mmdf.iloc[k, mmdf.columns.get_loc("dtid1")] = dtdf["imageID"][locsel].values[0]
1243
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = dtdf["fn"][locsel].values[0]
1280
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = dtdf[unique_identifier][locsel].values[0]
1244
1281
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop1")] = dtdf[outlier_column][locsel].values[0]
1245
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = dtdf['ol_lof_decision'][locsel].values[0]
1282
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = float(dtdf['ol_lof_decision'][locsel].values[0])
1283
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints1")] = float(dtdf['dimt'][locsel].values[0])
1246
1284
  elif sum(locsel) > 1:
1247
1285
  locdf = dtdf[locsel]
1248
1286
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1250,21 +1288,24 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1250
1288
  if locdf.shape[0] > 1:
1251
1289
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1252
1290
  mmdf.iloc[k, mmdf.columns.get_loc("dtid1")] = locdf["imageID"].values[0]
1253
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = locdf["fn"].values[0]
1291
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = locdf[unique_identifier].values[0]
1254
1292
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop1")] = locdf[outlier_column].values[0]
1255
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = locdf['ol_lof_decision'][locsel].values[0]
1293
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = float(locdf['ol_lof_decision'][locsel].values[0])
1294
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints1")] = float(dtdf['dimt'][locsel].values[0])
1256
1295
  if locdf.shape[0] > 1:
1257
1296
  mmdf.iloc[k, mmdf.columns.get_loc("dtid2")] = locdf["imageID"].values[1]
1258
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn2")] = locdf["fn"].values[1]
1297
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn2")] = locdf[unique_identifier].values[1]
1259
1298
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop2")] = locdf[outlier_column].values[1]
1260
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof2")] = locdf['ol_lof_decision'][locsel].values[1]
1299
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof2")] = float(locdf['ol_lof_decision'][locsel].values[1])
1300
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints2")] = float(dtdf['dimt'][locsel].values[1])
1261
1301
  if rsdf is not None:
1262
- locsel = (rsdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k]) & (rsdf[outlier_column] < 0.5)
1302
+ locsel = (rsdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k])
1263
1303
  if sum(locsel) == 1:
1264
1304
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid1")] = rsdf["imageID"][locsel].values[0]
1265
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = rsdf["fn"][locsel].values[0]
1305
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = rsdf[unique_identifier][locsel].values[0]
1266
1306
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop1")] = rsdf[outlier_column][locsel].values[0]
1267
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = rsdf['ol_lof_decision'][locsel].values[0]
1307
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = float(rsdf['ol_lof_decision'].values[0])
1308
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints1")] = float(rsdf['dimt'][locsel].values[0])
1268
1309
  elif sum(locsel) > 1:
1269
1310
  locdf = rsdf[locsel]
1270
1311
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1272,22 +1313,24 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1272
1313
  if locdf.shape[0] > 1:
1273
1314
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1274
1315
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid1")] = locdf["imageID"].values[0]
1275
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = locdf["fn"].values[0]
1316
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = locdf[unique_identifier].values[0]
1276
1317
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop1")] = locdf[outlier_column].values[0]
1277
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = locdf['ol_lof_decision'].values[0]
1318
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = float(locdf['ol_lof_decision'].values[0])
1319
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints1")] = float(locdf['dimt'][locsel].values[0])
1278
1320
  if locdf.shape[0] > 1:
1279
1321
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid2")] = locdf["imageID"].values[1]
1280
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn2")] = locdf["fn"].values[1]
1322
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn2")] = locdf[unique_identifier].values[1]
1281
1323
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop2")] = locdf[outlier_column].values[1]
1282
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof2")] = locdf['ol_lof_decision'].values[1]
1324
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof2")] = float(locdf['ol_lof_decision'].values[1])
1325
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints2")] = float(locdf['dimt'][locsel].values[1])
1283
1326
 
1284
1327
  if fldf is not None:
1285
1328
  locsel = fldf['subjectIDdate'] == mmdf['subjectIDdate'].iloc[k]
1286
1329
  if locsel.sum() == 1:
1287
1330
  mmdf.iloc[k, mmdf.columns.get_loc("flairid")] = fldf['imageID'][locsel].values[0]
1288
- mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = fldf['filename'][locsel].values[0]
1331
+ mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = fldf[unique_identifier][locsel].values[0]
1289
1332
  mmdf.iloc[k, mmdf.columns.get_loc("flairloop")] = fldf[outlier_column][locsel].values[0]
1290
- mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = fldf['ol_lof_decision'][locsel].values[0]
1333
+ mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = float(fldf['ol_lof_decision'][locsel].values[0])
1291
1334
  elif sum(locsel) > 1:
1292
1335
  locdf = fldf[locsel]
1293
1336
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1295,9 +1338,9 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1295
1338
  if locdf.shape[0] > 1:
1296
1339
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1297
1340
  mmdf.iloc[k, mmdf.columns.get_loc("flairid")] = locdf["imageID"].values[0]
1298
- mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = locdf["fn"].values[0]
1341
+ mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = locdf[unique_identifier].values[0]
1299
1342
  mmdf.iloc[k, mmdf.columns.get_loc("flairloop")] = locdf[outlier_column].values[0]
1300
- mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = locdf['ol_lof_decision'].values[0]
1343
+ mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = float(locdf['ol_lof_decision'].values[0])
1301
1344
 
1302
1345
  if nmdf is not None:
1303
1346
  locsel = nmdf['subjectIDdate'] == mmdf['subjectIDdate'].iloc[k]
@@ -1305,16 +1348,40 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1305
1348
  locdf = nmdf[locsel]
1306
1349
  for i in range(np.min( [10,locdf.shape[0]])):
1307
1350
  nmid = "nmid"+str(i+1)
1308
- mmdf[nmid].iloc[k] = locdf['imageID'].iloc[i]
1351
+ mmdf.loc[k,nmid] = locdf['imageID'].iloc[i]
1309
1352
  nmfn = "nmfn"+str(i+1)
1310
- mmdf[nmfn].iloc[k] = locdf['imageID'].iloc[i]
1353
+ mmdf.loc[k,nmfn] = locdf['imageID'].iloc[i]
1311
1354
  nmloop = "nmloop"+str(i+1)
1312
- mmdf[nmloop].iloc[k] = locdf[outlier_column].iloc[i]
1355
+ mmdf.loc[k,nmloop] = locdf[outlier_column].iloc[i]
1313
1356
  nmloop = "nmlof"+str(i+1)
1314
- mmdf[nmloop].iloc[k] = locdf['ol_lof_decision'].iloc[i]
1357
+ mmdf.loc[k,nmloop] = float(locdf['ol_lof_decision'].iloc[i])
1315
1358
 
1359
+ mmdf['rsf_total_timepoints']=mmdf['rsfntimepoints1']+mmdf['rsfntimepoints2']
1360
+ mmdf['dt_total_timepoints']=mmdf['dtntimepoints1']+mmdf['dtntimepoints2']
1316
1361
  return mmdf
1317
1362
 
1363
+
1364
+ def add_repeat_column(df, groupby_column):
1365
+ """
1366
+ Adds a 'repeat' column to the DataFrame that counts occurrences of each unique value
1367
+ in the specified 'groupby_column'. The count increments from 1 for each identical entry.
1368
+
1369
+ Parameters:
1370
+ - df: pandas DataFrame.
1371
+ - groupby_column: The name of the column to group by and count repeats.
1372
+
1373
+ Returns:
1374
+ - Modified pandas DataFrame with an added 'repeat' column.
1375
+ """
1376
+ # Validate if the groupby_column exists in the DataFrame
1377
+ if groupby_column not in df.columns:
1378
+ raise ValueError(f"Column '{groupby_column}' does not exist in the DataFrame.")
1379
+
1380
+ # Count the occurrences of each unique value in the specified column and increment from 1
1381
+ df['repeat'] = df.groupby(groupby_column).cumcount() + 1
1382
+
1383
+ return df
1384
+
1318
1385
  def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1319
1386
  """
1320
1387
  Selects the best repeats per modality.
@@ -1333,6 +1400,8 @@ def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1333
1400
  list: a list containing two metadata dataframes - raw and filt. raw contains all the metadata for the selected modality and filt contains the metadata filtered for highest quality repeats.
1334
1401
 
1335
1402
  """
1403
+ # mmdf = mmdf.astype(str)
1404
+ mmdf[outlier_column]=mmdf[outlier_column].astype(float)
1336
1405
  msel = mmdf['modality'] == wmod
1337
1406
  if wmod == 'rsfMRI':
1338
1407
  msel1 = mmdf['modality'] == 'rsfMRI'
@@ -1345,34 +1414,44 @@ def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1345
1414
  msel2 = mmdf['modality'] == 'DTI_LR'
1346
1415
  msel3 = mmdf['modality'] == 'DTI_RL'
1347
1416
  msel4 = mmdf['modality'] == 'DTIdwi'
1348
- msel = msel1 | msel2 | msel3 | msel4
1417
+ msel5 = mmdf['modality'] == 'DTIb0'
1418
+ msel = msel1 | msel2 | msel3 | msel4 | msel5
1349
1419
  if sum(msel) == 0:
1350
1420
  return {'raw': None, 'filt': None}
1351
- uids = list(mmdf['filename'][msel])
1352
- metasub = mmdf[msel]
1421
+ metasub = mmdf[msel].copy()
1353
1422
 
1354
1423
  if verbose:
1355
1424
  print(f"{wmod} {(metasub.shape[0])} pre")
1356
1425
 
1357
- metasub['subjectID']=math.nan
1358
- metasub['date']=math.nan
1359
- metasub['subjectIDdate']=math.nan
1360
- metasub['imageID']=math.nan
1361
- for k in range(len(uids)):
1362
- temp = uids[k].split( mysep )
1363
- metasub['subjectID'].iloc[k] = temp[1]
1364
- metasub['date'].iloc[k] = temp[2]
1365
- metasub['subjectIDdate'].iloc[k] = temp[1] + mysep + temp[2]
1366
- metasub['imageID'].iloc[k] = temp[4]
1367
-
1368
- metasub['negol'] = metasub[outlier_column].max() - metasub[outlier_column]
1426
+ metasub['subjectID']=None
1427
+ metasub['date']=None
1428
+ metasub['subjectIDdate']=None
1429
+ metasub['imageID']=None
1430
+ metasub['negol']=math.nan
1431
+ for k in metasub.index:
1432
+ temp = metasub.loc[k, 'filename'].split( mysep )
1433
+ metasub.loc[k,'subjectID'] = str( temp[1] )
1434
+ metasub.loc[k,'date'] = str( temp[2] )
1435
+ metasub.loc[k,'subjectIDdate'] = str( temp[1] + mysep + temp[2] )
1436
+ metasub.loc[k,'imageID'] = str( temp[4])
1437
+
1438
+
1439
+ if 'ol_' in outlier_column:
1440
+ metasub['negol'] = metasub[outlier_column].max() - metasub[outlier_column]
1441
+ else:
1442
+ metasub['negol'] = metasub[outlier_column]
1369
1443
  if 'date' not in metasub.keys():
1370
- metasub['date']='NA'
1371
- metasubq = highest_quality_repeat(metasub, 'filename', 'date', 'negol')
1444
+ metasub['date']=None
1445
+ metasubq = add_repeat_column( metasub, 'subjectIDdate' )
1446
+ metasubq = highest_quality_repeat(metasubq, 'filename', 'date', 'negol')
1372
1447
 
1373
1448
  if verbose:
1374
1449
  print(f"{wmod} {metasubq.shape[0]} post")
1375
1450
 
1451
+ # metasub = metasub.astype(str)
1452
+ # metasubq = metasubq.astype(str)
1453
+ metasub[outlier_column]=metasub[outlier_column].astype(float)
1454
+ metasubq[outlier_column]=metasubq[outlier_column].astype(float)
1376
1455
  return {'raw': metasub, 'filt': metasubq}
1377
1456
 
1378
1457
  def mm_read( x, standardize_intensity=False, modality='' ):
@@ -1783,10 +1862,12 @@ def merge_dwi_data( img_LRdwp, bval_LR, bvec_LR, img_RLdwp, bval_RL, bvec_RL ):
1783
1862
  bvec_RL : array
1784
1863
 
1785
1864
  """
1865
+ import warnings
1786
1866
  insamespace = ants.image_physical_space_consistency( img_LRdwp, img_RLdwp )
1787
1867
  if not insamespace :
1788
- raise ValueError('not insamespace ... corrected image pair should occupy the same physical space')
1789
-
1868
+ warnings.warn('not insamespace ... corrected image pair should occupy the same physical space; returning only the 1st set and wont join these data.')
1869
+ return img_LRdwp, bval_LR, bvec_LR
1870
+
1790
1871
  bval_LR = np.concatenate([bval_LR,bval_RL])
1791
1872
  bvec_LR = np.concatenate([bvec_LR,bvec_RL])
1792
1873
  # concatenate the images
@@ -2555,7 +2636,7 @@ def template_figure_with_overlay(scalar_label_df, prefix, outputfilename=None, t
2555
2636
  toviz = temp['overlay']
2556
2637
  return { "underlay": seggm, 'overlay': toviz, 'seg': tcrop }
2557
2638
 
2558
- def get_data( name=None, force_download=False, version=21, target_extension='.csv' ):
2639
+ def get_data( name=None, force_download=False, version=23, target_extension='.csv' ):
2559
2640
  """
2560
2641
  Get ANTsPyMM data filename
2561
2642
 
@@ -2839,6 +2920,34 @@ def super_res_mcimage( image,
2839
2920
  return ants.list_to_ndimage( imageup, mcsr )
2840
2921
 
2841
2922
 
2923
+ def segment_timeseries_by_bvalue(bvals):
2924
+ """
2925
+ Segments a time series based on a threshold applied to b-values.
2926
+
2927
+ This function categorizes indices of the given b-values array into two groups:
2928
+ one for indices where b-values are above a near-zero threshold, and another
2929
+ where b-values are at or below this threshold. The threshold is set to 1e-12.
2930
+
2931
+ Parameters:
2932
+ - bvals (numpy.ndarray): An array of b-values.
2933
+
2934
+ Returns:
2935
+ - dict: A dictionary with two keys, 'lowermeans' and 'highermeans', each containing
2936
+ the indices of bvals where the b-values are above and at/below the threshold, respectively.
2937
+ """
2938
+ # Define the threshold
2939
+ threshold = 1e-12
2940
+
2941
+ # Get indices where b-values are greater than the threshold
2942
+ lowermeans = list(np.where(bvals > threshold)[0])
2943
+
2944
+ # Get indices where b-values are less than or equal to the threshold
2945
+ highermeans = list(np.where(bvals <= threshold)[0])
2946
+
2947
+ return {
2948
+ 'lowermeans': lowermeans,
2949
+ 'highermeans': highermeans
2950
+ }
2842
2951
 
2843
2952
  def segment_timeseries_by_meanvalue( image, quantile = 0.995 ):
2844
2953
  """
@@ -3290,7 +3399,7 @@ def dipy_dti_recon(
3290
3399
  space as the image, we will resample directly to the image space. This
3291
3400
  could lead to problems if the inputs are really incorrect.
3292
3401
 
3293
- b0_idx : the indices of the B0; if None, use segment_timeseries_by_meanvalue to guess
3402
+ b0_idx : the indices of the B0; if None, use segment_timeseries_by_bvalue
3294
3403
 
3295
3404
  mask_dilation : integer zero or more dilates the brain mask
3296
3405
 
@@ -3321,8 +3430,7 @@ def dipy_dti_recon(
3321
3430
  bvals = bvalsfn.copy()
3322
3431
  bvecs = bvecsfn.copy()
3323
3432
 
3324
- if b0_idx is None:
3325
- b0_idx = segment_timeseries_by_meanvalue( image )['highermeans']
3433
+ b0_idx = segment_timeseries_by_bvalue( bvals )['highermeans']
3326
3434
 
3327
3435
  b0 = ants.slice_image( image, axis=3, idx=b0_idx[0] )
3328
3436
  bxtmod='bold'
@@ -3532,6 +3640,9 @@ def joint_dti_recon(
3532
3640
  def fix_dwi_shape( img, bvalfn, bvecfn ):
3533
3641
  if isinstance(bvecfn, str):
3534
3642
  bvals, bvecs = read_bvals_bvecs( bvalfn , bvecfn )
3643
+ else:
3644
+ bvals = bvalfn
3645
+ bvecs = bvecfn
3535
3646
  if bvecs.shape[0] < img.shape[3]:
3536
3647
  imgout = ants.from_numpy( img[:,:,:,0:bvecs.shape[0]] )
3537
3648
  imgout = ants.copy_image_info( img, imgout )
@@ -4586,7 +4697,7 @@ def get_rsf_outputs( coords ):
4586
4697
  return list( yeo['SystemName'].unique() )
4587
4698
 
4588
4699
  def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4589
- transform=['rigid'], verbose=False ):
4700
+ transform=['rigid'], compreg=None, verbose=False ):
4590
4701
  """
4591
4702
  multi-start multi-transform registration solution - based on ants.registration
4592
4703
 
@@ -4600,6 +4711,8 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4600
4711
 
4601
4712
  transform : list of transforms to loop through
4602
4713
 
4714
+ compreg : registration results against which to compare
4715
+
4603
4716
  verbose : boolean
4604
4717
 
4605
4718
  """
@@ -4608,15 +4721,20 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4608
4721
  output_directory_w = output_directory + "/tra_reg/"
4609
4722
  os.makedirs(output_directory_w,exist_ok=True)
4610
4723
  bestmi = math.inf
4724
+ bestvar = 0.0
4611
4725
  myorig = list(ants.get_origin( fixed ))
4612
4726
  mymax = 0;
4613
4727
  for k in range(len( myorig ) ):
4614
4728
  if abs(myorig[k]) > mymax:
4615
4729
  mymax = abs(myorig[k])
4616
4730
  maxtrans = mymax * 0.05
4617
- bestreg=ants.registration( fixed,moving,'Translation',
4618
- outprefix=output_directory_w+"trans")
4619
- initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4731
+ if compreg is None:
4732
+ bestreg=ants.registration( fixed,moving,'Translation',
4733
+ outprefix=output_directory_w+"trans")
4734
+ initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4735
+ else :
4736
+ bestreg=compreg
4737
+ initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4620
4738
  for mytx in transform:
4621
4739
  regtx = 'Rigid'
4622
4740
  with tempfile.NamedTemporaryFile(suffix='.h5') as tp:
@@ -4653,6 +4771,9 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4653
4771
  print( "mi @ " + str(k) + " : " + str(mymi), flush=True)
4654
4772
  bestmi = mymi
4655
4773
  bestreg = reg
4774
+ bestvar = myvar
4775
+ if bestvar == 0.0 and compreg is not None:
4776
+ return compreg
4656
4777
  return bestreg
4657
4778
 
4658
4779
  def neuromelanin( list_nm_images, t1, t1_head, t1lab, brain_stem_dilation=8,
@@ -4834,7 +4955,7 @@ def neuromelanin( list_nm_images, t1, t1_head, t1lab, brain_stem_dilation=8,
4834
4955
  nm_avg_cropped_new = nm_avg_cropped_new + warpednext
4835
4956
  nm_avg_cropped = nm_avg_cropped_new / len( crop_nm_list )
4836
4957
 
4837
- slabregUpdated = tra_initializer( nm_avg_cropped, t1c, verbose=verbose )
4958
+ slabregUpdated = tra_initializer( nm_avg_cropped, t1c, compreg=slabreg,verbose=verbose )
4838
4959
  tempOrig = ants.apply_transforms( nm_avg_cropped_new, t1c, slabreg['fwdtransforms'] )
4839
4960
  tempUpdate = ants.apply_transforms( nm_avg_cropped_new, t1c, slabregUpdated['fwdtransforms'] )
4840
4961
  miUpdate = ants.image_mutual_information(
@@ -5409,7 +5530,8 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5409
5530
  ptImage=ants.threshold_image( ptImg, pts2bold.loc[i,'ROI'], pts2bold.loc[i,'ROI'] )
5410
5531
  if debug:
5411
5532
  ptImgAll = ptImgAll + ptImage
5412
- meanROI[:,i] = ants.timeseries_to_matrix( simg, ptImage).mean(axis=1)
5533
+ if ptImage.sum() > 0 :
5534
+ meanROI[:,i] = ants.timeseries_to_matrix( simg, ptImage).mean(axis=1)
5413
5535
 
5414
5536
  if debug:
5415
5537
  ants.image_write( simg, '/tmp/simg.nii.gz' )
@@ -5527,9 +5649,15 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5527
5649
  fname='falffPoint'+kk+anatname
5528
5650
  aname='alffPoint'+kk+anatname
5529
5651
  pname='perafPoint'+kk+anatname
5530
- outdict[fname]=(outdict['falff'][ptImg==k]).mean()
5531
- outdict[aname]=(outdict['alff'][ptImg==k]).mean()
5532
- outdict[pname]=(perafimg[ptImg==k]).mean()
5652
+ localsel = ptImg == k
5653
+ if localsel.sum() > 0 : # check if non-empty
5654
+ outdict[fname]=(outdict['falff'][localsel]).mean()
5655
+ outdict[aname]=(outdict['alff'][localsel]).mean()
5656
+ outdict[pname]=(perafimg[localsel]).mean()
5657
+ else:
5658
+ outdict[fname]=math.nan
5659
+ outdict[aname]=math.nan
5660
+ outdict[pname]=math.nan
5533
5661
 
5534
5662
  rsfNuisance = pd.DataFrame( nuisance )
5535
5663
  if remove_it:
@@ -5562,6 +5690,7 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5562
5690
  outdict['despiking_count_summary'] = despiking_count_summary
5563
5691
  outdict['FD_max'] = corrmo['FD'].max()
5564
5692
  outdict['FD_mean'] = corrmo['FD'].mean()
5693
+ outdict['FD_sd'] = corrmo['FD'].std()
5565
5694
  outdict['bold_evr'] = antspyt1w.patch_eigenvalue_ratio( und, 512, [16,16,16], evdepth = 0.9, mask = bmask )
5566
5695
  outdict['n_outliers'] = len(hlinds)
5567
5696
  outdict['nc_wm'] = int(nc_wm)
@@ -5638,11 +5767,18 @@ def despike_time_series_afni(image, c1=2.5, c2=4):
5638
5767
  residuals = data - curve
5639
5768
  mad = np.median(np.abs(residuals - np.median(residuals, axis=-1, keepdims=True)), axis=-1, keepdims=True)
5640
5769
  sigma = np.sqrt(np.pi / 2) * mad
5770
+ # Ensure sigma is not zero to avoid division by zero
5771
+ sigma_safe = np.where(sigma == 0, 1e-10, sigma)
5772
+
5773
+ # Optionally, handle NaN or inf values in data, curve, or sigma
5774
+ data = np.nan_to_num(data, nan=0.0, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5775
+ curve = np.nan_to_num(curve, nan=0.0, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5776
+ sigma_safe = np.nan_to_num(sigma_safe, nan=1e-10, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5641
5777
 
5642
5778
  # Despike algorithm
5643
5779
  spike_counts = np.zeros( image.shape[3] )
5644
5780
  for i in range(data.shape[-1]):
5645
- s = (data[..., i] - curve[..., i]) / sigma[..., 0]
5781
+ s = (data[..., i] - curve[..., i]) / sigma_safe[..., 0]
5646
5782
  ww = s > c1
5647
5783
  s_prime = np.where( ww, c1 + (c2 - c1) * np.tanh((s - c1) / (c2 - c1)), s)
5648
5784
  spike_counts[i] = ww.sum()
@@ -5907,6 +6043,7 @@ def bold_perfusion_minimal(
5907
6043
  outdict['dvars'] = dvars( corrmo['motion_corrected'], gmseg )
5908
6044
  outdict['FD_max'] = rsfNuisance['FD'].max()
5909
6045
  outdict['FD_mean'] = rsfNuisance['FD'].mean()
6046
+ outdict['FD_sd'] = rsfNuisance['FD'].std()
5910
6047
  outdict['outlier_volumes']=hlinds
5911
6048
  outdict['negative_voxels']=negative_voxels
5912
6049
  return convert_np_in_dict( outdict )
@@ -6313,6 +6450,7 @@ Where:
6313
6450
  outdict['high_motion_pct'] = (rsfNuisance['FD'] > FD_threshold ).sum() / rsfNuisance.shape[0]
6314
6451
  outdict['FD_max'] = rsfNuisance['FD'].max()
6315
6452
  outdict['FD_mean'] = rsfNuisance['FD'].mean()
6453
+ outdict['FD_sd'] = rsfNuisance['FD'].std()
6316
6454
  outdict['bold_evr'] = antspyt1w.patch_eigenvalue_ratio( und, 512, [16,16,16], evdepth = 0.9, mask = bmask )
6317
6455
  outdict['t1reg'] = t1reg
6318
6456
  outdict['outlier_volumes']=hlinds
@@ -6877,11 +7015,14 @@ def mm(
6877
7015
  normalization_dict['MD_norm'] = ants.apply_transforms( group_template, mydti['recon_md'],group_transform+dtirig['fwdtransforms'] )
6878
7016
  normalization_dict['FA_norm'] = ants.apply_transforms( group_template, mydti['recon_fa'],group_transform+dtirig['fwdtransforms'] )
6879
7017
  output_directory = tempfile.mkdtemp()
6880
- comptx = ants.apply_transforms( group_template, group_template,
6881
- group_transform+dtirig['fwdtransforms'],
6882
- compose = output_directory + '/xxx' )
6883
- normalization_dict['DTI_norm'] = transform_and_reorient_dti(
6884
- group_template, mydti['dti'], comptx, py_based=True, verbose=True )
7018
+ do_dti_norm=False
7019
+ if do_dti_norm:
7020
+ comptx = ants.apply_transforms( group_template, group_template, group_transform+dtirig['fwdtransforms'], compose = output_directory + '/xxx' )
7021
+ tspc=[2.,2.,2.]
7022
+ if srmodel is not None:
7023
+ tspc=[1.,1.,1.]
7024
+ group_template2mm = ants.resample_image( group_template, tspc )
7025
+ normalization_dict['DTI_norm'] = transform_and_reorient_dti( group_template2mm, mydti['dti'], comptx, py_based=True, verbose=True )
6885
7026
  import shutil
6886
7027
  shutil.rmtree(output_directory, ignore_errors=True )
6887
7028
  if output_dict['rsf'] is not None:
@@ -7019,6 +7160,8 @@ def write_mm( output_prefix, mm, mm_norm=None, t1wide=None, separator='_', verbo
7019
7160
  if 'flair' in mm:
7020
7161
  if mm['flair'] is not None:
7021
7162
  myop = output_prefix + separator + 'wmh.nii.gz'
7163
+ pngfnb = output_prefix + separator + 'wmh_seg.png'
7164
+ ants.plot( mm['flair']['flair'], mm['flair']['WMH_posterior_probability_map'], axis=2, nslices=21, ncol=7, filename=pngfnb, crop=True )
7022
7165
  if mm['flair']['WMH_probability_map'] is not None:
7023
7166
  image_write_with_thumbnail( mm['flair']['WMH_probability_map'], myop, thumb=False )
7024
7167
  flwide = dict_to_dataframe( mm['flair'] )
@@ -7066,9 +7209,10 @@ def write_mm( output_prefix, mm, mm_norm=None, t1wide=None, separator='_', verbo
7066
7209
  mm_wide['dti_high_motion_count'] = mydti['high_motion_count']
7067
7210
  mm_wide['dti_FD_mean'] = mydti['framewise_displacement'].mean()
7068
7211
  mm_wide['dti_FD_max'] = mydti['framewise_displacement'].max()
7212
+ mm_wide['dti_FD_sd'] = mydti['framewise_displacement'].std()
7069
7213
  fdfn = output_prefix + separator + '_fd.csv'
7070
7214
  else:
7071
- mm_wide['dti_FD_mean'] = mm_wide['dti_FD_max'] = 'NA'
7215
+ mm_wide['dti_FD_mean'] = mm_wide['dti_FD_max'] = mm_wide['dti_FD_sd'] = 'NA'
7072
7216
 
7073
7217
  if 'perf' in mm:
7074
7218
  if mm['perf'] is not None:
@@ -7708,6 +7852,7 @@ def mm_csv(
7708
7852
  writes output to disk and produces figures
7709
7853
 
7710
7854
  """
7855
+ import traceback
7711
7856
  visualize = True
7712
7857
  verbose = True
7713
7858
  if verbose:
@@ -7965,9 +8110,11 @@ def mm_csv(
7965
8110
  test_run=test_run,
7966
8111
  verbose=True )
7967
8112
  except Exception as e:
8113
+ error_info = traceback.format_exc()
8114
+ print(error_info)
7968
8115
  visualize=False
7969
8116
  dowrite=False
7970
- print(f"An error occurred while processing {overmodX}: {e}")
8117
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
7971
8118
  pass
7972
8119
  if not test_run:
7973
8120
  write_mm( output_prefix=mymm, mm=tabPro, mm_norm=normPro, t1wide=None, separator=mysep )
@@ -8021,9 +8168,11 @@ def mm_csv(
8021
8168
  test_run=test_run,
8022
8169
  verbose=True )
8023
8170
  except Exception as e:
8171
+ error_info = traceback.format_exc()
8172
+ print(error_info)
8024
8173
  visualize=False
8025
8174
  dowrite=False
8026
- print(f"An error occurred while processing {overmodX}: {e}")
8175
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8027
8176
  pass
8028
8177
  if visualize:
8029
8178
  maxslice = np.min( [21, hier['brain_n4_dnz'].shape[2] ] )
@@ -8044,9 +8193,11 @@ def mm_csv(
8044
8193
  test_run=test_run,
8045
8194
  verbose=True )
8046
8195
  except Exception as e:
8196
+ error_info = traceback.format_exc()
8197
+ print(error_info)
8047
8198
  visualize=False
8048
8199
  dowrite=False
8049
- print(f"An error occurred while processing {overmodX}: {e}")
8200
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8050
8201
  pass
8051
8202
  if visualize:
8052
8203
  maxslice = np.min( [21, img.shape[2] ] )
@@ -8081,11 +8232,13 @@ def mm_csv(
8081
8232
  test_run=test_run,
8082
8233
  verbose=True )
8083
8234
  except Exception as e:
8235
+ error_info = traceback.format_exc()
8236
+ print(error_info)
8084
8237
  visualize=False
8085
8238
  dowrite=False
8086
8239
  tabPro={'rsf':None}
8087
8240
  normPro={'rsf':None}
8088
- print(f"An error occurred while processing {overmodX}: {e}")
8241
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8089
8242
  pass
8090
8243
  if tabPro['rsf'] is not None and visualize:
8091
8244
  for tpro in tabPro['rsf']: # FIXMERSF
@@ -8117,10 +8270,12 @@ def mm_csv(
8117
8270
  perfusion_m0=perfusion_m0,
8118
8271
  verbose=True )
8119
8272
  except Exception as e:
8273
+ error_info = traceback.format_exc()
8274
+ print(error_info)
8120
8275
  visualize=False
8121
8276
  dowrite=False
8122
8277
  tabPro={'perf':None}
8123
- print(f"An error occurred while processing {overmodX}: {e}")
8278
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8124
8279
  pass
8125
8280
  if tabPro['perf'] is not None and visualize:
8126
8281
  maxslice = np.min( [21, tabPro['perf']['meanBold'].shape[2] ] )
@@ -8137,7 +8292,7 @@ def mm_csv(
8137
8292
  bvalfnList = [ bvalfn ]
8138
8293
  bvecfnList = [ bvecfn ]
8139
8294
  missing_dti_data=False # bval, bvec or images
8140
- if len( myimgsr ) > 1: # find DTI_RL
8295
+ if len( myimgsr ) == 2: # find DTI_RL
8141
8296
  dtilrfn = myimgsr[myimgcount+1]
8142
8297
  if exists( dtilrfn ):
8143
8298
  bvalfnRL = re.sub( '.nii.gz', '.bval' , dtilrfn )
@@ -8146,6 +8301,28 @@ def mm_csv(
8146
8301
  imgList.append( imgRL )
8147
8302
  bvalfnList.append( bvalfnRL )
8148
8303
  bvecfnList.append( bvecfnRL )
8304
+ elif len( myimgsr ) == 3: # find DTI_RL
8305
+ print("DTI trinity")
8306
+ dtilrfn = myimgsr[myimgcount+1]
8307
+ dtilrfn2 = myimgsr[myimgcount+2]
8308
+ if exists( dtilrfn ) and exists( dtilrfn2 ):
8309
+ bvalfnRL = re.sub( '.nii.gz', '.bval' , dtilrfn )
8310
+ bvecfnRL = re.sub( '.nii.gz', '.bvec' , dtilrfn )
8311
+ bvalfnRL2 = re.sub( '.nii.gz', '.bval' , dtilrfn2 )
8312
+ bvecfnRL2 = re.sub( '.nii.gz', '.bvec' , dtilrfn2 )
8313
+ imgRL = ants.image_read( dtilrfn )
8314
+ imgRL2 = ants.image_read( dtilrfn2 )
8315
+ bvals, bvecs = read_bvals_bvecs( bvalfnRL , bvecfnRL )
8316
+ print( bvals.max() )
8317
+ bvals2, bvecs2 = read_bvals_bvecs( bvalfnRL2 , bvecfnRL2 )
8318
+ print( bvals2.max() )
8319
+ temp = merge_dwi_data( imgRL, bvals, bvecs, imgRL2, bvals2, bvecs2 )
8320
+ imgList.append( temp[0] )
8321
+ bvalfnList.append( mymm+mysep+'joined.bval' )
8322
+ bvecfnList.append( mymm+mysep+'joined.bvec' )
8323
+ write_bvals_bvecs( temp[1], temp[2], mymm+mysep+'joined' )
8324
+ bvalsX, bvecsX = read_bvals_bvecs( bvalfnRL2 , bvecfnRL2 )
8325
+ print( bvalsX.max() )
8149
8326
  # check existence of all files expected ...
8150
8327
  for dtiex in bvalfnList+bvecfnList+myimgsr:
8151
8328
  if not exists(dtiex):
@@ -8175,20 +8352,22 @@ def mm_csv(
8175
8352
  bvals = bvalfnList,
8176
8353
  bvecs = bvecfnList,
8177
8354
  srmodel=srmodel_DTI_mdl,
8178
- do_tractography=False, #not test_run,
8355
+ do_tractography=not test_run,
8179
8356
  do_kk=False,
8180
8357
  do_normalization=templateTx,
8181
- group_template = ants.resample_image(normalization_template,(2,2,2)),
8358
+ group_template = normalization_template,
8182
8359
  group_transform = groupTx,
8183
8360
  dti_motion_correct = dti_motion_correct,
8184
8361
  dti_denoise = dti_denoise,
8185
8362
  test_run=test_run,
8186
8363
  verbose=True )
8187
8364
  except Exception as e:
8365
+ error_info = traceback.format_exc()
8366
+ print(error_info)
8188
8367
  visualize=False
8189
8368
  dowrite=False
8190
8369
  tabPro={'DTI':None}
8191
- print(f"An error occurred while processing {overmodX}: {e}")
8370
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8192
8371
  pass
8193
8372
  mydti = tabPro['DTI']
8194
8373
  if visualize and tabPro['DTI'] is not None:
@@ -8835,6 +9014,7 @@ def boot_wmh( flair, t1, t1seg, mmfromconvexhull = 0.0, strict=True,
8835
9014
  wmh_sum_aug = wmh_sum_aug / float( n_simulations )
8836
9015
  wmh_sum_prior_aug = wmh_sum_prior_aug / float( n_simulations )
8837
9016
  return{
9017
+ 'flair' : ants.iMath(flair,"Normalize"),
8838
9018
  'WMH_probability_map' : augprob,
8839
9019
  'WMH_posterior_probability_map' : augprob_prior,
8840
9020
  'wmh_mass': wmh_sum_aug,
@@ -9267,6 +9447,7 @@ def blind_image_assessment(
9267
9447
  title=False,
9268
9448
  pull_rank=False,
9269
9449
  resample=None,
9450
+ n_to_skip = 10,
9270
9451
  verbose=False
9271
9452
  ):
9272
9453
  """
@@ -9296,6 +9477,8 @@ def blind_image_assessment(
9296
9477
 
9297
9478
  resample : None, numeric max or min, resamples image to isotropy
9298
9479
 
9480
+ n_to_skip : 10 by default; samples time series every n_to_skip volume
9481
+
9299
9482
  verbose : boolean
9300
9483
 
9301
9484
  """
@@ -9307,6 +9490,7 @@ def blind_image_assessment(
9307
9490
  from pathlib import Path
9308
9491
  import json
9309
9492
  import re
9493
+ from dipy.io.gradients import read_bvals_bvecs
9310
9494
  mystem=''
9311
9495
  if isinstance(image,list):
9312
9496
  isfilename=isinstance( image[0], str)
@@ -9315,6 +9499,7 @@ def blind_image_assessment(
9315
9499
  isfilename=isinstance( image, str)
9316
9500
  outdf = pd.DataFrame()
9317
9501
  mymeta = None
9502
+ MagneticFieldStrength = None
9318
9503
  image_filename=''
9319
9504
  if isfilename:
9320
9505
  image_filename = image
@@ -9322,10 +9507,14 @@ def blind_image_assessment(
9322
9507
  image_filename=image[0]
9323
9508
  json_name = re.sub(".nii.gz",".json",image_filename)
9324
9509
  if exists( json_name ):
9325
- with open(json_name, 'r') as fcc_file:
9326
- mymeta = json.load(fcc_file, strict=False)
9327
- if verbose:
9328
- print(json.dumps(mymeta, indent=4))
9510
+ try:
9511
+ with open(json_name, 'r') as fcc_file:
9512
+ mymeta = json.load(fcc_file)
9513
+ if verbose:
9514
+ print(json.dumps(mymeta, indent=4))
9515
+ fcc_file.close()
9516
+ except:
9517
+ pass
9329
9518
  mystem=Path( image ).stem
9330
9519
  mystem=Path( mystem ).stem
9331
9520
  image_reference = ants.image_read( image )
@@ -9333,6 +9522,7 @@ def blind_image_assessment(
9333
9522
  else:
9334
9523
  image_reference = ants.image_clone( image )
9335
9524
  ntimepoints = 1
9525
+ bvalueMax=None
9336
9526
  if image_reference.dimension == 4:
9337
9527
  ntimepoints = image_reference.shape[3]
9338
9528
  if "DTI" in image_filename:
@@ -9340,11 +9530,16 @@ def blind_image_assessment(
9340
9530
  image_b0, image_dwi = get_average_dwi_b0( image_reference, fast=True )
9341
9531
  image_b0 = ants.iMath( image_b0, 'Normalize' )
9342
9532
  image_dwi = ants.iMath( image_dwi, 'Normalize' )
9533
+ bval_name = re.sub(".nii.gz",".bval",image_filename)
9534
+ bvec_name = re.sub(".nii.gz",".bvec",image_filename)
9535
+ if exists( bval_name ) and exists( bvec_name ):
9536
+ bvals, bvecs = read_bvals_bvecs( bval_name , bvec_name )
9537
+ bvalueMax = bvals.max()
9343
9538
  else:
9344
9539
  image_b0 = ants.get_average_of_timeseries( image_reference ).iMath("Normalize")
9345
9540
  else:
9346
9541
  image_compare = ants.smooth_image( image_reference, 3, sigma_in_physical_coordinates=False )
9347
- for jjj in range(ntimepoints):
9542
+ for jjj in range(0,ntimepoints,n_to_skip):
9348
9543
  modality='unknown'
9349
9544
  if "rsfMRI" in image_filename:
9350
9545
  modality='rsfMRI'
@@ -9365,7 +9560,7 @@ def blind_image_assessment(
9365
9560
  modality='DTIdwi'
9366
9561
  else:
9367
9562
  image_compare = ants.image_clone( image_b0 )
9368
- image = ants.iMath( image, 'TruncateIntensity',0.01,0.995)
9563
+ # image = ants.iMath( image, 'TruncateIntensity',0.01,0.995)
9369
9564
  minspc = np.min(ants.get_spacing(image))
9370
9565
  maxspc = np.max(ants.get_spacing(image))
9371
9566
  if resample is not None:
@@ -9397,69 +9592,76 @@ def blind_image_assessment(
9397
9592
  bgmsk = msk*0+1-msk
9398
9593
  mskdil = ants.iMath(msk, "MD", 4 )
9399
9594
  # ants.plot_ortho( image, msk, crop=False )
9400
- image = ants.crop_image( image, mskdil ).iMath("Normalize")
9401
- msk = ants.crop_image( msk, mskdil ).iMath("Normalize")
9402
- bgmsk = ants.crop_image( bgmsk, mskdil ).iMath("Normalize")
9403
- image_compare = ants.crop_image( image_compare, mskdil ).iMath("Normalize")
9404
9595
  nvox = int( msk.sum() )
9405
- minshp = np.min( image.shape )
9406
- npatch = int( np.round( 0.1 * nvox ) )
9407
- npatch = np.min( [512,npatch ] )
9408
- patch_shape = []
9409
- for k in range( 3 ):
9410
- p = int( 32.0 / ants.get_spacing( image )[k] )
9411
- if p > int( np.round( image.shape[k] * 0.5 ) ):
9412
- p = int( np.round( image.shape[k] * 0.5 ) )
9413
- patch_shape.append( p )
9414
- if verbose:
9415
- print(image)
9416
- print( patch_shape )
9417
- print( npatch )
9418
- myevr = math.nan # dont want to fail if something odd happens in patch extraction
9419
- try:
9420
- myevr = antspyt1w.patch_eigenvalue_ratio( image, npatch, patch_shape,
9421
- evdepth = 0.9, mask=msk )
9422
- except:
9423
- pass
9424
- if pull_rank:
9425
- image = ants.rank_intensity(image)
9426
- imagereflect = ants.reflect_image(image, axis=0)
9427
- asym_err = ( image - imagereflect ).abs().mean()
9428
- # estimate noise by center cropping, denoizing and taking magnitude of difference
9429
- nocrop=False
9430
- if image.dimension == 3:
9431
- if image.shape[2] == 1:
9432
- nocrop=True
9433
- if maxspc/minspc > 10:
9434
- nocrop=True
9435
- if nocrop:
9436
- mycc = ants.image_clone( image )
9437
- else:
9438
- mycc = antspyt1w.special_crop( image,
9439
- ants.get_center_of_mass( msk *0 + 1 ), patch_shape )
9440
- myccd = ants.denoise_image( mycc, p=2,r=2,noise_model='Gaussian' )
9441
- noizlevel = ( mycc - myccd ).abs().mean()
9442
- # ants.plot_ortho( image, crop=False, filename=viz_filename, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0 )
9443
- # from brisque import BRISQUE
9444
- # obj = BRISQUE(url=False)
9445
- # mybrisq = obj.score( np.array( Image.open( viz_filename )) )
9446
9596
  spc = ants.get_spacing( image )
9447
9597
  org = ants.get_origin( image )
9448
- msk_vol = msk.sum() * np.prod( spc )
9449
- bgstd = image[ bgmsk == 1 ].std()
9450
- fgmean = image[ msk == 1 ].mean()
9451
- bgmean = image[ bgmsk == 1 ].mean()
9452
- snrref = fgmean / bgstd
9453
- cnrref = ( fgmean - bgmean ) / bgstd
9454
- psnrref = antspynet.psnr( image_compare, image )
9455
- ssimref = antspynet.ssim( image_compare, image )
9456
- if nocrop:
9457
- mymi = math.inf
9598
+ if ( nvox > 0 ):
9599
+ image = ants.crop_image( image, mskdil ).iMath("Normalize")
9600
+ msk = ants.crop_image( msk, mskdil ).iMath("Normalize")
9601
+ bgmsk = ants.crop_image( bgmsk, mskdil ).iMath("Normalize")
9602
+ image_compare = ants.crop_image( image_compare, mskdil ).iMath("Normalize")
9603
+ npatch = int( np.round( 0.1 * nvox ) )
9604
+ npatch = np.min( [512,npatch ] )
9605
+ patch_shape = []
9606
+ for k in range( 3 ):
9607
+ p = int( 32.0 / ants.get_spacing( image )[k] )
9608
+ if p > int( np.round( image.shape[k] * 0.5 ) ):
9609
+ p = int( np.round( image.shape[k] * 0.5 ) )
9610
+ patch_shape.append( p )
9611
+ if verbose:
9612
+ print(image)
9613
+ print( patch_shape )
9614
+ print( npatch )
9615
+ myevr = math.nan # dont want to fail if something odd happens in patch extraction
9616
+ try:
9617
+ myevr = antspyt1w.patch_eigenvalue_ratio( image, npatch, patch_shape,
9618
+ evdepth = 0.9, mask=msk )
9619
+ except:
9620
+ pass
9621
+ if pull_rank:
9622
+ image = ants.rank_intensity(image)
9623
+ imagereflect = ants.reflect_image(image, axis=0)
9624
+ asym_err = ( image - imagereflect ).abs().mean()
9625
+ # estimate noise by center cropping, denoizing and taking magnitude of difference
9626
+ nocrop=False
9627
+ if image.dimension == 3:
9628
+ if image.shape[2] == 1:
9629
+ nocrop=True
9630
+ if maxspc/minspc > 10:
9631
+ nocrop=True
9632
+ if nocrop:
9633
+ mycc = ants.image_clone( image )
9634
+ else:
9635
+ mycc = antspyt1w.special_crop( image,
9636
+ ants.get_center_of_mass( msk *0 + 1 ), patch_shape )
9637
+ myccd = ants.denoise_image( mycc, p=2,r=2,noise_model='Gaussian' )
9638
+ noizlevel = ( mycc - myccd ).abs().mean()
9639
+ # ants.plot_ortho( image, crop=False, filename=viz_filename, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0 )
9640
+ # from brisque import BRISQUE
9641
+ # obj = BRISQUE(url=False)
9642
+ # mybrisq = obj.score( np.array( Image.open( viz_filename )) )
9643
+ msk_vol = msk.sum() * np.prod( spc )
9644
+ bgstd = image[ bgmsk == 1 ].std()
9645
+ fgmean = image[ msk == 1 ].mean()
9646
+ bgmean = image[ bgmsk == 1 ].mean()
9647
+ snrref = fgmean / bgstd
9648
+ cnrref = ( fgmean - bgmean ) / bgstd
9649
+ psnrref = antspynet.psnr( image_compare, image )
9650
+ ssimref = antspynet.ssim( image_compare, image )
9651
+ if nocrop:
9652
+ mymi = math.inf
9653
+ else:
9654
+ mymi = ants.image_mutual_information( image_compare, image )
9458
9655
  else:
9459
- mymi = ants.image_mutual_information( image_compare, image )
9460
- mriseries='NA'
9461
- mrimfg='NA'
9462
- mrimodel='NA'
9656
+ msk_vol = 0
9657
+ myevr = mymi = ssimref = psnrref = cnrref = asym_err = noizlevel = math.nan
9658
+
9659
+ mriseries=None
9660
+ mrimfg=None
9661
+ mrimodel=None
9662
+ mriSAR=None
9663
+ BandwidthPerPixelPhaseEncode=None
9664
+ PixelBandwidth=None
9463
9665
  if mymeta is not None:
9464
9666
  # mriseries=mymeta['']
9465
9667
  try:
@@ -9470,13 +9672,39 @@ def blind_image_assessment(
9470
9672
  mrimodel=mymeta['ManufacturersModelName']
9471
9673
  except:
9472
9674
  pass
9675
+ try:
9676
+ MagneticFieldStrength=mymeta['MagneticFieldStrength']
9677
+ except:
9678
+ pass
9679
+ try:
9680
+ PixelBandwidth=mymeta['PixelBandwidth']
9681
+ except:
9682
+ pass
9683
+ try:
9684
+ BandwidthPerPixelPhaseEncode=mymeta['BandwidthPerPixelPhaseEncode']
9685
+ except:
9686
+ pass
9687
+ try:
9688
+ mriSAR=mymeta['SAR']
9689
+ except:
9690
+ pass
9473
9691
  ttl=mystem + ' '
9474
9692
  ttl=''
9475
9693
  ttl=ttl + "NZ: " + "{:0.4f}".format(noizlevel) + " SNR: " + "{:0.4f}".format(snrref) + " CNR: " + "{:0.4f}".format(cnrref) + " PS: " + "{:0.4f}".format(psnrref)+ " SS: " + "{:0.4f}".format(ssimref) + " EVR: " + "{:0.4f}".format(myevr)+ " MI: " + "{:0.4f}".format(mymi)
9476
- if viz_filename is not None and ( jjj == 0 or (jjj % 30 == 0) ):
9694
+ if viz_filename is not None and ( jjj == 0 or (jjj % 30 == 0) ) and image.shape[2] < 685:
9477
9695
  viz_filename_use = re.sub( ".png", "_slice"+str(jjj).zfill(4)+".png", viz_filename )
9478
9696
  ants.plot_ortho( image, crop=False, filename=viz_filename_use, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0, title=ttl, titlefontsize=12, title_dy=-0.02,textfontcolor='red' )
9479
- df = pd.DataFrame([[ mystem, noizlevel, snrref, cnrref, psnrref, ssimref, mymi, asym_err, myevr, msk_vol, spc[0], spc[1], spc[2],org[0], org[1], org[2], image.shape[0], image.shape[1], image.shape[2], jjj, modality, mriseries, mrimfg, mrimodel ]], columns=['filename', 'noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi', 'reflection_err', 'EVR', 'msk_vol', 'spc0','spc1','spc2','org0','org1','org2','dimx','dimy','dimz','slice','modality', 'mriseries', 'mrimfg', 'mrimodel' ])
9697
+ df = pd.DataFrame([[
9698
+ mystem,
9699
+ image_reference.dimension,
9700
+ noizlevel, snrref, cnrref, psnrref, ssimref, mymi, asym_err, myevr, msk_vol,
9701
+ spc[0], spc[1], spc[2],org[0], org[1], org[2],
9702
+ image.shape[0], image.shape[1], image.shape[2], ntimepoints,
9703
+ jjj, modality, mriseries, mrimfg, mrimodel, MagneticFieldStrength, mriSAR, PixelBandwidth, BandwidthPerPixelPhaseEncode, bvalueMax ]],
9704
+ columns=[
9705
+ 'filename',
9706
+ 'dimensionality',
9707
+ 'noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi', 'reflection_err', 'EVR', 'msk_vol', 'spc0','spc1','spc2','org0','org1','org2','dimx','dimy','dimz','dimt','slice','modality', 'mriseries', 'mrimfg', 'mrimodel', 'mriMagneticFieldStrength', 'mriSAR', 'mriPixelBandwidth', 'mriPixelBandwidthPE', 'dti_bvalueMax' ])
9480
9708
  outdf = pd.concat( [outdf, df ], axis=0, ignore_index=False )
9481
9709
  if verbose:
9482
9710
  print( outdf )
@@ -9485,6 +9713,29 @@ def blind_image_assessment(
9485
9713
  outdf.to_csv( csvfn )
9486
9714
  return outdf
9487
9715
 
9716
+ def remove_unwanted_columns(df):
9717
+ # Identify columns to drop: those named 'X' or starting with 'Unnamed'
9718
+ cols_to_drop = [col for col in df.columns if col == 'X' or col.startswith('Unnamed')]
9719
+
9720
+ # Drop the identified columns from the DataFrame, if any
9721
+ df_cleaned = df.drop(columns=cols_to_drop, errors='ignore')
9722
+
9723
+ return df_cleaned
9724
+
9725
+ def process_dataframe_generalized(df, group_by_column):
9726
+ # Make sure the group_by_column is excluded from both numeric and other columns calculations
9727
+ numeric_cols = df.select_dtypes(include='number').columns.difference([group_by_column])
9728
+ other_cols = df.columns.difference(numeric_cols).difference([group_by_column])
9729
+
9730
+ # Define aggregation functions: mean for numeric cols, mode for other cols
9731
+ # Update to handle empty mode results safely
9732
+ agg_dict = {col: 'mean' for col in numeric_cols}
9733
+ agg_dict.update({
9734
+ col: lambda x: pd.Series.mode(x).iloc[0] if not pd.Series.mode(x).empty else None for col in other_cols
9735
+ })
9736
+ # Group by the specified column, applying different aggregation functions to different columns
9737
+ processed_df = df.groupby(group_by_column, as_index=False).agg(agg_dict)
9738
+ return processed_df
9488
9739
 
9489
9740
  def average_blind_qc_by_modality(qc_full,verbose=False):
9490
9741
  """
@@ -9496,21 +9747,14 @@ def average_blind_qc_by_modality(qc_full,verbose=False):
9496
9747
  Returns:
9497
9748
  pandas dataframe containing the processed qc data.
9498
9749
  """
9750
+ qc_full = remove_unwanted_columns( qc_full )
9499
9751
  # Get unique modalities
9500
9752
  modalities = qc_full['modality'].unique()
9501
9753
  modalities = modalities[modalities != 'unknown']
9502
- # Get modalities to select
9503
- m0sel = qc_full['modality'].isin(modalities)
9504
9754
  # Get unique ids
9505
- uid = qc_full['filename'] + "_" + qc_full['modality'].astype(str)
9755
+ uid = qc_full['filename']
9506
9756
  to_average = uid.unique()
9507
- # Define column indices
9508
- contcols = ['noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi','reflection_err', 'EVR', 'msk_vol', 'spc0', 'spc1', 'spc2', 'org0','org1','org2', 'dimx', 'dimy', 'dimz', 'slice']
9509
- ocols = ['filename','modality', 'mriseries', 'mrimfg', 'mrimodel']
9510
- # restrict to columns we "know"
9511
- qc_full = qc_full[ocols+contcols]
9512
- # Create empty meta dataframe
9513
- meta = pd.DataFrame(columns=ocols+contcols)
9757
+ meta = pd.DataFrame(columns=qc_full.columns )
9514
9758
  # Process each unique id
9515
9759
  n = len(to_average)
9516
9760
  for k in range(n):
@@ -9522,15 +9766,11 @@ def average_blind_qc_by_modality(qc_full,verbose=False):
9522
9766
  if sum(m1sel) > 1:
9523
9767
  # If more than one entry for id, take the average of continuous columns,
9524
9768
  # maximum of the slice column, and the first entry of the other columns
9769
+ mfsub = process_dataframe_generalized(qc_full[m1sel],'filename')
9770
+ else:
9525
9771
  mfsub = qc_full[m1sel]
9526
- if mfsub.shape[0] > 1:
9527
- meta.loc[k, contcols] = mfsub.loc[:, contcols].mean(numeric_only=True)
9528
- meta.loc[k, 'slice'] = mfsub['slice'].max()
9529
- meta.loc[k, ocols] = mfsub[ocols].iloc[0]
9530
- elif sum(m1sel) == 1:
9531
- # If only one entry for id, just copy the entry
9532
- mfsub = qc_full[m1sel]
9533
- meta.loc[k] = mfsub.iloc[0]
9772
+ meta.loc[k] = mfsub.iloc[0]
9773
+ meta['modality'] = meta['modality'].replace(['DTIdwi', 'DTIb0'], 'DTI', regex=True)
9534
9774
  return meta
9535
9775
 
9536
9776
  def wmh( flair, t1, t1seg,
@@ -10163,15 +10403,14 @@ def novelty_detection_quantile(df_train, df_test):
10163
10403
  myqs[mykey] = abs( temp - 0.5 ) / 0.5
10164
10404
  return myqs
10165
10405
 
10166
-
10167
- def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_image, overlay_cmap='bwr', nslices=21, ncol=7, edge_image_dilation = 0, black_bg=True, axes = [0,1,2], fixed_overlay_range=None, crop=True, verbose=False ):
10406
+ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_image, overlay_cmap='bwr', nslices=21, ncol=7, edge_image_dilation = 0, black_bg=True, axes = [0,1,2], fixed_overlay_range=None, crop=5, verbose=False ):
10168
10407
  """
10169
10408
  Create figures based on statistical data and an underlying brain image.
10170
10409
 
10171
10410
  Assumes both ~/.antspyt1w and ~/.antspymm data is available
10172
10411
 
10173
10412
  Parameters:
10174
- - statistical_df (pandas dataframe): with 2 columns named anat and value
10413
+ - statistical_df (pandas dataframe): with 2 columns named anat and values
10175
10414
  the anat column should have names that meet *partial matching* criterion
10176
10415
  with respect to regions that are measured in antspymm. value will be
10177
10416
  the value to be displayed. if two examples of a given region exist in
@@ -10186,12 +10425,13 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10186
10425
  - black_bg (bool): boolean
10187
10426
  - axes (list): integer list typically [0,1,2] sagittal coronal axial
10188
10427
  - fixed_overlay_range (list): scalar pair will try to keep a constant cbar and will truncate the overlay at these min/max values
10189
- - crop (bool): crops the image to display by the extent of the overlay
10428
+ - crop (int): crops the image to display by the extent of the overlay; larger values dilate the masks more.
10190
10429
  - verbose (bool): boolean
10191
10430
 
10192
10431
  Returns:
10193
10432
  an image with values mapped to the associated regions
10194
10433
  """
10434
+ import re
10195
10435
 
10196
10436
  # Read the statistical file
10197
10437
  zz = statistical_df
@@ -10200,19 +10440,21 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10200
10440
  mydict = pd.read_csv(data_dictionary_path)
10201
10441
  mydict = mydict[~mydict['Measurement'].str.contains("tractography-based connectivity", na=False)]
10202
10442
 
10443
+ statistical_df['anat'] = statistical_df['anat'].str.replace("_", ".", regex=True)
10444
+
10203
10445
  # Load image and process it
10204
10446
  edgeimg = ants.iMath(brain_image,"Normalize")
10205
10447
  if edge_image_dilation > 0:
10206
10448
  edgeimg = ants.iMath( edgeimg, "MD", edge_image_dilation)
10207
10449
 
10208
10450
  # Define lists and data frames
10209
- postfix = ['bf', 'deep_cit168lab', 'mtl', 'cerebellum', 'dkt_cortex','brainstem']
10210
- atlas = ['BF', 'CIT168', 'MTL', 'TustisonCobra', 'desikan-killiany-tourville','brainstem']
10211
- postdesc = ['nbm3CH13', 'CIT168_Reinf_Learn_v1_label_descriptions_pad', 'mtl_description', 'cerebellum', 'dkt','CIT168_T1w_700um_pad_adni_brainstem']
10451
+ postfix = ['bf', 'cit168lab', 'mtl', 'cerebellum', 'dkt_cortex','brainstem','JHU_wm','yeo']
10452
+ atlas = ['BF', 'CIT168', 'MTL', 'TustisonCobra', 'desikan-killiany-tourville','brainstem','JHU_wm','yeo']
10453
+ postdesc = ['nbm3CH13', 'CIT168_Reinf_Learn_v1_label_descriptions_pad', 'mtl_description', 'cerebellum', 'dkt','CIT168_T1w_700um_pad_adni_brainstem','FA_JHU_labels_edited','ppmi_template_500Parcels_Yeo2011_17Networks_2023_homotopic']
10212
10454
  statdf = pd.DataFrame({'img': postfix, 'atlas': atlas, 'csvdescript': postdesc})
10213
10455
  templateprefix = '~/.antspymm/PPMI_template0_'
10214
10456
  # Iterate through columns and create figures
10215
- col2viz = 'value'
10457
+ col2viz = 'values'
10216
10458
  if True:
10217
10459
  anattoshow = zz['anat'].unique()
10218
10460
  if verbose:
@@ -10224,21 +10466,74 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10224
10466
  if verbose:
10225
10467
  print(str(k) + " " + anattoshow[k] )
10226
10468
  mysub = zz[zz['anat'].str.contains(anattoshow[k])]
10469
+ anatsear=re.sub("dti.fa","",anattoshow[k])
10470
+ anatsear=re.sub("t1.volasym","",anatsear)
10471
+ anatsear=re.sub("t1.thkasym","",anatsear)
10472
+ anatsear=re.sub("t1.areaasym","",anatsear)
10473
+ anatsear=re.sub("t1.vol.","",anatsear)
10474
+ anatsear=re.sub("t1.thk.","",anatsear)
10475
+ anatsear=re.sub("t1.area.","",anatsear)
10476
+ anatsear=re.sub("asymdp.","",anatsear)
10477
+ anatsear=re.sub("asym.","",anatsear)
10478
+ anatsear=re.sub("dti.md.","",anatsear)
10479
+ anatsear=re.sub("dti.fa.","",anatsear)
10480
+ anatsear=re.sub("dti.md","",anatsear)
10481
+ anatsear=re.sub("dti.mean.md.","",anatsear)
10482
+ anatsear=re.sub("dti.mean.fa.","",anatsear)
10483
+ anatsear=re.sub("lravg","",anatsear)
10484
+ atlassearch = mydict['tidynames'].str.contains(anatsear)
10485
+ if atlassearch.sum() > 0:
10486
+ whichatlas = mydict[atlassearch]['Atlas'].iloc[0]
10487
+ oglabelname = mydict[atlassearch]['Label'].iloc[0]
10488
+ else:
10489
+ print(anatsear)
10490
+ oglabelname='unknown'
10491
+ whichatlas=None
10492
+ if verbose:
10493
+ print("oglabelname " + oglabelname )
10227
10494
  vals2viz = mysub[col2viz].agg(['min', 'max'])
10228
10495
  vals2viz = vals2viz[abs(vals2viz).idxmax()]
10229
10496
  myext = None
10230
10497
  if 'dktcortex' in anattoshow[k]:
10231
10498
  myext = 'dkt_cortex'
10232
10499
  elif 'cit168' in anattoshow[k]:
10233
- myext = 'deep_cit168lab'
10500
+ myext = 'cit168lab'
10234
10501
  elif 'mtl' in anattoshow[k]:
10235
10502
  myext = 'mtl'
10503
+ oglabelname=re.sub('mtl', '',anatsear)
10236
10504
  elif 'cerebellum' in anattoshow[k]:
10237
10505
  myext = 'cerebellum'
10506
+ oglabelname=re.sub('cerebellum', '',anatsear)
10507
+ # oglabelname=oglabelname[2:]
10238
10508
  elif 'brainstem' in anattoshow[k]:
10239
10509
  myext = 'brainstem'
10240
10510
  elif any(item in anattoshow[k] for item in ['nbm', 'bf']):
10241
10511
  myext = 'bf'
10512
+ oglabelname=re.sub(r'\.', '_',anatsear)
10513
+ elif whichatlas == 'johns hopkins white matter':
10514
+ myext = 'JHU_wm'
10515
+ elif whichatlas == 'desikan-killiany-tourville':
10516
+ myext = 'dkt_cortex'
10517
+ elif whichatlas == 'CIT168':
10518
+ myext = 'cit168lab'
10519
+ elif whichatlas == 'BF':
10520
+ myext = 'bf'
10521
+ oglabelname=re.sub('bf', '',oglabelname)
10522
+ elif whichatlas == 'yeo_homotopic':
10523
+ myext = 'yeo'
10524
+ if myext is None and verbose:
10525
+ if whichatlas is None:
10526
+ whichatlas='None'
10527
+ if anattoshow[k] is None:
10528
+ anattoshow[k]='None'
10529
+ print( "MYEXT " + anattoshow[k] + ' unfound ' + whichatlas )
10530
+ else:
10531
+ if verbose:
10532
+ print( "MYEXT " + myext )
10533
+
10534
+ if myext == 'cit168lab':
10535
+ oglabelname=re.sub("cit168","",oglabelname)
10536
+
10242
10537
  for j in postfix:
10243
10538
  if j == "dkt_cortex":
10244
10539
  j = 'dktcortex'
@@ -10252,30 +10547,86 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10252
10547
  locfilename = templateprefix + myext + '.nii.gz'
10253
10548
  if verbose:
10254
10549
  print( locfilename )
10255
- myatlas = ants.image_read(locfilename)
10256
- atlasDescript = pd.read_csv(f"~/.antspyt1w/{correctdescript}.csv")
10257
- atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10258
- atlasDescript['Description'] = atlasDescript['Description'].str.replace(" ", "_")
10259
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left_", "_")
10260
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right_", "_")
10261
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left", "")
10262
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right", "")
10550
+ if myext == 'yeo':
10551
+ oglabelname=oglabelname.lower()
10552
+ oglabelname=re.sub("rsfmri_fcnxpro122_","",oglabelname)
10553
+ oglabelname=re.sub("rsfmri_fcnxpro129_","",oglabelname)
10554
+ oglabelname=re.sub("rsfmri_fcnxpro134_","",oglabelname)
10555
+ locfilename = "~/.antspymm/ppmi_template_500Parcels_Yeo2011_17Networks_2023_homotopic.nii.gz"
10556
+ atlasDescript = pd.read_csv(f"~/.antspymm/{correctdescript}.csv")
10557
+ atlasDescript.rename(columns={'SystemName': 'Description'}, inplace=True)
10558
+ atlasDescript.rename(columns={'ROI': 'Label'}, inplace=True)
10559
+ atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10560
+ else:
10561
+ atlasDescript = pd.read_csv(f"~/.antspyt1w/{correctdescript}.csv")
10562
+ atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10563
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace(" ", "_")
10564
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left_", "_")
10565
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right_", "_")
10566
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left", "")
10567
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right", "")
10568
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("left_", "")
10569
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("right_", "")
10570
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("/",".")
10571
+ if myext == 'JHU_wm':
10572
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("fa-", "")
10573
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("-left-", "")
10574
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("-right-", "")
10575
+ if myext == 'cerebellum':
10576
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("l_", "")
10577
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("r_", "")
10578
+
10579
+ if verbose:
10580
+ print( atlasDescript )
10581
+ oglabelname = oglabelname.lower()
10582
+ oglabelname = re.sub(" ", "_",oglabelname)
10583
+ oglabelname = re.sub("_left_", "_",oglabelname)
10584
+ oglabelname = re.sub("_right_", "_",oglabelname)
10585
+ oglabelname = re.sub("_left", "",oglabelname)
10586
+ oglabelname = re.sub("_right", "",oglabelname)
10587
+ oglabelname = re.sub("t1hier_vol_", "",oglabelname)
10588
+ oglabelname = re.sub("t1hier_area_", "",oglabelname)
10589
+ oglabelname = re.sub("t1hier_thk_", "",oglabelname)
10590
+ oglabelname = re.sub("dktregions", "",oglabelname)
10591
+ oglabelname = re.sub("dktcortex", "",oglabelname)
10592
+ if myext == 'JHU_wm':
10593
+ oglabelname = re.sub("dti_mean_fa.", "",oglabelname)
10594
+ oglabelname = re.sub("dti_mean_md.", "",oglabelname)
10595
+ oglabelname = re.sub(".left.", "",oglabelname)
10596
+ oglabelname = re.sub(".right.", "",oglabelname)
10597
+ oglabelname = re.sub(".lravg.", "",oglabelname)
10598
+ oglabelname = re.sub(".asym.", "",oglabelname)
10599
+
10600
+ if verbose:
10601
+ print("oglabelname " + oglabelname )
10602
+
10263
10603
  if myext == 'cerebellum':
10264
10604
  atlasDescript['Description'] = atlasDescript['Description'].str.replace("l_", "")
10265
10605
  atlasDescript['Description'] = atlasDescript['Description'].str.replace("r_", "")
10266
- whichindex = atlasDescript.index[atlasDescript['Description'] == anattoshow[k]].values[0]
10606
+ whichindex = atlasDescript.index[atlasDescript['Description'] == oglabelname].values[0]
10267
10607
  else:
10268
- whichindex = atlasDescript.index[atlasDescript['Description'].str.contains(anattoshow[k])]
10608
+ whichindex = atlasDescript.index[atlasDescript['Description'].str.contains(oglabelname)]
10269
10609
 
10270
10610
  if type(whichindex) is np.int64:
10271
10611
  labelnums = atlasDescript.loc[whichindex, 'Label']
10272
10612
  else:
10273
10613
  labelnums = list(atlasDescript.loc[whichindex, 'Label'])
10614
+
10615
+ if myext == 'yeo':
10616
+ parts = re.findall(r'\D+', oglabelname)
10617
+ oglabelname = [part.replace('_', '') for part in parts if part.replace('_', '')]
10618
+ filtered_df = atlasDescript[atlasDescript['Description'].isin(oglabelname)]
10619
+ labelnums = filtered_df['Label'].tolist()
10620
+
10274
10621
  if not isinstance(labelnums, list):
10275
10622
  labelnums=[labelnums]
10276
10623
  addemiszero = ants.threshold_image(addem, 0, 0)
10277
10624
  temp = ants.image_read(locfilename)
10278
10625
  temp = ants.mask_image(temp, temp, level=labelnums, binarize=True)
10626
+ if verbose:
10627
+ print("DEBUG")
10628
+ print( temp.sum() )
10629
+ print( labelnums )
10279
10630
  temp[temp == 1] = (vals2viz)
10280
10631
  temp[addemiszero == 0] = 0
10281
10632
  addem = addem + temp
@@ -10284,8 +10635,8 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10284
10635
  print('Done Adding')
10285
10636
  for axx in axes:
10286
10637
  figfn=output_prefix+f"fig{col2viz}ax{axx}_py.jpg"
10287
- if crop:
10288
- cmask = ants.threshold_image( addem,1e-5, 1e9 ).iMath("MD",3) + ants.threshold_image( addem,-1e9, -1e-5 ).iMath("MD",3)
10638
+ if crop > 0:
10639
+ cmask = ants.threshold_image( addem,1e-5, 1e9 ).iMath("MD",crop) + ants.threshold_image( addem,-1e9, -1e-5 ).iMath("MD",crop)
10289
10640
  addemC = ants.crop_image( addem, cmask )
10290
10641
  edgeimgC = ants.crop_image( edgeimg, cmask )
10291
10642
  else:
@@ -10305,7 +10656,6 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10305
10656
  print("DONE brain map figures")
10306
10657
  return addem
10307
10658
 
10308
-
10309
10659
  def filter_df(indf, myprefix):
10310
10660
  """
10311
10661
  Process and filter a pandas DataFrame, removing certain columns,
@@ -10479,6 +10829,27 @@ def aggregate_antspymm_results(input_csv, subject_col='subjectID', date_col='dat
10479
10829
  df=df.drop(badnames, axis=1)
10480
10830
  return( df )
10481
10831
 
10832
def find_most_recent_file(file_list):
    """
    Find the most recently modified file from a list of file paths.

    Parameters:
    - file_list: A list of strings, where each string is a path to a file.

    Returns:
    - A single-element list containing the path of the most recently
      modified file, or None if the list is empty or contains no valid
      files.  (A list — not a bare path — is returned so callers can use
      the result interchangeably with glob() output, e.g. ``result[0]``.)
    """
    # Keep only entries that exist and are regular files; os.path.isfile
    # is False for missing paths and directories alike.
    valid_files = [f for f in file_list if os.path.isfile(f)]
    if not valid_files:
        return None
    # max() keyed on modification time picks the latest-modified file.
    return [max(valid_files, key=os.path.getmtime)]
10852
+
10482
10853
  def aggregate_antspymm_results_sdf(
10483
10854
  study_df,
10484
10855
  project_col='projectID',
@@ -10685,8 +11056,8 @@ def aggregate_antspymm_results_sdf(
10685
11056
  t1wfn = sorted( glob( modsearch ) )
10686
11057
  if len( t1wfn ) > 1:
10687
11058
  nlarge = len(t1wfn)
10688
- t1wfn = [ t1wfn[ len(t1wfn)-1 ] ]
10689
- warnings.warn("there are " + str( nlarge ) + " number of wide fns with search path " + modsearch + " we take the last of these " + t1wfn[0] )
11059
+ t1wfn = find_most_recent_file( t1wfn )
11060
+ warnings.warn("there are " + str( nlarge ) + " number of wide fns with search path " + modsearch + " we take the most recent of these " + t1wfn[0] )
10690
11061
  # raise ValueError("there are " + str( len( t1wfn ) ) + " number of wide fns with search path " + modsearch )
10691
11062
  if len( t1wfn ) == 1:
10692
11063
  if verbose:
@@ -10802,3 +11173,276 @@ def filter_image_files(image_paths, criteria='largest'):
10802
11173
  raise ValueError("Criteria must be 'smallest', 'largest', or 'brightest'.")
10803
11174
 
10804
11175
  return selected_image_path
11176
+
11177
+
11178
+
11179
def mm_match_by_qc_scoring(df_a, df_b, match_column, criteria, prefix='matched_', exclude_columns=None):
    """
    Pair each row of ``df_a`` with its best-scoring counterpart in ``df_b``.

    Rows are candidates when they share the same value in ``match_column``.
    Each criterion column of ``df_b`` is z-scored (negated first for 'max'
    criteria so that lower is always better), the per-criterion scores are
    summed into ``best_score``, and the candidate with the smallest sum wins.

    Parameters:
    - df_a: DataFrame A (one output row per input row).
    - df_b: DataFrame B (pool of candidate matches; not modified).
    - match_column: column name that must agree between A and B rows.
    - criteria: dict mapping column name -> 'min' or 'max', the direction
      in which that column is considered "best".
    - prefix: prefix applied to df_b-derived columns (except match_column)
      in the combined output, to avoid name collisions.
    - exclude_columns: optional list of df_b columns dropped from the output.

    Returns:
    - (combined, leftovers): ``combined`` is df_a side-by-side with the
      matched (prefixed) df_b rows — unmatched A rows get NaNs; ``leftovers``
      holds the df_b rows that were never selected.
    """
    from scipy.stats import zscore
    # Drop pandas round-trip artifacts ("Unnamed: ...") from both inputs;
    # work on a copy of df_b since we add scoring columns to it.
    left = df_a.loc[:, ~df_a.columns.str.startswith('Unnamed:')]
    right = df_b.loc[:, ~df_b.columns.str.startswith('Unnamed:')].copy()

    # Sign table turns both directions into "smaller is better".
    direction_sign = {'max': -1.0, 'min': 1.0}
    for colname, direction in criteria.items():
        if direction in direction_sign:
            right.loc[right.index, 'score_' + colname] = zscore(direction_sign[direction] * right[colname])

    # Aggregate score: sum of all per-criterion z-scores.
    score_cols = ['score_' + c for c in criteria]
    right['best_score'] = right[score_cols].sum(axis=1)

    picked_idx = []    # df_b indices that were claimed by some df_a row
    picked_rows = []
    for _, left_row in left.iterrows():
        candidates = right[right[match_column] == left_row[match_column]]
        if candidates.empty:
            # No counterpart: placeholder row of NaNs keeps alignment.
            picked_rows.append(pd.Series(dtype='float64'))
            continue
        winner = candidates['best_score'].idxmin()
        picked_idx.append(winner)
        picked_rows.append(candidates.loc[winner])

    best = pd.DataFrame(picked_rows).reset_index(drop=True)
    if exclude_columns is not None:
        best = best.drop(columns=exclude_columns, errors='ignore')
    # Prefix every df_b-derived column except the match key itself.
    best = best.rename(columns=lambda c: c if c == match_column else f"{prefix}{c}")

    combined = pd.concat([left.reset_index(drop=True), best], axis=1)
    leftovers = right.drop(index=picked_idx).reset_index(drop=True)
    return combined, leftovers
11243
+
11244
+
11245
def fix_LR_RL_stuff(df, col1, col2, size_col1, size_col2, id1, id2 ):
    """
    Collapse a pair of per-row image candidates onto the first slot.

    For each row, ``col1``/``col2`` hold two candidate filenames with sizes
    ``size_col1``/``size_col2`` and ids ``id1``/``id2``.  If BOTH filenames
    contain 'LR' or 'RL' the row is left alone (a genuine phase-encoding
    pair).  Otherwise only one candidate survives in slot 1 — the one with
    the larger size — and slot 2 is cleared (filename/id -> None, size -> 0).
    Ties go to slot 1 unless only ``col2`` carries the LR/RL tag, in which
    case ties go to slot 2 (matching the original branch asymmetry).

    Returns a modified copy; the input DataFrame is not changed.
    """
    out = df.copy()
    # Substring tests below require string dtype (NaN becomes 'nan',
    # which contains neither 'LR' nor 'RL').
    for c in (col1, col2, id1, id2):
        out[c] = out[c].astype(str)

    for idx, row in out.iterrows():
        name1, name2 = row[col1], row[col2]
        sz1, sz2 = row[size_col1], row[size_col2]
        tagged1 = 'RL' in name1 or 'LR' in name1
        tagged2 = 'RL' in name2 or 'LR' in name2

        if tagged1 and tagged2:
            # Both directions present: keep the pair untouched.
            continue

        # Decide whether slot 2 replaces slot 1.  Note the deliberate
        # asymmetry: when only col2 is tagged, a size tie promotes col2.
        if tagged2 and not tagged1:
            promote = sz2 >= sz1
        else:
            promote = sz1 < sz2

        if promote:
            out.at[idx, col1] = out.at[idx, col2]
            out.at[idx, size_col1] = out.at[idx, size_col2]
            out.at[idx, id1] = out.at[idx, id2]
        # Slot 2 is always cleared once a single winner is chosen.
        out.at[idx, col2] = None
        out.at[idx, size_col2] = 0
        out.at[idx, id2] = None

    return out
11299
+
11300
+
11301
def renameit(df, old_col_name, new_col_name):
    """
    Rename one column of a DataFrame in place.

    If ``old_col_name`` is absent a ``UserWarning`` is emitted and the
    DataFrame is left untouched — no exception is raised, which lets
    callers attempt a batch of renames where some columns may not exist.

    Parameters:
    - df: pandas.DataFrame whose column is renamed (modified in place).
    - old_col_name: str, current name of the column.
    - new_col_name: str, replacement name.

    Returns:
    None
    """
    import warnings
    if old_col_name in df.columns:
        df.rename(columns={old_col_name: new_col_name}, inplace=True)
    else:
        warnings.warn(f"The column '{old_col_name}' does not exist in the DataFrame.")
11327
+
11328
+
11329
def mm_match_by_qc_scoring_all( qc_dataframe, fix_LRRL=True, verbose=True ):
    """
    Build a wide, modality-matched dataframe from a QC dataframe.

    A best T1w row is selected per subject/date first; rows from the other
    modalities (T2Flair, NM2DMT, rsfMRI, DTI) are then matched to it by QC
    scoring (see ``mm_match_by_qc_scoring``): criterion columns are z-scored
    and summed, and the lowest combined score wins.  Matching minimizes the
    outlier measure (``ol_loop``) and noise while maximizing snr, EVR and
    (for time series) dimt; DTI matching additionally prefers a lower
    ``dti_bvalueMax``.

    Parameters:
    ----------
    qc_dataframe : pandas.DataFrame
        QC metrics per image; must contain 'modality', 'filename',
        'ol_loop', 'ol_lof', 'ol_lof_decision' and 'subjectIDdate' plus the
        criterion columns named above.
    fix_LRRL : bool, optional
        If True, collapse duplicate rsfMRI slots via ``fix_LR_RL_stuff``.
    verbose : bool, optional
        If True, prints the progress and the shape of the DataFrame being
        processed in each step.

    Returns:
    -------
    pandas.DataFrame
        One row per subject/date with prefixed-then-renamed columns for each
        matched modality slot (flairid/flairfn, rsfid*/rsffn*, dtid*/dtfn*,
        nmid*/nmfn*).
    """
    # Unify DTI sub-modalities so they all compete in one DTI pool.
    qc_dataframe['modality'] = qc_dataframe['modality'].replace(['DTIdwi', 'DTIb0'], 'DTI', regex=True)
    # Coerce dtypes up front; CSVs may have loaded these as objects.
    qc_dataframe['filename']=qc_dataframe['filename'].astype(str)
    qc_dataframe['ol_loop']=qc_dataframe['ol_loop'].astype(float)
    qc_dataframe['ol_lof']=qc_dataframe['ol_lof'].astype(float)
    qc_dataframe['ol_lof_decision']=qc_dataframe['ol_lof_decision'].astype(float)
    outlier_column='ol_loop'
    # best_mmm is defined elsewhere in this module; presumably it returns a
    # dict whose 'filt' entry is the per-subject best-row dataframe for the
    # requested modality — TODO confirm against its definition.
    mmdf0 = best_mmm( qc_dataframe, 'T1w', outlier_column=outlier_column )['filt']
    fldf = best_mmm( qc_dataframe, 'T2Flair', outlier_column=outlier_column )['filt']
    nmdf = best_mmm( qc_dataframe, 'NM2DMT', outlier_column=outlier_column )['filt']
    rsdf = best_mmm( qc_dataframe, 'rsfMRI', outlier_column=outlier_column )['filt']
    dtdf = best_mmm( qc_dataframe, 'DTI', outlier_column=outlier_column )['filt']

    # Structural criteria: low outlierness/noise, high snr/EVR, small
    # registration reflection error.
    criteria = {'ol_loop': 'min', 'noise': 'min', 'snr': 'max', 'EVR': 'max', 'reflection_err':'min'}
    # Metadata columns excluded from the prefixed per-modality output.
    xcl = [ 'mrimfg', 'mrimodel','mriMagneticFieldStrength', 'dti_failed', 'rsf_failed', 'subjectID', 'date', 'subjectIDdate','repeat']
    # Assuming df_a and df_b are already loaded
    mmdf, undffl = mm_match_by_qc_scoring(mmdf0, fldf, 'subjectIDdate', criteria,
        prefix='T2Flair_', exclude_columns=xcl )

    # Repeatedly match NM rows, one slot (prefix) per pass; each pass
    # consumes the winners, leaving the remainder for the next slot.
    prefixes = ['NM1_', 'NM2_', 'NM3_', 'NM4_', 'NM5_', 'NM6_']
    undfmod = nmdf # Initialize 'undfmod' with 'nmdf' for the first iteration
    if verbose:
        print('start NM')
        print( undfmod.shape )
    for prefix in prefixes:
        # 50 is a heuristic floor: don't open another slot for a tiny pool.
        if undfmod.shape[0] > 50:
            mmdf, undfmod = mm_match_by_qc_scoring(mmdf, undfmod, 'subjectIDdate', criteria, prefix=prefix, exclude_columns=xcl)
            if verbose:
                print( prefix )
                print( undfmod.shape )

    criteria = {'ol_loop': 'min', 'noise': 'min', 'snr': 'max', 'EVR': 'max', 'dimt':'max'}
    # higher bvalues lead to more noise ...
    # (the line above is immediately overridden: DTI uses the bvalue-aware
    # criteria below, and this second dict is the one rsfMRI inherits too)
    criteria = {'ol_loop': 'min', 'noise': 'min', 'dti_bvalueMax':'min', 'dimt':'max'}
    prefixes = ['DTI1_', 'DTI2_', 'DTI3_'] # List of prefixes for each matching iteration
    undfmod = dtdf
    if verbose:
        print('start DT')
        print( undfmod.shape )
    for prefix in prefixes:
        if undfmod.shape[0] > 50:
            mmdf, undfmod = mm_match_by_qc_scoring(mmdf, undfmod, 'subjectIDdate', criteria, prefix=prefix, exclude_columns=xcl)
            if verbose:
                print( prefix )
                print( undfmod.shape )

    prefixes = ['rsf1_', 'rsf2_', 'rsf3_'] # List of prefixes for each matching iteration
    undfmod = rsdf # Initialize 'undfmod' with 'rsdf' for the first iteration
    if verbose:
        print('start rsf')
        print( undfmod.shape )
    for prefix in prefixes:
        if undfmod.shape[0] > 50:
            mmdf, undfmod = mm_match_by_qc_scoring(mmdf, undfmod, 'subjectIDdate', criteria, prefix=prefix, exclude_columns=xcl)
            if verbose:
                print( prefix )
                print( undfmod.shape )

    if fix_LRRL:
        # mmdf=fix_LR_RL_stuff( mmdf, 'DTI1_filename', 'DTI2_filename', 'DTI1_dimt', 'DTI2_dimt')
        mmdf=fix_LR_RL_stuff( mmdf, 'rsf1_filename', 'rsf2_filename', 'rsf1_dimt', 'rsf2_dimt', 'rsf1_imageID', 'rsf2_imageID' )
    else:
        import warnings
        warnings.warn("FIXME: should fix LR and RL situation for the DTI and rsfMRI")

    # now do the necessary replacements
    # renameit only warns when a column is absent, so these are safe even
    # for columns that were never created (e.g. perf_* — no perf matching
    # happens in this function; presumably those columns arrive via the
    # input dataframe when present — TODO confirm).
    renameit( mmdf, 'perf_imageID', 'perfid' )
    renameit( mmdf, 'perf_filename', 'perffn' )
    renameit( mmdf, 'T2Flair_imageID', 'flairid' )
    renameit( mmdf, 'T2Flair_filename', 'flairfn' )
    renameit( mmdf, 'rsf1_imageID', 'rsfid1' )
    renameit( mmdf, 'rsf2_imageID', 'rsfid2' )
    renameit( mmdf, 'rsf1_filename', 'rsffn1' )
    renameit( mmdf, 'rsf2_filename', 'rsffn2' )
    renameit( mmdf, 'DTI1_imageID', 'dtid1' )
    renameit( mmdf, 'DTI2_imageID', 'dtid2' )
    renameit( mmdf, 'DTI3_imageID', 'dtid3' )
    renameit( mmdf, 'DTI1_filename', 'dtfn1' )
    renameit( mmdf, 'DTI2_filename', 'dtfn2' )
    renameit( mmdf, 'DTI3_filename', 'dtfn3' )
    # NOTE(review): range(1,6) renames NM1_..NM5_ only, yet the NM matching
    # above can create an NM6_ slot — NM6_* columns would keep their raw
    # names.  Confirm whether NM6 is intentionally excluded downstream.
    for x in range(1,6):
        temp0="NM"+str(x)+"_imageID"
        temp1="nmid"+str(x)
        renameit( mmdf, temp0, temp1 )
        temp0="NM"+str(x)+"_filename"
        temp1="nmfn"+str(x)
        renameit( mmdf, temp0, temp1 )
    return mmdf