antspymm 1.2.7__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
antspymm/mm.py CHANGED
@@ -92,6 +92,7 @@ __all__ = ['version',
92
92
  'remove_volumes_from_timeseries',
93
93
  'loop_timeseries_censoring',
94
94
  'clean_tmp_directory',
95
+ 'validate_nrg_file_format',
95
96
  'dict_to_dataframe']
96
97
 
97
98
  from pathlib import Path
@@ -160,6 +161,122 @@ def version( ):
160
161
  'antspymm': pkg_resources.require("antspymm")[0].version
161
162
  }
162
163
 
164
def nrg_filename_to_subjectvisit(s, separator='-'):
    """
    Build a subject/visit key from an NRG-style file name.

    Only the last path component of ``s`` is used. It is split on
    ``separator`` and the first three fields are joined back together with
    the same separator. No validation is performed: if the name has fewer
    than three fields, whatever fields exist are returned.

    Parameters:
    - s: A file name or path whose basename looks like
      ``PREFIX<sep>Number<sep>Date<sep>...``.
    - separator: The field separator used in the file name (default is '-').

    Returns:
    - A string of the form 'PREFIX-Number-Date' (using ``separator``).
    """
    leaf = os.path.basename(s)
    leading_fields = leaf.split(separator)[:3]
    return separator.join(leading_fields)
180
+
181
+
182
def validate_nrg_file_format(path, separator='-'):
    """
    Check whether a file path conforms to the NRG file format.

    The layout checked here is::

        .../StudyName/SubjectID/Session/Modality/ImageID/<name><ext>

    where ``<name>`` must equal the five trailing directory names joined by
    ``separator`` and ``<ext>`` must be one of the known image / sidecar
    extensions (.nii.gz, .nii, .mhd, .nrrd, .mha, .json, .bval, .bvec).
    Because the file name is split on ``separator``, none of the five
    directory names may itself contain the separator.

    Runs of repeated '/' in ``path`` are collapsed before validation and a
    warning is issued when that happens.

    :param path: The file path to validate.
    :param separator: The separator used in the filename (default '-').
    :return: A tuple (bool, str) indicating whether the path is valid and a
        message explaining the validation result.

    Example::

        ntfn = '/Users/ntustison/Data/Stone/LIMBIC/NRG/ANTsLIMBIC/sub08C105120Yr/ses-1/rsfMRI_RL/000/ANTsLIMBIC_sub08C105120Yr_ses-1_rsfMRI_RL_000.nii.gz'
        ntfngood = '/Users/ntustison/Data/Stone/LIMBIC/NRG/ANTsLIMBIC/sub08C105120Yr/ses_1/rsfMRI_RL/000/ANTsLIMBIC-sub08C105120Yr-ses_1-rsfMRI_RL-000.nii.gz'

        validate_nrg_file_format(ntfngood, '-')
        print(validate_nrg_file_format(ntfn, '-'))
        print(validate_nrg_file_format(ntfn, '_'))
    """
    import re
    import warnings

    # Priority order matters: '.nii.gz' must be tried before '.nii'.
    known_extensions = [".nii.gz", ".nii", ".mhd", ".nrrd", ".mha", ".json", ".bval", ".bvec"]

    # Collapse repeated slashes so that splitting on '/' yields no empty
    # interior components.
    collapsed = re.sub(r'/+', '/', path)
    if collapsed != path:
        path = collapsed
        warnings.warn("Probably had multiple repeated slashes eg /// in the file path. this might cause issues. clean up with re.sub(r'/+', '/', path)")

    parts = path.split('/')
    if len(parts) < 7:  # root + 5 NRG directories + filename
        return False, "Path structure is incomplete. Expected at least 7 components, found {}.".format(len(parts))

    # Everything between the leading component and the filename.
    directory_components = parts[1:-1]
    filename = parts[-1]

    # Validate the extension and strip it in a single pass.
    matched_extension = None
    for ext in known_extensions:
        if filename.endswith(ext):
            matched_extension = ext
            break
    if matched_extension is None:
        expected = ", ".join("'{}'".format(ext.lstrip('.')) for ext in known_extensions)
        return False, "Invalid file extension for '{}'. Expected one of: {}.".format(filename, expected)
    filename_without_extension = filename[:-len(matched_extension)]

    # The NRG filename has exactly five separator-delimited fields.
    filename_parts = filename_without_extension.split(separator)
    if len(filename_parts) != 5:
        return False, "Filename does not have exactly 5 parts separated by '{}'. Found {} parts.".format(separator, len(filename_parts))

    # The filename must mirror the five trailing directory names.
    nrg_components = directory_components[-5:]
    expected_filename = separator.join(nrg_components)
    if filename_without_extension != expected_filename:
        return False, "Filename structure does not match directory structure. Expected filename: {}.".format(expected_filename)

    # Defensive check that every NRG directory level is non-empty
    # (len(parts) >= 7 guarantees five components are available here).
    study_name, subject_id, session, modality, image_id = nrg_components
    if not all([study_name, subject_id, session, modality, image_id]):
        return False, "Directory structure does not follow NRG format. Ensure StudyName, SubjectID, Session (ses_x), and Modality are correctly specified."

    # If all checks pass
    return True, "The path conforms to the NRG format."
163
280
 
164
281
  def get_antsimage_keys(dictionary):
165
282
  """
@@ -336,11 +453,11 @@ def docsamson(locmod, studycsv, outputdir, projid, sid, dtid, mysep, t1iid=None,
336
453
  imfns.append('nmid' + str(i))
337
454
  elif locmod == 'rsfMRI':
338
455
  imfns=[]
339
- for i in range(3):
456
+ for i in range(4):
340
457
  imfns.append('rsfid' + str(i))
341
458
  elif locmod == 'DTI':
342
459
  imfns=[]
343
- for i in range(3):
460
+ for i in range(4):
344
461
  imfns.append('dtid' + str(i))
345
462
 
346
463
  # Process each file name
@@ -387,11 +504,11 @@ def get_valid_modalities( long=False, asString=False, qc=False ):
387
504
  asString - concat list to string
388
505
  """
389
506
  if long:
390
- mymod = ["T1w", "NM2DMT", "rsfMRI", "rsfMRI_LR", "rsfMRI_RL", "DTI", "DTI_LR","DTI_RL","T2Flair", "dwi", "func", "perf"]
507
+ mymod = ["T1w", "NM2DMT", "rsfMRI", "rsfMRI_LR", "rsfMRI_RL", "rsfMRILR", "rsfMRIRL", "DTI", "DTI_LR","DTI_RL", "DTILR","DTIRL","T2Flair", "dwi", "dwi_ap", "dwi_pa", "func", "func_ap", "func_pa", "perf"]
391
508
  elif qc:
392
- mymod = [ 'T1w', 'T2Flair', 'NM2DMT','DTIdwi','DTIb0', 'rsfMRI', "perf" ]
509
+ mymod = [ 'T1w', 'T2Flair', 'NM2DMT', 'DTI', 'DTIdwi','DTIb0', 'rsfMRI', "perf" ]
393
510
  else:
394
- mymod = ["T1w", "NM2DMT", "DTI","T2Flair", "rsfMRI", "perf" ]
511
+ mymod = ["T1w", "NM2DMT", "DTI","T2Flair", "rsfMRI", "perf" ]
395
512
  if not asString:
396
513
  return mymod
397
514
  else:
@@ -465,15 +582,15 @@ def generate_mm_dataframe(
465
582
  raise ValueError("source_image_directory does not exist")
466
583
  if len( rsf_filenames ) > 2:
467
584
  raise ValueError("len( rsf_filenames ) > 2")
468
- if len( dti_filenames ) > 2:
469
- raise ValueError("len( dti_filenames ) > 2")
585
+ if len( dti_filenames ) > 3:
586
+ raise ValueError("len( dti_filenames ) > 3")
470
587
  if len( nm_filenames ) > 11:
471
588
  raise ValueError("len( nm_filenames ) > 11")
472
589
  if len( rsf_filenames ) < 2:
473
590
  for k in range(len(rsf_filenames),2):
474
591
  rsf_filenames.append(None)
475
- if len( dti_filenames ) < 2:
476
- for k in range(len(dti_filenames),2):
592
+ if len( dti_filenames ) < 3:
593
+ for k in range(len(dti_filenames),3):
477
594
  dti_filenames.append(None)
478
595
  if len( nm_filenames ) < 10:
479
596
  for k in range(len(nm_filenames),10):
@@ -549,7 +666,7 @@ def generate_mm_dataframe(
549
666
  'perfid']
550
667
  mycols0 = corecols + [
551
668
  'rsfid1', 'rsfid2',
552
- 'dtid1', 'dtid2']
669
+ 'dtid1', 'dtid2','dtid3']
553
670
  nmext = [
554
671
  'nmid1', 'nmid2' 'nmid3', 'nmid4', 'nmid5',
555
672
  'nmid6', 'nmid7','nmid8', 'nmid9', 'nmid10', 'nmid11'
@@ -664,6 +781,49 @@ def generate_mm_dataframe_gpt(
664
781
  return studycsv
665
782
 
666
783
 
784
+
785
def filter_columns_by_nan_percentage(df, max_nan_percentage=50.0):
    """
    Drop DataFrame columns whose share of missing values is too high.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame whose columns are screened.
    max_nan_percentage : float, optional
        Upper bound (inclusive) on the percentage of NaN entries a column may
        have and still be kept. Defaults to 50.0, i.e. a column that is
        exactly half NaN is retained while anything above that is removed.

    Returns
    -------
    pandas.DataFrame
        ``df`` restricted to the columns that satisfy the threshold, in their
        original order.

    Examples
    --------
    >>> import pandas as pd
    >>> data = {'A': [1, 2, None, 4], 'B': [None, 2, 3, None], 'C': [1, 2, 3, 4]}
    >>> df = pd.DataFrame(data)
    >>> list(filter_columns_by_nan_percentage(df, 40.0).columns)
    ['A', 'C']

    Notes
    -----
    The per-column percentage is the mean of the null mask multiplied by 100.
    """
    # Fraction of nulls per column, expressed as a percentage.
    percent_missing = 100 * df.isnull().mean()

    # Boolean mask over column labels: True where the column is acceptable.
    within_limit = percent_missing <= max_nan_percentage
    kept_columns = percent_missing[within_limit].index

    return df[kept_columns]
824
+
825
+
826
+
667
827
  def parse_nrg_filename( x, separator='-' ):
668
828
  """
669
829
  split a NRG filename into its named parts
@@ -838,25 +998,28 @@ def outlierness_by_modality( qcdf, uid='filename', outlier_columns = ['noise', '
838
998
  from PyNomaly import loop
839
999
  from sklearn.neighbors import LocalOutlierFactor
840
1000
  qcdfout = qcdf.copy()
1001
+ pd.set_option('future.no_silent_downcasting', True)
1002
+ qcdfout.replace([np.inf, -np.inf], np.nan, inplace=True)
841
1003
  if uid not in qcdfout.keys():
842
- raise ValueError(uid + " not in dataframe")
1004
+ raise ValueError( str(uid) + " not in dataframe")
843
1005
  if 'ol_loop' not in qcdfout.keys():
844
1006
  qcdfout['ol_loop']=math.nan
845
1007
  if 'ol_lof' not in qcdfout.keys():
846
1008
  qcdfout['ol_lof']=math.nan
1009
+ didit=False
847
1010
  for mod in get_valid_modalities( qc=True ):
1011
+ didit=True
848
1012
  lof = LocalOutlierFactor()
849
1013
  locsel = qcdfout["modality"] == mod
850
1014
  rr = qcdfout[locsel][outlier_columns]
851
- with pd.option_context('mode.use_inf_as_na', True):
852
- for myolcol in outlier_columns:
853
- rr[myolcol].fillna(rr[myolcol].mean(), inplace=True)
1015
+ column_means = rr.mean()
1016
+ rr.fillna(column_means, inplace=True)
854
1017
  if rr.shape[0] > 1:
855
1018
  if verbose:
856
- print(mod)
1019
+ print("calc: " + mod + " outlierness " )
857
1020
  myneigh = np.min( [24, int(np.round(rr.shape[0]*0.5)) ] )
858
1021
  temp = antspyt1w.loop_outlierness(rr.astype(float), standardize=True, extent=3, n_neighbors=myneigh, cluster_labels=None)
859
- qcdfout.loc[locsel,'ol_loop']=temp
1022
+ qcdfout.loc[locsel,'ol_loop']=temp.astype('float64')
860
1023
  yhat = lof.fit_predict(rr)
861
1024
  temp = lof.negative_outlier_factor_*(-1.0)
862
1025
  temp = temp - temp.min()
@@ -864,6 +1027,8 @@ def outlierness_by_modality( qcdf, uid='filename', outlier_columns = ['noise', '
864
1027
  yhat[ yhat == -1] = 1 # these are outliers
865
1028
  qcdfout.loc[locsel,'ol_lof_decision']=yhat
866
1029
  qcdfout.loc[locsel,'ol_lof']=temp/temp.max()
1030
+ if verbose:
1031
+ print( didit )
867
1032
  return qcdfout
868
1033
 
869
1034
 
@@ -941,6 +1106,11 @@ def study_dataframe_from_matched_dataframe( matched_dataframe, rootdir, outputdi
941
1106
  dtfn2=glob.glob(os.path.join(rootdir, pid, sid, dt, 'DTI*', dtid, str(csvrow['dtfn2'].iloc[0]+iext) ))[0]
942
1107
  if exists( dtfn2 ):
943
1108
  dtList.append( dtfn2 )
1109
+ if 'dtfn3' in csvrow.keys():
1110
+ dtid=str(int(csvrow['dtid3'].iloc[0]))
1111
+ dtfn3=glob.glob(os.path.join(rootdir, pid, sid, dt, 'DTI*', dtid, str(csvrow['dtfn3'].iloc[0]+iext) ))[0]
1112
+ if exists( dtfn3 ):
1113
+ dtList.append( dtfn3 )
944
1114
  if 'rsffn1' in csvrow.keys():
945
1115
  rsid=str(int(csvrow['rsfid1'].iloc[0]))
946
1116
  rsfn1=glob.glob(os.path.join( rootdir, pid, sid, dt, 'rsfMRI*', rsid, str(csvrow['rsffn1'].iloc[0]+iext) ))[0]
@@ -1016,6 +1186,8 @@ def highest_quality_repeat(mxdfin, idvar, visitvar, qualityvar):
1016
1186
  if qualityvar not in mxdfin.columns:
1017
1187
  raise ValueError("qualityvar not in dataframe")
1018
1188
 
1189
+ mxdfin[qualityvar] = mxdfin[qualityvar].astype(float)
1190
+
1019
1191
  vizzes = mxdfin[visitvar].unique()
1020
1192
  uids = mxdfin[idvar].unique()
1021
1193
  useit = np.zeros(mxdfin.shape[0], dtype=bool)
@@ -1052,40 +1224,48 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1052
1224
  """
1053
1225
  import pandas as pd
1054
1226
  import numpy as np
1227
+ qc_dataframe['filename']=qc_dataframe['filename'].astype(str)
1228
+ qc_dataframe['ol_loop']=qc_dataframe['ol_loop'].astype(float)
1229
+ qc_dataframe['ol_lof']=qc_dataframe['ol_lof'].astype(float)
1230
+ qc_dataframe['ol_lof_decision']=qc_dataframe['ol_lof_decision'].astype(float)
1055
1231
  mmdf = best_mmm( qc_dataframe, 'T1w', outlier_column=outlier_column )['filt']
1056
1232
  fldf = best_mmm( qc_dataframe, 'T2Flair', outlier_column=outlier_column )['filt']
1057
1233
  nmdf = best_mmm( qc_dataframe, 'NM2DMT', outlier_column=outlier_column )['filt']
1058
1234
  rsdf = best_mmm( qc_dataframe, 'rsfMRI', outlier_column=outlier_column )['filt']
1059
1235
  dtdf = best_mmm( qc_dataframe, 'DTI', outlier_column=outlier_column )['filt']
1060
- mmdf['flairid'] = np.nan
1061
- mmdf['flairfn'] = np.nan
1062
- mmdf['flairloop'] = np.nan
1063
- mmdf['flairlof'] = np.nan
1064
- mmdf['dtid1'] = np.nan
1065
- mmdf['dtfn1'] = np.nan
1066
- mmdf['dtloop1'] = np.nan
1067
- mmdf['dtlof1'] = np.nan
1068
- mmdf['dtid2'] = np.nan
1069
- mmdf['dtfn2'] = np.nan
1070
- mmdf['dtloop2'] = np.nan
1071
- mmdf['dtlof2'] = np.nan
1072
- mmdf['rsfid1'] = np.nan
1073
- mmdf['rsffn1'] = np.nan
1074
- mmdf['rsfloop1'] = np.nan
1075
- mmdf['rsflof1'] = np.nan
1076
- mmdf['rsfid2'] = np.nan
1077
- mmdf['rsffn2'] = np.nan
1078
- mmdf['rsfloop2'] = np.nan
1079
- mmdf['rsflof2'] = np.nan
1236
+ mmdf['flairid'] = None
1237
+ mmdf['flairfn'] = None
1238
+ mmdf['flairloop'] = None
1239
+ mmdf['flairlof'] = None
1240
+ mmdf['dtid1'] = None
1241
+ mmdf['dtfn1'] = None
1242
+ mmdf['dtntimepoints1'] = 0
1243
+ mmdf['dtloop1'] = math.nan
1244
+ mmdf['dtlof1'] = math.nan
1245
+ mmdf['dtid2'] = None
1246
+ mmdf['dtfn2'] = None
1247
+ mmdf['dtntimepoints2'] = 0
1248
+ mmdf['dtloop2'] = math.nan
1249
+ mmdf['dtlof2'] = math.nan
1250
+ mmdf['rsfid1'] = None
1251
+ mmdf['rsffn1'] = None
1252
+ mmdf['rsfntimepoints1'] = 0
1253
+ mmdf['rsfloop1'] = math.nan
1254
+ mmdf['rsflof1'] = math.nan
1255
+ mmdf['rsfid2'] = None
1256
+ mmdf['rsffn2'] = None
1257
+ mmdf['rsfntimepoints2'] = 0
1258
+ mmdf['rsfloop2'] = math.nan
1259
+ mmdf['rsflof2'] = math.nan
1080
1260
  for k in range(1,11):
1081
1261
  myid='nmid'+str(k)
1082
- mmdf[myid] = np.nan
1262
+ mmdf[myid] = None
1083
1263
  myid='nmfn'+str(k)
1084
- mmdf[myid] = np.nan
1264
+ mmdf[myid] = None
1085
1265
  myid='nmloop'+str(k)
1086
- mmdf[myid] = np.nan
1266
+ mmdf[myid] = math.nan
1087
1267
  myid='nmlof'+str(k)
1088
- mmdf[myid] = np.nan
1268
+ mmdf[myid] = math.nan
1089
1269
  if verbose:
1090
1270
  print( mmdf.shape )
1091
1271
  for k in range(mmdf.shape[0]):
@@ -1094,12 +1274,13 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1094
1274
  progger = str( k ) # np.round( k / mmdf.shape[0] * 100 ) )
1095
1275
  print( progger, end ="...", flush=True)
1096
1276
  if dtdf is not None:
1097
- locsel = (dtdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k]) & (dtdf[outlier_column] < 0.5)
1277
+ locsel = (dtdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k])
1098
1278
  if sum(locsel) == 1:
1099
1279
  mmdf.iloc[k, mmdf.columns.get_loc("dtid1")] = dtdf["imageID"][locsel].values[0]
1100
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = dtdf["fn"][locsel].values[0]
1280
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = dtdf[unique_identifier][locsel].values[0]
1101
1281
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop1")] = dtdf[outlier_column][locsel].values[0]
1102
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = dtdf['ol_lof_decision'][locsel].values[0]
1282
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = float(dtdf['ol_lof_decision'][locsel].values[0])
1283
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints1")] = float(dtdf['dimt'][locsel].values[0])
1103
1284
  elif sum(locsel) > 1:
1104
1285
  locdf = dtdf[locsel]
1105
1286
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1107,21 +1288,24 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1107
1288
  if locdf.shape[0] > 1:
1108
1289
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1109
1290
  mmdf.iloc[k, mmdf.columns.get_loc("dtid1")] = locdf["imageID"].values[0]
1110
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = locdf["fn"].values[0]
1291
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn1")] = locdf[unique_identifier].values[0]
1111
1292
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop1")] = locdf[outlier_column].values[0]
1112
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = locdf['ol_lof_decision'][locsel].values[0]
1293
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof1")] = float(locdf['ol_lof_decision'][locsel].values[0])
1294
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints1")] = float(dtdf['dimt'][locsel].values[0])
1113
1295
  if locdf.shape[0] > 1:
1114
1296
  mmdf.iloc[k, mmdf.columns.get_loc("dtid2")] = locdf["imageID"].values[1]
1115
- mmdf.iloc[k, mmdf.columns.get_loc("dtfn2")] = locdf["fn"].values[1]
1297
+ mmdf.iloc[k, mmdf.columns.get_loc("dtfn2")] = locdf[unique_identifier].values[1]
1116
1298
  mmdf.iloc[k, mmdf.columns.get_loc("dtloop2")] = locdf[outlier_column].values[1]
1117
- mmdf.iloc[k, mmdf.columns.get_loc("dtlof2")] = locdf['ol_lof_decision'][locsel].values[1]
1299
+ mmdf.iloc[k, mmdf.columns.get_loc("dtlof2")] = float(locdf['ol_lof_decision'][locsel].values[1])
1300
+ mmdf.iloc[k, mmdf.columns.get_loc("dtntimepoints2")] = float(dtdf['dimt'][locsel].values[1])
1118
1301
  if rsdf is not None:
1119
- locsel = (rsdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k]) & (rsdf[outlier_column] < 0.5)
1302
+ locsel = (rsdf["subjectIDdate"] == mmdf["subjectIDdate"].iloc[k])
1120
1303
  if sum(locsel) == 1:
1121
1304
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid1")] = rsdf["imageID"][locsel].values[0]
1122
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = rsdf["fn"][locsel].values[0]
1305
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = rsdf[unique_identifier][locsel].values[0]
1123
1306
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop1")] = rsdf[outlier_column][locsel].values[0]
1124
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = rsdf['ol_lof_decision'][locsel].values[0]
1307
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = float(rsdf['ol_lof_decision'].values[0])
1308
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints1")] = float(rsdf['dimt'][locsel].values[0])
1125
1309
  elif sum(locsel) > 1:
1126
1310
  locdf = rsdf[locsel]
1127
1311
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1129,22 +1313,24 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1129
1313
  if locdf.shape[0] > 1:
1130
1314
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1131
1315
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid1")] = locdf["imageID"].values[0]
1132
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = locdf["fn"].values[0]
1316
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn1")] = locdf[unique_identifier].values[0]
1133
1317
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop1")] = locdf[outlier_column].values[0]
1134
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = locdf['ol_lof_decision'].values[0]
1318
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof1")] = float(locdf['ol_lof_decision'].values[0])
1319
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints1")] = float(locdf['dimt'][locsel].values[0])
1135
1320
  if locdf.shape[0] > 1:
1136
1321
  mmdf.iloc[k, mmdf.columns.get_loc("rsfid2")] = locdf["imageID"].values[1]
1137
- mmdf.iloc[k, mmdf.columns.get_loc("rsffn2")] = locdf["fn"].values[1]
1322
+ mmdf.iloc[k, mmdf.columns.get_loc("rsffn2")] = locdf[unique_identifier].values[1]
1138
1323
  mmdf.iloc[k, mmdf.columns.get_loc("rsfloop2")] = locdf[outlier_column].values[1]
1139
- mmdf.iloc[k, mmdf.columns.get_loc("rsflof2")] = locdf['ol_lof_decision'].values[1]
1324
+ mmdf.iloc[k, mmdf.columns.get_loc("rsflof2")] = float(locdf['ol_lof_decision'].values[1])
1325
+ mmdf.iloc[k, mmdf.columns.get_loc("rsfntimepoints2")] = float(locdf['dimt'][locsel].values[1])
1140
1326
 
1141
1327
  if fldf is not None:
1142
1328
  locsel = fldf['subjectIDdate'] == mmdf['subjectIDdate'].iloc[k]
1143
1329
  if locsel.sum() == 1:
1144
1330
  mmdf.iloc[k, mmdf.columns.get_loc("flairid")] = fldf['imageID'][locsel].values[0]
1145
- mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = fldf['filename'][locsel].values[0]
1331
+ mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = fldf[unique_identifier][locsel].values[0]
1146
1332
  mmdf.iloc[k, mmdf.columns.get_loc("flairloop")] = fldf[outlier_column][locsel].values[0]
1147
- mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = fldf['ol_lof_decision'][locsel].values[0]
1333
+ mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = float(fldf['ol_lof_decision'][locsel].values[0])
1148
1334
  elif sum(locsel) > 1:
1149
1335
  locdf = fldf[locsel]
1150
1336
  dedupe = locdf[["snr","cnr"]].duplicated()
@@ -1152,9 +1338,9 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1152
1338
  if locdf.shape[0] > 1:
1153
1339
  locdf = locdf.sort_values(outlier_column).iloc[:2]
1154
1340
  mmdf.iloc[k, mmdf.columns.get_loc("flairid")] = locdf["imageID"].values[0]
1155
- mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = locdf["fn"].values[0]
1341
+ mmdf.iloc[k, mmdf.columns.get_loc("flairfn")] = locdf[unique_identifier].values[0]
1156
1342
  mmdf.iloc[k, mmdf.columns.get_loc("flairloop")] = locdf[outlier_column].values[0]
1157
- mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = locdf['ol_lof_decision'].values[0]
1343
+ mmdf.iloc[k, mmdf.columns.get_loc("flairlof")] = float(locdf['ol_lof_decision'].values[0])
1158
1344
 
1159
1345
  if nmdf is not None:
1160
1346
  locsel = nmdf['subjectIDdate'] == mmdf['subjectIDdate'].iloc[k]
@@ -1162,16 +1348,40 @@ def match_modalities( qc_dataframe, unique_identifier='filename', outlier_column
1162
1348
  locdf = nmdf[locsel]
1163
1349
  for i in range(np.min( [10,locdf.shape[0]])):
1164
1350
  nmid = "nmid"+str(i+1)
1165
- mmdf[nmid].iloc[k] = locdf['imageID'].iloc[i]
1351
+ mmdf.loc[k,nmid] = locdf['imageID'].iloc[i]
1166
1352
  nmfn = "nmfn"+str(i+1)
1167
- mmdf[nmfn].iloc[k] = locdf['imageID'].iloc[i]
1353
+ mmdf.loc[k,nmfn] = locdf['imageID'].iloc[i]
1168
1354
  nmloop = "nmloop"+str(i+1)
1169
- mmdf[nmloop].iloc[k] = locdf[outlier_column].iloc[i]
1355
+ mmdf.loc[k,nmloop] = locdf[outlier_column].iloc[i]
1170
1356
  nmloop = "nmlof"+str(i+1)
1171
- mmdf[nmloop].iloc[k] = locdf['ol_lof_decision'].iloc[i]
1357
+ mmdf.loc[k,nmloop] = float(locdf['ol_lof_decision'].iloc[i])
1172
1358
 
1359
+ mmdf['rsf_total_timepoints']=mmdf['rsfntimepoints1']+mmdf['rsfntimepoints2']
1360
+ mmdf['dt_total_timepoints']=mmdf['dtntimepoints1']+mmdf['dtntimepoints2']
1173
1361
  return mmdf
1174
1362
 
1363
+
1364
def add_repeat_column(df, groupby_column):
    """
    Number the repeated occurrences of each value in ``groupby_column``.

    A 'repeat' column is written into ``df`` (the input frame is modified in
    place and also returned). Within each group of identical values the rows
    are numbered 1, 2, 3, ... in their existing row order.

    Parameters:
    - df: pandas DataFrame.
    - groupby_column: The name of the column to group by and count repeats.

    Returns:
    - The same pandas DataFrame, now carrying the 'repeat' column.

    Raises:
    - ValueError: if ``groupby_column`` is not a column of ``df``.
    """
    column_is_present = groupby_column in df.columns
    if not column_is_present:
        raise ValueError(f"Column '{groupby_column}' does not exist in the DataFrame.")

    # cumcount() is zero-based within each group; shift so repeats start at 1.
    zero_based_rank = df.groupby(groupby_column).cumcount()
    df['repeat'] = zero_based_rank + 1
    return df
1384
+
1175
1385
  def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1176
1386
  """
1177
1387
  Selects the best repeats per modality.
@@ -1190,6 +1400,8 @@ def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1190
1400
  list: a list containing two metadata dataframes - raw and filt. raw contains all the metadata for the selected modality and filt contains the metadata filtered for highest quality repeats.
1191
1401
 
1192
1402
  """
1403
+ # mmdf = mmdf.astype(str)
1404
+ mmdf[outlier_column]=mmdf[outlier_column].astype(float)
1193
1405
  msel = mmdf['modality'] == wmod
1194
1406
  if wmod == 'rsfMRI':
1195
1407
  msel1 = mmdf['modality'] == 'rsfMRI'
@@ -1202,34 +1414,44 @@ def best_mmm( mmdf, wmod, mysep='-', outlier_column='ol_loop', verbose=False):
1202
1414
  msel2 = mmdf['modality'] == 'DTI_LR'
1203
1415
  msel3 = mmdf['modality'] == 'DTI_RL'
1204
1416
  msel4 = mmdf['modality'] == 'DTIdwi'
1205
- msel = msel1 | msel2 | msel3 | msel4
1417
+ msel5 = mmdf['modality'] == 'DTIb0'
1418
+ msel = msel1 | msel2 | msel3 | msel4 | msel5
1206
1419
  if sum(msel) == 0:
1207
1420
  return {'raw': None, 'filt': None}
1208
- uids = list(mmdf['filename'][msel])
1209
- metasub = mmdf[msel]
1421
+ metasub = mmdf[msel].copy()
1210
1422
 
1211
1423
  if verbose:
1212
1424
  print(f"{wmod} {(metasub.shape[0])} pre")
1213
1425
 
1214
- metasub['subjectID']=math.nan
1215
- metasub['date']=math.nan
1216
- metasub['subjectIDdate']=math.nan
1217
- metasub['imageID']=math.nan
1218
- for k in range(len(uids)):
1219
- temp = uids[k].split( mysep )
1220
- metasub['subjectID'].iloc[k] = temp[1]
1221
- metasub['date'].iloc[k] = temp[2]
1222
- metasub['subjectIDdate'].iloc[k] = temp[1] + mysep + temp[2]
1223
- metasub['imageID'].iloc[k] = temp[4]
1224
-
1225
- metasub['negol'] = metasub[outlier_column].max() - metasub[outlier_column]
1426
+ metasub['subjectID']=None
1427
+ metasub['date']=None
1428
+ metasub['subjectIDdate']=None
1429
+ metasub['imageID']=None
1430
+ metasub['negol']=math.nan
1431
+ for k in metasub.index:
1432
+ temp = metasub.loc[k, 'filename'].split( mysep )
1433
+ metasub.loc[k,'subjectID'] = str( temp[1] )
1434
+ metasub.loc[k,'date'] = str( temp[2] )
1435
+ metasub.loc[k,'subjectIDdate'] = str( temp[1] + mysep + temp[2] )
1436
+ metasub.loc[k,'imageID'] = str( temp[4])
1437
+
1438
+
1439
+ if 'ol_' in outlier_column:
1440
+ metasub['negol'] = metasub[outlier_column].max() - metasub[outlier_column]
1441
+ else:
1442
+ metasub['negol'] = metasub[outlier_column]
1226
1443
  if 'date' not in metasub.keys():
1227
- metasub['date']='NA'
1228
- metasubq = highest_quality_repeat(metasub, 'filename', 'date', 'negol')
1444
+ metasub['date']=None
1445
+ metasubq = add_repeat_column( metasub, 'subjectIDdate' )
1446
+ metasubq = highest_quality_repeat(metasubq, 'filename', 'date', 'negol')
1229
1447
 
1230
1448
  if verbose:
1231
1449
  print(f"{wmod} {metasubq.shape[0]} post")
1232
1450
 
1451
+ # metasub = metasub.astype(str)
1452
+ # metasubq = metasubq.astype(str)
1453
+ metasub[outlier_column]=metasub[outlier_column].astype(float)
1454
+ metasubq[outlier_column]=metasubq[outlier_column].astype(float)
1233
1455
  return {'raw': metasub, 'filt': metasubq}
1234
1456
 
1235
1457
  def mm_read( x, standardize_intensity=False, modality='' ):
@@ -1640,10 +1862,12 @@ def merge_dwi_data( img_LRdwp, bval_LR, bvec_LR, img_RLdwp, bval_RL, bvec_RL ):
1640
1862
  bvec_RL : array
1641
1863
 
1642
1864
  """
1865
+ import warnings
1643
1866
  insamespace = ants.image_physical_space_consistency( img_LRdwp, img_RLdwp )
1644
1867
  if not insamespace :
1645
- raise ValueError('not insamespace ... corrected image pair should occupy the same physical space')
1646
-
1868
+ warnings.warn('not insamespace ... corrected image pair should occupy the same physical space; returning only the 1st set and wont join these data.')
1869
+ return img_LRdwp, bval_LR, bvec_LR
1870
+
1647
1871
  bval_LR = np.concatenate([bval_LR,bval_RL])
1648
1872
  bvec_LR = np.concatenate([bvec_LR,bvec_RL])
1649
1873
  # concatenate the images
@@ -2412,7 +2636,7 @@ def template_figure_with_overlay(scalar_label_df, prefix, outputfilename=None, t
2412
2636
  toviz = temp['overlay']
2413
2637
  return { "underlay": seggm, 'overlay': toviz, 'seg': tcrop }
2414
2638
 
2415
- def get_data( name=None, force_download=False, version=21, target_extension='.csv' ):
2639
+ def get_data( name=None, force_download=False, version=23, target_extension='.csv' ):
2416
2640
  """
2417
2641
  Get ANTsPyMM data filename
2418
2642
 
@@ -2696,6 +2920,34 @@ def super_res_mcimage( image,
2696
2920
  return ants.list_to_ndimage( imageup, mcsr )
2697
2921
 
2698
2922
 
2923
def segment_timeseries_by_bvalue(bvals):
    """
    Split the volume indices of a b-value sequence into b0 and non-b0 groups.

    A b-value is treated as (near) zero when it is <= 1e-12.

    The key names mirror the output of ``segment_timeseries_by_meanvalue`` so
    the two functions are interchangeable for callers that read
    ``['highermeans']`` to obtain the b0 indices:

    - 'highermeans': indices whose b-value is at or below the threshold (b0).
    - 'lowermeans':  indices whose b-value is above the threshold.

    Parameters:
    - bvals (array-like): The b-values. Any sequence accepted by
      ``numpy.asarray`` works; inputs with more than one dimension
      (e.g. shape (1, N) or (N, 1)) are flattened first so the returned
      indices always refer to positions in the flattened sequence.

    Returns:
    - dict: A dictionary with the two keys 'lowermeans' and 'highermeans',
      each holding a list of integer indices as described above.
    """
    # Define the threshold
    threshold = 1e-12

    # Coerce to a flat array so plain lists and row/column vectors behave
    # the same as a 1-D numpy array.
    bvals = np.asarray(bvals).ravel()

    # Indices where b-values are greater than the threshold (diffusion-weighted)
    lowermeans = list(np.where(bvals > threshold)[0])

    # Indices where b-values are less than or equal to the threshold (b0)
    highermeans = list(np.where(bvals <= threshold)[0])

    return {
        'lowermeans': lowermeans,
        'highermeans': highermeans
    }
2699
2951
 
2700
2952
  def segment_timeseries_by_meanvalue( image, quantile = 0.995 ):
2701
2953
  """
@@ -2754,7 +3006,7 @@ def get_average_rsf( x, min_t=10, max_t=35 ):
2754
3006
  if x.shape[3] <= min_t:
2755
3007
  min_t=0
2756
3008
  if x.shape[3] <= max_t:
2757
- max_t=x.shape[3]-1
3009
+ max_t=x.shape[3]
2758
3010
  for myidx in range(min_t,max_t):
2759
3011
  b0 = ants.slice_image( x, axis=3, idx=myidx)
2760
3012
  bavg = bavg + ants.registration(oavg,b0,'Rigid',outprefix=ofn)['warpedmovout']
@@ -3147,7 +3399,7 @@ def dipy_dti_recon(
3147
3399
  space as the image, we will resample directly to the image space. This
3148
3400
  could lead to problems if the inputs are really incorrect.
3149
3401
 
3150
- b0_idx : the indices of the B0; if None, use segment_timeseries_by_meanvalue to guess
3402
+ b0_idx : the indices of the B0; if None, use segment_timeseries_by_bvalue
3151
3403
 
3152
3404
  mask_dilation : integer zero or more dilates the brain mask
3153
3405
 
@@ -3178,8 +3430,7 @@ def dipy_dti_recon(
3178
3430
  bvals = bvalsfn.copy()
3179
3431
  bvecs = bvecsfn.copy()
3180
3432
 
3181
- if b0_idx is None:
3182
- b0_idx = segment_timeseries_by_meanvalue( image )['highermeans']
3433
+ b0_idx = segment_timeseries_by_bvalue( bvals )['highermeans']
3183
3434
 
3184
3435
  b0 = ants.slice_image( image, axis=3, idx=b0_idx[0] )
3185
3436
  bxtmod='bold'
@@ -3389,6 +3640,9 @@ def joint_dti_recon(
3389
3640
  def fix_dwi_shape( img, bvalfn, bvecfn ):
3390
3641
  if isinstance(bvecfn, str):
3391
3642
  bvals, bvecs = read_bvals_bvecs( bvalfn , bvecfn )
3643
+ else:
3644
+ bvals = bvalfn
3645
+ bvecs = bvecfn
3392
3646
  if bvecs.shape[0] < img.shape[3]:
3393
3647
  imgout = ants.from_numpy( img[:,:,:,0:bvecs.shape[0]] )
3394
3648
  imgout = ants.copy_image_info( img, imgout )
@@ -4443,7 +4697,7 @@ def get_rsf_outputs( coords ):
4443
4697
  return list( yeo['SystemName'].unique() )
4444
4698
 
4445
4699
  def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4446
- transform=['rigid'], verbose=False ):
4700
+ transform=['rigid'], compreg=None, verbose=False ):
4447
4701
  """
4448
4702
  multi-start multi-transform registration solution - based on ants.registration
4449
4703
 
@@ -4457,6 +4711,8 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4457
4711
 
4458
4712
  transform : list of transforms to loop through
4459
4713
 
4714
+ compreg : registration results against which to compare
4715
+
4460
4716
  verbose : boolean
4461
4717
 
4462
4718
  """
@@ -4465,15 +4721,20 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4465
4721
  output_directory_w = output_directory + "/tra_reg/"
4466
4722
  os.makedirs(output_directory_w,exist_ok=True)
4467
4723
  bestmi = math.inf
4724
+ bestvar = 0.0
4468
4725
  myorig = list(ants.get_origin( fixed ))
4469
4726
  mymax = 0;
4470
4727
  for k in range(len( myorig ) ):
4471
4728
  if abs(myorig[k]) > mymax:
4472
4729
  mymax = abs(myorig[k])
4473
4730
  maxtrans = mymax * 0.05
4474
- bestreg=ants.registration( fixed,moving,'Translation',
4475
- outprefix=output_directory_w+"trans")
4476
- initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4731
+ if compreg is None:
4732
+ bestreg=ants.registration( fixed,moving,'Translation',
4733
+ outprefix=output_directory_w+"trans")
4734
+ initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4735
+ else :
4736
+ bestreg=compreg
4737
+ initx = ants.read_transform( bestreg['fwdtransforms'][0] )
4477
4738
  for mytx in transform:
4478
4739
  regtx = 'Rigid'
4479
4740
  with tempfile.NamedTemporaryFile(suffix='.h5') as tp:
@@ -4510,6 +4771,9 @@ def tra_initializer( fixed, moving, n_simulations=32, max_rotation=30,
4510
4771
  print( "mi @ " + str(k) + " : " + str(mymi), flush=True)
4511
4772
  bestmi = mymi
4512
4773
  bestreg = reg
4774
+ bestvar = myvar
4775
+ if bestvar == 0.0 and compreg is not None:
4776
+ return compreg
4513
4777
  return bestreg
4514
4778
 
4515
4779
  def neuromelanin( list_nm_images, t1, t1_head, t1lab, brain_stem_dilation=8,
@@ -4691,7 +4955,7 @@ def neuromelanin( list_nm_images, t1, t1_head, t1lab, brain_stem_dilation=8,
4691
4955
  nm_avg_cropped_new = nm_avg_cropped_new + warpednext
4692
4956
  nm_avg_cropped = nm_avg_cropped_new / len( crop_nm_list )
4693
4957
 
4694
- slabregUpdated = tra_initializer( nm_avg_cropped, t1c, verbose=verbose )
4958
+ slabregUpdated = tra_initializer( nm_avg_cropped, t1c, compreg=slabreg,verbose=verbose )
4695
4959
  tempOrig = ants.apply_transforms( nm_avg_cropped_new, t1c, slabreg['fwdtransforms'] )
4696
4960
  tempUpdate = ants.apply_transforms( nm_avg_cropped_new, t1c, slabregUpdated['fwdtransforms'] )
4697
4961
  miUpdate = ants.image_mutual_information(
@@ -5266,7 +5530,8 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5266
5530
  ptImage=ants.threshold_image( ptImg, pts2bold.loc[i,'ROI'], pts2bold.loc[i,'ROI'] )
5267
5531
  if debug:
5268
5532
  ptImgAll = ptImgAll + ptImage
5269
- meanROI[:,i] = ants.timeseries_to_matrix( simg, ptImage).mean(axis=1)
5533
+ if ptImage.sum() > 0 :
5534
+ meanROI[:,i] = ants.timeseries_to_matrix( simg, ptImage).mean(axis=1)
5270
5535
 
5271
5536
  if debug:
5272
5537
  ants.image_write( simg, '/tmp/simg.nii.gz' )
@@ -5384,9 +5649,15 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5384
5649
  fname='falffPoint'+kk+anatname
5385
5650
  aname='alffPoint'+kk+anatname
5386
5651
  pname='perafPoint'+kk+anatname
5387
- outdict[fname]=(outdict['falff'][ptImg==k]).mean()
5388
- outdict[aname]=(outdict['alff'][ptImg==k]).mean()
5389
- outdict[pname]=(perafimg[ptImg==k]).mean()
5652
+ localsel = ptImg == k
5653
+ if localsel.sum() > 0 : # check if non-empty
5654
+ outdict[fname]=(outdict['falff'][localsel]).mean()
5655
+ outdict[aname]=(outdict['alff'][localsel]).mean()
5656
+ outdict[pname]=(perafimg[localsel]).mean()
5657
+ else:
5658
+ outdict[fname]=math.nan
5659
+ outdict[aname]=math.nan
5660
+ outdict[pname]=math.nan
5390
5661
 
5391
5662
  rsfNuisance = pd.DataFrame( nuisance )
5392
5663
  if remove_it:
@@ -5419,6 +5690,7 @@ def resting_state_fmri_networks( fmri, fmri_template, t1, t1segmentation,
5419
5690
  outdict['despiking_count_summary'] = despiking_count_summary
5420
5691
  outdict['FD_max'] = corrmo['FD'].max()
5421
5692
  outdict['FD_mean'] = corrmo['FD'].mean()
5693
+ outdict['FD_sd'] = corrmo['FD'].std()
5422
5694
  outdict['bold_evr'] = antspyt1w.patch_eigenvalue_ratio( und, 512, [16,16,16], evdepth = 0.9, mask = bmask )
5423
5695
  outdict['n_outliers'] = len(hlinds)
5424
5696
  outdict['nc_wm'] = int(nc_wm)
@@ -5495,11 +5767,18 @@ def despike_time_series_afni(image, c1=2.5, c2=4):
5495
5767
  residuals = data - curve
5496
5768
  mad = np.median(np.abs(residuals - np.median(residuals, axis=-1, keepdims=True)), axis=-1, keepdims=True)
5497
5769
  sigma = np.sqrt(np.pi / 2) * mad
5770
+ # Ensure sigma is not zero to avoid division by zero
5771
+ sigma_safe = np.where(sigma == 0, 1e-10, sigma)
5772
+
5773
+ # Optionally, handle NaN or inf values in data, curve, or sigma
5774
+ data = np.nan_to_num(data, nan=0.0, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5775
+ curve = np.nan_to_num(curve, nan=0.0, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5776
+ sigma_safe = np.nan_to_num(sigma_safe, nan=1e-10, posinf=np.finfo(np.float64).max, neginf=np.finfo(np.float64).min)
5498
5777
 
5499
5778
  # Despike algorithm
5500
5779
  spike_counts = np.zeros( image.shape[3] )
5501
5780
  for i in range(data.shape[-1]):
5502
- s = (data[..., i] - curve[..., i]) / sigma[..., 0]
5781
+ s = (data[..., i] - curve[..., i]) / sigma_safe[..., 0]
5503
5782
  ww = s > c1
5504
5783
  s_prime = np.where( ww, c1 + (c2 - c1) * np.tanh((s - c1) / (c2 - c1)), s)
5505
5784
  spike_counts[i] = ww.sum()
@@ -5764,6 +6043,7 @@ def bold_perfusion_minimal(
5764
6043
  outdict['dvars'] = dvars( corrmo['motion_corrected'], gmseg )
5765
6044
  outdict['FD_max'] = rsfNuisance['FD'].max()
5766
6045
  outdict['FD_mean'] = rsfNuisance['FD'].mean()
6046
+ outdict['FD_sd'] = rsfNuisance['FD'].std()
5767
6047
  outdict['outlier_volumes']=hlinds
5768
6048
  outdict['negative_voxels']=negative_voxels
5769
6049
  return convert_np_in_dict( outdict )
@@ -6170,6 +6450,7 @@ Where:
6170
6450
  outdict['high_motion_pct'] = (rsfNuisance['FD'] > FD_threshold ).sum() / rsfNuisance.shape[0]
6171
6451
  outdict['FD_max'] = rsfNuisance['FD'].max()
6172
6452
  outdict['FD_mean'] = rsfNuisance['FD'].mean()
6453
+ outdict['FD_sd'] = rsfNuisance['FD'].std()
6173
6454
  outdict['bold_evr'] = antspyt1w.patch_eigenvalue_ratio( und, 512, [16,16,16], evdepth = 0.9, mask = bmask )
6174
6455
  outdict['t1reg'] = t1reg
6175
6456
  outdict['outlier_volumes']=hlinds
@@ -6734,11 +7015,14 @@ def mm(
6734
7015
  normalization_dict['MD_norm'] = ants.apply_transforms( group_template, mydti['recon_md'],group_transform+dtirig['fwdtransforms'] )
6735
7016
  normalization_dict['FA_norm'] = ants.apply_transforms( group_template, mydti['recon_fa'],group_transform+dtirig['fwdtransforms'] )
6736
7017
  output_directory = tempfile.mkdtemp()
6737
- comptx = ants.apply_transforms( group_template, group_template,
6738
- group_transform+dtirig['fwdtransforms'],
6739
- compose = output_directory + '/xxx' )
6740
- normalization_dict['DTI_norm'] = transform_and_reorient_dti(
6741
- group_template, mydti['dti'], comptx, py_based=True, verbose=True )
7018
+ do_dti_norm=False
7019
+ if do_dti_norm:
7020
+ comptx = ants.apply_transforms( group_template, group_template, group_transform+dtirig['fwdtransforms'], compose = output_directory + '/xxx' )
7021
+ tspc=[2.,2.,2.]
7022
+ if srmodel is not None:
7023
+ tspc=[1.,1.,1.]
7024
+ group_template2mm = ants.resample_image( group_template, tspc )
7025
+ normalization_dict['DTI_norm'] = transform_and_reorient_dti( group_template2mm, mydti['dti'], comptx, py_based=True, verbose=True )
6742
7026
  import shutil
6743
7027
  shutil.rmtree(output_directory, ignore_errors=True )
6744
7028
  if output_dict['rsf'] is not None:
@@ -6870,15 +7154,19 @@ def write_mm( output_prefix, mm, mm_norm=None, t1wide=None, separator='_', verbo
6870
7154
  if 'NM' in mm:
6871
7155
  if mm['NM'] is not None:
6872
7156
  nmwide = dict_to_dataframe( mm['NM'] )
6873
- nmwide.set_index( mm_wide.index, inplace=True )
7157
+ if mm_wide.shape[0] > 0 and nmwide.shape[0] > 0:
7158
+ nmwide.set_index( mm_wide.index, inplace=True )
6874
7159
  mm_wide = pd.concat( [mm_wide, nmwide ], axis=1, ignore_index=False )
6875
7160
  if 'flair' in mm:
6876
7161
  if mm['flair'] is not None:
6877
7162
  myop = output_prefix + separator + 'wmh.nii.gz'
7163
+ pngfnb = output_prefix + separator + 'wmh_seg.png'
7164
+ ants.plot( mm['flair']['flair'], mm['flair']['WMH_posterior_probability_map'], axis=2, nslices=21, ncol=7, filename=pngfnb, crop=True )
6878
7165
  if mm['flair']['WMH_probability_map'] is not None:
6879
7166
  image_write_with_thumbnail( mm['flair']['WMH_probability_map'], myop, thumb=False )
6880
7167
  flwide = dict_to_dataframe( mm['flair'] )
6881
- flwide.set_index( mm_wide.index, inplace=True )
7168
+ if mm_wide.shape[0] > 0 and flwide.shape[0] > 0:
7169
+ flwide.set_index( mm_wide.index, inplace=True )
6882
7170
  mm_wide = pd.concat( [mm_wide, flwide ], axis=1, ignore_index=False )
6883
7171
  if 'rsf' in mm:
6884
7172
  if mm['rsf'] is not None:
@@ -6921,15 +7209,17 @@ def write_mm( output_prefix, mm, mm_norm=None, t1wide=None, separator='_', verbo
6921
7209
  mm_wide['dti_high_motion_count'] = mydti['high_motion_count']
6922
7210
  mm_wide['dti_FD_mean'] = mydti['framewise_displacement'].mean()
6923
7211
  mm_wide['dti_FD_max'] = mydti['framewise_displacement'].max()
7212
+ mm_wide['dti_FD_sd'] = mydti['framewise_displacement'].std()
6924
7213
  fdfn = output_prefix + separator + '_fd.csv'
6925
7214
  else:
6926
- mm_wide['dti_FD_mean'] = mm_wide['dti_FD_max'] = 'NA'
7215
+ mm_wide['dti_FD_mean'] = mm_wide['dti_FD_max'] = mm_wide['dti_FD_sd'] = 'NA'
6927
7216
 
6928
7217
  if 'perf' in mm:
6929
7218
  if mm['perf'] is not None:
6930
7219
  perfpro = mm['perf']
6931
7220
  prwide = dict_to_dataframe( perfpro )
6932
- prwide.set_index( mm_wide.index, inplace=True )
7221
+ if mm_wide.shape[0] > 0 and prwide.shape[0] > 0:
7222
+ prwide.set_index( mm_wide.index, inplace=True )
6933
7223
  mm_wide = pd.concat( [mm_wide, prwide ], axis=1, ignore_index=False )
6934
7224
  if 'perf_dataframe' in perfpro.keys():
6935
7225
  pderk = perfpro['perf_dataframe'].iloc[: , 1:]
@@ -7562,6 +7852,7 @@ def mm_csv(
7562
7852
  writes output to disk and produces figures
7563
7853
 
7564
7854
  """
7855
+ import traceback
7565
7856
  visualize = True
7566
7857
  verbose = True
7567
7858
  if verbose:
@@ -7669,6 +7960,11 @@ def mm_csv(
7669
7960
  elif not testloop:
7670
7961
  t1wide = antspyt1w.merge_hierarchical_csvs_to_wide_format(
7671
7962
  hier['dataframes'], identifier=None )
7963
+ if t1wide['resnetGrade'].iloc[0] < 0.35:
7964
+ rgrade = str( t1wide['resnetGrade'].iloc[0] )
7965
+ warnings.warn('T1w quality check indicates failure: ' + rgrade + " will not process." )
7966
+ return
7967
+
7672
7968
  if srmodel_T1 is not False :
7673
7969
  hierfntest = hierfnSR + 'mtl.csv'
7674
7970
  if verbose:
@@ -7814,9 +8110,11 @@ def mm_csv(
7814
8110
  test_run=test_run,
7815
8111
  verbose=True )
7816
8112
  except Exception as e:
8113
+ error_info = traceback.format_exc()
8114
+ print(error_info)
7817
8115
  visualize=False
7818
8116
  dowrite=False
7819
- print(f"An error occurred while processing {overmodX}: {e}")
8117
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
7820
8118
  pass
7821
8119
  if not test_run:
7822
8120
  write_mm( output_prefix=mymm, mm=tabPro, mm_norm=normPro, t1wide=None, separator=mysep )
@@ -7870,9 +8168,11 @@ def mm_csv(
7870
8168
  test_run=test_run,
7871
8169
  verbose=True )
7872
8170
  except Exception as e:
8171
+ error_info = traceback.format_exc()
8172
+ print(error_info)
7873
8173
  visualize=False
7874
8174
  dowrite=False
7875
- print(f"An error occurred while processing {overmodX}: {e}")
8175
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
7876
8176
  pass
7877
8177
  if visualize:
7878
8178
  maxslice = np.min( [21, hier['brain_n4_dnz'].shape[2] ] )
@@ -7893,9 +8193,11 @@ def mm_csv(
7893
8193
  test_run=test_run,
7894
8194
  verbose=True )
7895
8195
  except Exception as e:
8196
+ error_info = traceback.format_exc()
8197
+ print(error_info)
7896
8198
  visualize=False
7897
8199
  dowrite=False
7898
- print(f"An error occurred while processing {overmodX}: {e}")
8200
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
7899
8201
  pass
7900
8202
  if visualize:
7901
8203
  maxslice = np.min( [21, img.shape[2] ] )
@@ -7930,11 +8232,13 @@ def mm_csv(
7930
8232
  test_run=test_run,
7931
8233
  verbose=True )
7932
8234
  except Exception as e:
8235
+ error_info = traceback.format_exc()
8236
+ print(error_info)
7933
8237
  visualize=False
7934
8238
  dowrite=False
7935
8239
  tabPro={'rsf':None}
7936
8240
  normPro={'rsf':None}
7937
- print(f"An error occurred while processing {overmodX}: {e}")
8241
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
7938
8242
  pass
7939
8243
  if tabPro['rsf'] is not None and visualize:
7940
8244
  for tpro in tabPro['rsf']: # FIXMERSF
@@ -7966,10 +8270,12 @@ def mm_csv(
7966
8270
  perfusion_m0=perfusion_m0,
7967
8271
  verbose=True )
7968
8272
  except Exception as e:
8273
+ error_info = traceback.format_exc()
8274
+ print(error_info)
7969
8275
  visualize=False
7970
8276
  dowrite=False
7971
8277
  tabPro={'perf':None}
7972
- print(f"An error occurred while processing {overmodX}: {e}")
8278
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
7973
8279
  pass
7974
8280
  if tabPro['perf'] is not None and visualize:
7975
8281
  maxslice = np.min( [21, tabPro['perf']['meanBold'].shape[2] ] )
@@ -7986,7 +8292,7 @@ def mm_csv(
7986
8292
  bvalfnList = [ bvalfn ]
7987
8293
  bvecfnList = [ bvecfn ]
7988
8294
  missing_dti_data=False # bval, bvec or images
7989
- if len( myimgsr ) > 1: # find DTI_RL
8295
+ if len( myimgsr ) == 2: # find DTI_RL
7990
8296
  dtilrfn = myimgsr[myimgcount+1]
7991
8297
  if exists( dtilrfn ):
7992
8298
  bvalfnRL = re.sub( '.nii.gz', '.bval' , dtilrfn )
@@ -7995,6 +8301,28 @@ def mm_csv(
7995
8301
  imgList.append( imgRL )
7996
8302
  bvalfnList.append( bvalfnRL )
7997
8303
  bvecfnList.append( bvecfnRL )
8304
+ elif len( myimgsr ) == 3: # find DTI_RL
8305
+ print("DTI trinity")
8306
+ dtilrfn = myimgsr[myimgcount+1]
8307
+ dtilrfn2 = myimgsr[myimgcount+2]
8308
+ if exists( dtilrfn ) and exists( dtilrfn2 ):
8309
+ bvalfnRL = re.sub( '.nii.gz', '.bval' , dtilrfn )
8310
+ bvecfnRL = re.sub( '.nii.gz', '.bvec' , dtilrfn )
8311
+ bvalfnRL2 = re.sub( '.nii.gz', '.bval' , dtilrfn2 )
8312
+ bvecfnRL2 = re.sub( '.nii.gz', '.bvec' , dtilrfn2 )
8313
+ imgRL = ants.image_read( dtilrfn )
8314
+ imgRL2 = ants.image_read( dtilrfn2 )
8315
+ bvals, bvecs = read_bvals_bvecs( bvalfnRL , bvecfnRL )
8316
+ print( bvals.max() )
8317
+ bvals2, bvecs2 = read_bvals_bvecs( bvalfnRL2 , bvecfnRL2 )
8318
+ print( bvals2.max() )
8319
+ temp = merge_dwi_data( imgRL, bvals, bvecs, imgRL2, bvals2, bvecs2 )
8320
+ imgList.append( temp[0] )
8321
+ bvalfnList.append( mymm+mysep+'joined.bval' )
8322
+ bvecfnList.append( mymm+mysep+'joined.bvec' )
8323
+ write_bvals_bvecs( temp[1], temp[2], mymm+mysep+'joined' )
8324
+ bvalsX, bvecsX = read_bvals_bvecs( bvalfnRL2 , bvecfnRL2 )
8325
+ print( bvalsX.max() )
7998
8326
  # check existence of all files expected ...
7999
8327
  for dtiex in bvalfnList+bvecfnList+myimgsr:
8000
8328
  if not exists(dtiex):
@@ -8034,10 +8362,12 @@ def mm_csv(
8034
8362
  test_run=test_run,
8035
8363
  verbose=True )
8036
8364
  except Exception as e:
8365
+ error_info = traceback.format_exc()
8366
+ print(error_info)
8037
8367
  visualize=False
8038
8368
  dowrite=False
8039
8369
  tabPro={'DTI':None}
8040
- print(f"An error occurred while processing {overmodX}: {e}")
8370
+ print(f"antspymmerror occurred while processing {overmodX}: {e}")
8041
8371
  pass
8042
8372
  mydti = tabPro['DTI']
8043
8373
  if visualize and tabPro['DTI'] is not None:
@@ -8684,6 +9014,7 @@ def boot_wmh( flair, t1, t1seg, mmfromconvexhull = 0.0, strict=True,
8684
9014
  wmh_sum_aug = wmh_sum_aug / float( n_simulations )
8685
9015
  wmh_sum_prior_aug = wmh_sum_prior_aug / float( n_simulations )
8686
9016
  return{
9017
+ 'flair' : ants.iMath(flair,"Normalize"),
8687
9018
  'WMH_probability_map' : augprob,
8688
9019
  'WMH_posterior_probability_map' : augprob_prior,
8689
9020
  'wmh_mass': wmh_sum_aug,
@@ -9116,6 +9447,7 @@ def blind_image_assessment(
9116
9447
  title=False,
9117
9448
  pull_rank=False,
9118
9449
  resample=None,
9450
+ n_to_skip = 10,
9119
9451
  verbose=False
9120
9452
  ):
9121
9453
  """
@@ -9145,6 +9477,8 @@ def blind_image_assessment(
9145
9477
 
9146
9478
  resample : None, numeric max or min, resamples image to isotropy
9147
9479
 
9480
+ n_to_skip : 10 by default; samples time series every n_to_skip volume
9481
+
9148
9482
  verbose : boolean
9149
9483
 
9150
9484
  """
@@ -9156,6 +9490,7 @@ def blind_image_assessment(
9156
9490
  from pathlib import Path
9157
9491
  import json
9158
9492
  import re
9493
+ from dipy.io.gradients import read_bvals_bvecs
9159
9494
  mystem=''
9160
9495
  if isinstance(image,list):
9161
9496
  isfilename=isinstance( image[0], str)
@@ -9164,6 +9499,7 @@ def blind_image_assessment(
9164
9499
  isfilename=isinstance( image, str)
9165
9500
  outdf = pd.DataFrame()
9166
9501
  mymeta = None
9502
+ MagneticFieldStrength = None
9167
9503
  image_filename=''
9168
9504
  if isfilename:
9169
9505
  image_filename = image
@@ -9171,10 +9507,14 @@ def blind_image_assessment(
9171
9507
  image_filename=image[0]
9172
9508
  json_name = re.sub(".nii.gz",".json",image_filename)
9173
9509
  if exists( json_name ):
9174
- with open(json_name, 'r') as fcc_file:
9175
- mymeta = json.load(fcc_file, strict=False)
9176
- if verbose:
9177
- print(json.dumps(mymeta, indent=4))
9510
+ try:
9511
+ with open(json_name, 'r') as fcc_file:
9512
+ mymeta = json.load(fcc_file)
9513
+ if verbose:
9514
+ print(json.dumps(mymeta, indent=4))
9515
+ fcc_file.close()
9516
+ except:
9517
+ pass
9178
9518
  mystem=Path( image ).stem
9179
9519
  mystem=Path( mystem ).stem
9180
9520
  image_reference = ants.image_read( image )
@@ -9182,6 +9522,7 @@ def blind_image_assessment(
9182
9522
  else:
9183
9523
  image_reference = ants.image_clone( image )
9184
9524
  ntimepoints = 1
9525
+ bvalueMax=None
9185
9526
  if image_reference.dimension == 4:
9186
9527
  ntimepoints = image_reference.shape[3]
9187
9528
  if "DTI" in image_filename:
@@ -9189,11 +9530,16 @@ def blind_image_assessment(
9189
9530
  image_b0, image_dwi = get_average_dwi_b0( image_reference, fast=True )
9190
9531
  image_b0 = ants.iMath( image_b0, 'Normalize' )
9191
9532
  image_dwi = ants.iMath( image_dwi, 'Normalize' )
9533
+ bval_name = re.sub(".nii.gz",".bval",image_filename)
9534
+ bvec_name = re.sub(".nii.gz",".bvec",image_filename)
9535
+ if exists( bval_name ) and exists( bvec_name ):
9536
+ bvals, bvecs = read_bvals_bvecs( bval_name , bvec_name )
9537
+ bvalueMax = bvals.max()
9192
9538
  else:
9193
9539
  image_b0 = ants.get_average_of_timeseries( image_reference ).iMath("Normalize")
9194
9540
  else:
9195
9541
  image_compare = ants.smooth_image( image_reference, 3, sigma_in_physical_coordinates=False )
9196
- for jjj in range(ntimepoints):
9542
+ for jjj in range(0,ntimepoints,n_to_skip):
9197
9543
  modality='unknown'
9198
9544
  if "rsfMRI" in image_filename:
9199
9545
  modality='rsfMRI'
@@ -9214,7 +9560,7 @@ def blind_image_assessment(
9214
9560
  modality='DTIdwi'
9215
9561
  else:
9216
9562
  image_compare = ants.image_clone( image_b0 )
9217
- image = ants.iMath( image, 'TruncateIntensity',0.01,0.995)
9563
+ # image = ants.iMath( image, 'TruncateIntensity',0.01,0.995)
9218
9564
  minspc = np.min(ants.get_spacing(image))
9219
9565
  maxspc = np.max(ants.get_spacing(image))
9220
9566
  if resample is not None:
@@ -9246,69 +9592,76 @@ def blind_image_assessment(
9246
9592
  bgmsk = msk*0+1-msk
9247
9593
  mskdil = ants.iMath(msk, "MD", 4 )
9248
9594
  # ants.plot_ortho( image, msk, crop=False )
9249
- image = ants.crop_image( image, mskdil ).iMath("Normalize")
9250
- msk = ants.crop_image( msk, mskdil ).iMath("Normalize")
9251
- bgmsk = ants.crop_image( bgmsk, mskdil ).iMath("Normalize")
9252
- image_compare = ants.crop_image( image_compare, mskdil ).iMath("Normalize")
9253
9595
  nvox = int( msk.sum() )
9254
- minshp = np.min( image.shape )
9255
- npatch = int( np.round( 0.1 * nvox ) )
9256
- npatch = np.min( [512,npatch ] )
9257
- patch_shape = []
9258
- for k in range( 3 ):
9259
- p = int( 32.0 / ants.get_spacing( image )[k] )
9260
- if p > int( np.round( image.shape[k] * 0.5 ) ):
9261
- p = int( np.round( image.shape[k] * 0.5 ) )
9262
- patch_shape.append( p )
9263
- if verbose:
9264
- print(image)
9265
- print( patch_shape )
9266
- print( npatch )
9267
- myevr = math.nan # dont want to fail if something odd happens in patch extraction
9268
- try:
9269
- myevr = antspyt1w.patch_eigenvalue_ratio( image, npatch, patch_shape,
9270
- evdepth = 0.9, mask=msk )
9271
- except:
9272
- pass
9273
- if pull_rank:
9274
- image = ants.rank_intensity(image)
9275
- imagereflect = ants.reflect_image(image, axis=0)
9276
- asym_err = ( image - imagereflect ).abs().mean()
9277
- # estimate noise by center cropping, denoizing and taking magnitude of difference
9278
- nocrop=False
9279
- if image.dimension == 3:
9280
- if image.shape[2] == 1:
9281
- nocrop=True
9282
- if maxspc/minspc > 10:
9283
- nocrop=True
9284
- if nocrop:
9285
- mycc = ants.image_clone( image )
9286
- else:
9287
- mycc = antspyt1w.special_crop( image,
9288
- ants.get_center_of_mass( msk *0 + 1 ), patch_shape )
9289
- myccd = ants.denoise_image( mycc, p=2,r=2,noise_model='Gaussian' )
9290
- noizlevel = ( mycc - myccd ).abs().mean()
9291
- # ants.plot_ortho( image, crop=False, filename=viz_filename, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0 )
9292
- # from brisque import BRISQUE
9293
- # obj = BRISQUE(url=False)
9294
- # mybrisq = obj.score( np.array( Image.open( viz_filename )) )
9295
9596
  spc = ants.get_spacing( image )
9296
9597
  org = ants.get_origin( image )
9297
- msk_vol = msk.sum() * np.prod( spc )
9298
- bgstd = image[ bgmsk == 1 ].std()
9299
- fgmean = image[ msk == 1 ].mean()
9300
- bgmean = image[ bgmsk == 1 ].mean()
9301
- snrref = fgmean / bgstd
9302
- cnrref = ( fgmean - bgmean ) / bgstd
9303
- psnrref = antspynet.psnr( image_compare, image )
9304
- ssimref = antspynet.ssim( image_compare, image )
9305
- if nocrop:
9306
- mymi = math.inf
9598
+ if ( nvox > 0 ):
9599
+ image = ants.crop_image( image, mskdil ).iMath("Normalize")
9600
+ msk = ants.crop_image( msk, mskdil ).iMath("Normalize")
9601
+ bgmsk = ants.crop_image( bgmsk, mskdil ).iMath("Normalize")
9602
+ image_compare = ants.crop_image( image_compare, mskdil ).iMath("Normalize")
9603
+ npatch = int( np.round( 0.1 * nvox ) )
9604
+ npatch = np.min( [512,npatch ] )
9605
+ patch_shape = []
9606
+ for k in range( 3 ):
9607
+ p = int( 32.0 / ants.get_spacing( image )[k] )
9608
+ if p > int( np.round( image.shape[k] * 0.5 ) ):
9609
+ p = int( np.round( image.shape[k] * 0.5 ) )
9610
+ patch_shape.append( p )
9611
+ if verbose:
9612
+ print(image)
9613
+ print( patch_shape )
9614
+ print( npatch )
9615
+ myevr = math.nan # dont want to fail if something odd happens in patch extraction
9616
+ try:
9617
+ myevr = antspyt1w.patch_eigenvalue_ratio( image, npatch, patch_shape,
9618
+ evdepth = 0.9, mask=msk )
9619
+ except:
9620
+ pass
9621
+ if pull_rank:
9622
+ image = ants.rank_intensity(image)
9623
+ imagereflect = ants.reflect_image(image, axis=0)
9624
+ asym_err = ( image - imagereflect ).abs().mean()
9625
+ # estimate noise by center cropping, denoizing and taking magnitude of difference
9626
+ nocrop=False
9627
+ if image.dimension == 3:
9628
+ if image.shape[2] == 1:
9629
+ nocrop=True
9630
+ if maxspc/minspc > 10:
9631
+ nocrop=True
9632
+ if nocrop:
9633
+ mycc = ants.image_clone( image )
9634
+ else:
9635
+ mycc = antspyt1w.special_crop( image,
9636
+ ants.get_center_of_mass( msk *0 + 1 ), patch_shape )
9637
+ myccd = ants.denoise_image( mycc, p=2,r=2,noise_model='Gaussian' )
9638
+ noizlevel = ( mycc - myccd ).abs().mean()
9639
+ # ants.plot_ortho( image, crop=False, filename=viz_filename, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0 )
9640
+ # from brisque import BRISQUE
9641
+ # obj = BRISQUE(url=False)
9642
+ # mybrisq = obj.score( np.array( Image.open( viz_filename )) )
9643
+ msk_vol = msk.sum() * np.prod( spc )
9644
+ bgstd = image[ bgmsk == 1 ].std()
9645
+ fgmean = image[ msk == 1 ].mean()
9646
+ bgmean = image[ bgmsk == 1 ].mean()
9647
+ snrref = fgmean / bgstd
9648
+ cnrref = ( fgmean - bgmean ) / bgstd
9649
+ psnrref = antspynet.psnr( image_compare, image )
9650
+ ssimref = antspynet.ssim( image_compare, image )
9651
+ if nocrop:
9652
+ mymi = math.inf
9653
+ else:
9654
+ mymi = ants.image_mutual_information( image_compare, image )
9307
9655
  else:
9308
- mymi = ants.image_mutual_information( image_compare, image )
9309
- mriseries='NA'
9310
- mrimfg='NA'
9311
- mrimodel='NA'
9656
+ msk_vol = 0
9657
+ myevr = mymi = ssimref = psnrref = cnrref = asym_err = noizlevel = math.nan
9658
+
9659
+ mriseries=None
9660
+ mrimfg=None
9661
+ mrimodel=None
9662
+ mriSAR=None
9663
+ BandwidthPerPixelPhaseEncode=None
9664
+ PixelBandwidth=None
9312
9665
  if mymeta is not None:
9313
9666
  # mriseries=mymeta['']
9314
9667
  try:
@@ -9319,13 +9672,39 @@ def blind_image_assessment(
9319
9672
  mrimodel=mymeta['ManufacturersModelName']
9320
9673
  except:
9321
9674
  pass
9675
+ try:
9676
+ MagneticFieldStrength=mymeta['MagneticFieldStrength']
9677
+ except:
9678
+ pass
9679
+ try:
9680
+ PixelBandwidth=mymeta['PixelBandwidth']
9681
+ except:
9682
+ pass
9683
+ try:
9684
+ BandwidthPerPixelPhaseEncode=mymeta['BandwidthPerPixelPhaseEncode']
9685
+ except:
9686
+ pass
9687
+ try:
9688
+ mriSAR=mymeta['SAR']
9689
+ except:
9690
+ pass
9322
9691
  ttl=mystem + ' '
9323
9692
  ttl=''
9324
9693
  ttl=ttl + "NZ: " + "{:0.4f}".format(noizlevel) + " SNR: " + "{:0.4f}".format(snrref) + " CNR: " + "{:0.4f}".format(cnrref) + " PS: " + "{:0.4f}".format(psnrref)+ " SS: " + "{:0.4f}".format(ssimref) + " EVR: " + "{:0.4f}".format(myevr)+ " MI: " + "{:0.4f}".format(mymi)
9325
- if viz_filename is not None and ( jjj == 0 or (jjj % 30 == 0) ):
9694
+ if viz_filename is not None and ( jjj == 0 or (jjj % 30 == 0) ) and image.shape[2] < 685:
9326
9695
  viz_filename_use = re.sub( ".png", "_slice"+str(jjj).zfill(4)+".png", viz_filename )
9327
9696
  ants.plot_ortho( image, crop=False, filename=viz_filename_use, flat=True, xyz_lines=False, orient_labels=False, xyz_pad=0, title=ttl, titlefontsize=12, title_dy=-0.02,textfontcolor='red' )
9328
- df = pd.DataFrame([[ mystem, noizlevel, snrref, cnrref, psnrref, ssimref, mymi, asym_err, myevr, msk_vol, spc[0], spc[1], spc[2],org[0], org[1], org[2], image.shape[0], image.shape[1], image.shape[2], jjj, modality, mriseries, mrimfg, mrimodel ]], columns=['filename', 'noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi', 'reflection_err', 'EVR', 'msk_vol', 'spc0','spc1','spc2','org0','org1','org2','dimx','dimy','dimz','slice','modality', 'mriseries', 'mrimfg', 'mrimodel' ])
9697
+ df = pd.DataFrame([[
9698
+ mystem,
9699
+ image_reference.dimension,
9700
+ noizlevel, snrref, cnrref, psnrref, ssimref, mymi, asym_err, myevr, msk_vol,
9701
+ spc[0], spc[1], spc[2],org[0], org[1], org[2],
9702
+ image.shape[0], image.shape[1], image.shape[2], ntimepoints,
9703
+ jjj, modality, mriseries, mrimfg, mrimodel, MagneticFieldStrength, mriSAR, PixelBandwidth, BandwidthPerPixelPhaseEncode, bvalueMax ]],
9704
+ columns=[
9705
+ 'filename',
9706
+ 'dimensionality',
9707
+ 'noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi', 'reflection_err', 'EVR', 'msk_vol', 'spc0','spc1','spc2','org0','org1','org2','dimx','dimy','dimz','dimt','slice','modality', 'mriseries', 'mrimfg', 'mrimodel', 'mriMagneticFieldStrength', 'mriSAR', 'mriPixelBandwidth', 'mriPixelBandwidthPE', 'dti_bvalueMax' ])
9329
9708
  outdf = pd.concat( [outdf, df ], axis=0, ignore_index=False )
9330
9709
  if verbose:
9331
9710
  print( outdf )
@@ -9334,6 +9713,29 @@ def blind_image_assessment(
9334
9713
  outdf.to_csv( csvfn )
9335
9714
  return outdf
9336
9715
 
9716
def remove_unwanted_columns(df):
    """
    Return a copy of a data frame without spurious index-like columns.

    A column is dropped when its label is the string 'X' or a string that
    starts with 'Unnamed'.  Non-string labels (e.g. integer column names)
    are never dropped.

    Parameters:
    - df (pandas.DataFrame): The data frame to clean; it is not modified.

    Returns:
    - pandas.DataFrame: A new data frame with the identified columns removed.
    """
    # Identify columns to drop: those named 'X' or starting with 'Unnamed'.
    # The isinstance guard keeps non-string labels from raising on .startswith.
    cols_to_drop = [
        col for col in df.columns
        if isinstance(col, str) and (col == 'X' or col.startswith('Unnamed'))
    ]

    # Drop the identified columns from the DataFrame, if any
    df_cleaned = df.drop(columns=cols_to_drop, errors='ignore')

    return df_cleaned
9724
+
9725
def process_dataframe_generalized(df, group_by_column):
    """
    Collapse a data frame to one row per value of ``group_by_column``.

    Numeric columns are averaged within each group.  Every other column is
    summarized by the first value returned by ``pandas.Series.mode`` for the
    group, or ``None`` when that mode result is empty.  The grouping column
    itself is kept as a regular column of the result.

    Parameters:
    - df (pandas.DataFrame): The data frame to aggregate.
    - group_by_column: Label of the column that defines the groups.

    Returns:
    - pandas.DataFrame: The aggregated data frame.
    """
    def _first_mode(values):
        # First mode of the group, or None if no mode is available
        modes = pd.Series.mode(values)
        return None if modes.empty else modes.iloc[0]

    # Partition the columns, leaving the grouping column out of both sets
    grouping = [group_by_column]
    numeric_cols = df.select_dtypes(include='number').columns.difference(grouping)
    other_cols = df.columns.difference(numeric_cols).difference(grouping)

    # One aggregation rule per column: mean for numeric, first mode otherwise
    agg_dict = {}
    for col in numeric_cols:
        agg_dict[col] = 'mean'
    for col in other_cols:
        agg_dict[col] = _first_mode

    grouped = df.groupby(group_by_column, as_index=False)
    return grouped.agg(agg_dict)
9337
9739
 
9338
9740
  def average_blind_qc_by_modality(qc_full,verbose=False):
9339
9741
  """
@@ -9345,21 +9747,14 @@ def average_blind_qc_by_modality(qc_full,verbose=False):
9345
9747
  Returns:
9346
9748
  pandas dataframe containing the processed qc data.
9347
9749
  """
9750
+ qc_full = remove_unwanted_columns( qc_full )
9348
9751
  # Get unique modalities
9349
9752
  modalities = qc_full['modality'].unique()
9350
9753
  modalities = modalities[modalities != 'unknown']
9351
- # Get modalities to select
9352
- m0sel = qc_full['modality'].isin(modalities)
9353
9754
  # Get unique ids
9354
- uid = qc_full['filename'] + "_" + qc_full['modality'].astype(str)
9755
+ uid = qc_full['filename']
9355
9756
  to_average = uid.unique()
9356
- # Define column indices
9357
- contcols = ['noise', 'snr', 'cnr', 'psnr', 'ssim', 'mi','reflection_err', 'EVR', 'msk_vol', 'spc0', 'spc1', 'spc2', 'org0','org1','org2', 'dimx', 'dimy', 'dimz', 'slice']
9358
- ocols = ['filename','modality', 'mriseries', 'mrimfg', 'mrimodel']
9359
- # restrict to columns we "know"
9360
- qc_full = qc_full[ocols+contcols]
9361
- # Create empty meta dataframe
9362
- meta = pd.DataFrame(columns=ocols+contcols)
9757
+ meta = pd.DataFrame(columns=qc_full.columns )
9363
9758
  # Process each unique id
9364
9759
  n = len(to_average)
9365
9760
  for k in range(n):
@@ -9371,15 +9766,11 @@ def average_blind_qc_by_modality(qc_full,verbose=False):
9371
9766
  if sum(m1sel) > 1:
9372
9767
  # If more than one entry for id, take the average of continuous columns,
9373
9768
  # maximum of the slice column, and the first entry of the other columns
9769
+ mfsub = process_dataframe_generalized(qc_full[m1sel],'filename')
9770
+ else:
9374
9771
  mfsub = qc_full[m1sel]
9375
- if mfsub.shape[0] > 1:
9376
- meta.loc[k, contcols] = mfsub.loc[:, contcols].mean(numeric_only=True)
9377
- meta.loc[k, 'slice'] = mfsub['slice'].max()
9378
- meta.loc[k, ocols] = mfsub[ocols].iloc[0]
9379
- elif sum(m1sel) == 1:
9380
- # If only one entry for id, just copy the entry
9381
- mfsub = qc_full[m1sel]
9382
- meta.loc[k] = mfsub.iloc[0]
9772
+ meta.loc[k] = mfsub.iloc[0]
9773
+ meta['modality'] = meta['modality'].replace(['DTIdwi', 'DTIb0'], 'DTI', regex=True)
9383
9774
  return meta
9384
9775
 
9385
9776
  def wmh( flair, t1, t1seg,
@@ -9830,6 +10221,10 @@ def loop_timeseries_censoring(x, threshold=0.5, mask=None, verbose=False):
9830
10221
  Returns:
9831
10222
  tuple: A tuple containing the censored time series (ANTsImage) and the indices of the high leverage volumes.
9832
10223
  """
10224
+ import warnings
10225
+ if x.shape[3] < 20: # just a guess at what we need here ...
10226
+ warnings.warn("Warning: the time dimension is < 20 - too few samples for loop. just return the original data.")
10227
+ return x, []
9833
10228
  if mask is None:
9834
10229
  flattened_series = flatten_time_series(x.numpy())
9835
10230
  else:
@@ -10008,15 +10403,14 @@ def novelty_detection_quantile(df_train, df_test):
10008
10403
  myqs[mykey] = abs( temp - 0.5 ) / 0.5
10009
10404
  return myqs
10010
10405
 
10011
-
10012
- def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_image, overlay_cmap='bwr', nslices=21, ncol=7, edge_image_dilation = 0, black_bg=True, axes = [0,1,2], fixed_overlay_range=None, crop=True, verbose=False ):
10406
+ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_image, overlay_cmap='bwr', nslices=21, ncol=7, edge_image_dilation = 0, black_bg=True, axes = [0,1,2], fixed_overlay_range=None, crop=5, verbose=False ):
10013
10407
  """
10014
10408
  Create figures based on statistical data and an underlying brain image.
10015
10409
 
10016
10410
  Assumes both ~/.antspyt1w and ~/.antspymm data is available
10017
10411
 
10018
10412
  Parameters:
10019
- - statistical_df (pandas dataframe): with 2 columns named anat and value
10413
+ - statistical_df (pandas dataframe): with 2 columns named anat and values
10020
10414
  the anat column should have names that meet *partial matching* criterion
10021
10415
  with respect to regions that are measured in antspymm. value will be
10022
10416
  the value to be displayed. if two examples of a given region exist in
@@ -10031,12 +10425,13 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10031
10425
  - black_bg (bool): boolean
10032
10426
  - axes (list): integer list typically [0,1,2] sagittal coronal axial
10033
10427
  - fixed_overlay_range (list): scalar pair will try to keep a constant cbar and will truncate the overlay at these min/max values
10034
- - crop (bool): crops the image to display by the extent of the overlay
10428
+ - crop (int): crops the image to display by the extent of the overlay; larger values dilate the masks more.
10035
10429
  - verbose (bool): boolean
10036
10430
 
10037
10431
  Returns:
10038
10432
  an image with values mapped to the associated regions
10039
10433
  """
10434
+ import re
10040
10435
 
10041
10436
  # Read the statistical file
10042
10437
  zz = statistical_df
@@ -10051,13 +10446,13 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10051
10446
  edgeimg = ants.iMath( edgeimg, "MD", edge_image_dilation)
10052
10447
 
10053
10448
  # Define lists and data frames
10054
- postfix = ['bf', 'deep_cit168lab', 'mtl', 'cerebellum', 'dkt_cortex','brainstem']
10055
- atlas = ['BF', 'CIT168', 'MTL', 'TustisonCobra', 'desikan-killiany-tourville','brainstem']
10056
- postdesc = ['nbm3CH13', 'CIT168_Reinf_Learn_v1_label_descriptions_pad', 'mtl_description', 'cerebellum', 'dkt','CIT168_T1w_700um_pad_adni_brainstem']
10449
+ postfix = ['bf', 'cit168lab', 'mtl', 'cerebellum', 'dkt_cortex','brainstem','JHU_wm','yeo']
10450
+ atlas = ['BF', 'CIT168', 'MTL', 'TustisonCobra', 'desikan-killiany-tourville','brainstem','JHU_wm','yeo']
10451
+ postdesc = ['nbm3CH13', 'CIT168_Reinf_Learn_v1_label_descriptions_pad', 'mtl_description', 'cerebellum', 'dkt','CIT168_T1w_700um_pad_adni_brainstem','FA_JHU_labels_edited','ppmi_template_500Parcels_Yeo2011_17Networks_2023_homotopic']
10057
10452
  statdf = pd.DataFrame({'img': postfix, 'atlas': atlas, 'csvdescript': postdesc})
10058
10453
  templateprefix = '~/.antspymm/PPMI_template0_'
10059
10454
  # Iterate through columns and create figures
10060
- col2viz = 'value'
10455
+ col2viz = 'values'
10061
10456
  if True:
10062
10457
  anattoshow = zz['anat'].unique()
10063
10458
  if verbose:
@@ -10069,21 +10464,70 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10069
10464
  if verbose:
10070
10465
  print(str(k) + " " + anattoshow[k] )
10071
10466
  mysub = zz[zz['anat'].str.contains(anattoshow[k])]
10467
+ anatsear=re.sub("dti.fa","",anattoshow[k])
10468
+ anatsear=re.sub("t1.volasym","",anatsear)
10469
+ anatsear=re.sub("t1.thkasym","",anatsear)
10470
+ anatsear=re.sub("t1.areaasym","",anatsear)
10471
+ anatsear=re.sub("t1.vol.","",anatsear)
10472
+ anatsear=re.sub("t1.thk.","",anatsear)
10473
+ anatsear=re.sub("t1.area.","",anatsear)
10474
+ anatsear=re.sub("asymdp.","",anatsear)
10475
+ anatsear=re.sub("asym.","",anatsear)
10476
+ anatsear=re.sub("dti.md.","",anatsear)
10477
+ anatsear=re.sub("dti.fa.","",anatsear)
10478
+ anatsear=re.sub("dti.md","",anatsear)
10479
+ anatsear=re.sub("dti.mean.md.","",anatsear)
10480
+ anatsear=re.sub("dti.mean.fa.","",anatsear)
10481
+ anatsear=re.sub("lravg","",anatsear)
10482
+ atlassearch = mydict['tidynames'].str.contains(anatsear)
10483
+ if atlassearch.sum() > 0:
10484
+ whichatlas = mydict[atlassearch]['Atlas'].iloc[0]
10485
+ oglabelname = mydict[atlassearch]['Label'].iloc[0]
10486
+ else:
10487
+ print(anatsear)
10488
+ oglabelname='unknown'
10489
+ whichatlas=None
10490
+ if verbose:
10491
+ print("oglabelname " + oglabelname )
10072
10492
  vals2viz = mysub[col2viz].agg(['min', 'max'])
10073
10493
  vals2viz = vals2viz[abs(vals2viz).idxmax()]
10074
10494
  myext = None
10075
10495
  if 'dktcortex' in anattoshow[k]:
10076
10496
  myext = 'dkt_cortex'
10077
10497
  elif 'cit168' in anattoshow[k]:
10078
- myext = 'deep_cit168lab'
10498
+ myext = 'cit168lab'
10079
10499
  elif 'mtl' in anattoshow[k]:
10080
10500
  myext = 'mtl'
10501
+ oglabelname=re.sub('mtl', '',anatsear)
10081
10502
  elif 'cerebellum' in anattoshow[k]:
10082
10503
  myext = 'cerebellum'
10504
+ oglabelname=re.sub('cerebellum', '',anatsear)
10505
+ # oglabelname=oglabelname[2:]
10083
10506
  elif 'brainstem' in anattoshow[k]:
10084
10507
  myext = 'brainstem'
10085
10508
  elif any(item in anattoshow[k] for item in ['nbm', 'bf']):
10086
10509
  myext = 'bf'
10510
+ oglabelname=re.sub(r'\.', '_',anatsear)
10511
+ elif whichatlas == 'johns hopkins white matter':
10512
+ myext = 'JHU_wm'
10513
+ elif whichatlas == 'desikan-killiany-tourville':
10514
+ myext = 'dkt_cortex'
10515
+ elif whichatlas == 'CIT168':
10516
+ myext = 'cit168lab'
10517
+ elif whichatlas == 'BF':
10518
+ myext = 'bf'
10519
+ oglabelname=re.sub('bf', '',oglabelname)
10520
+ elif whichatlas == 'yeo_homotopic':
10521
+ myext = 'yeo'
10522
+ if myext is None and verbose:
10523
+ print( "MYEXT " + anattoshow[k] + ' unfound ' + whichatlas )
10524
+ else:
10525
+ if verbose:
10526
+ print( "MYEXT " + myext )
10527
+
10528
+ if myext == 'cit168lab':
10529
+ oglabelname=re.sub("cit168","",oglabelname)
10530
+
10087
10531
  for j in postfix:
10088
10532
  if j == "dkt_cortex":
10089
10533
  j = 'dktcortex'
@@ -10097,30 +10541,86 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10097
10541
  locfilename = templateprefix + myext + '.nii.gz'
10098
10542
  if verbose:
10099
10543
  print( locfilename )
10100
- myatlas = ants.image_read(locfilename)
10101
- atlasDescript = pd.read_csv(f"~/.antspyt1w/{correctdescript}.csv")
10102
- atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10103
- atlasDescript['Description'] = atlasDescript['Description'].str.replace(" ", "_")
10104
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left_", "_")
10105
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right_", "_")
10106
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left", "")
10107
- atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right", "")
10544
+ if myext == 'yeo':
10545
+ oglabelname=oglabelname.lower()
10546
+ oglabelname=re.sub("rsfmri_fcnxpro122_","",oglabelname)
10547
+ oglabelname=re.sub("rsfmri_fcnxpro129_","",oglabelname)
10548
+ oglabelname=re.sub("rsfmri_fcnxpro134_","",oglabelname)
10549
+ locfilename = "~/.antspymm/ppmi_template_500Parcels_Yeo2011_17Networks_2023_homotopic.nii.gz"
10550
+ atlasDescript = pd.read_csv(f"~/.antspymm/{correctdescript}.csv")
10551
+ atlasDescript.rename(columns={'SystemName': 'Description'}, inplace=True)
10552
+ atlasDescript.rename(columns={'ROI': 'Label'}, inplace=True)
10553
+ atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10554
+ else:
10555
+ atlasDescript = pd.read_csv(f"~/.antspyt1w/{correctdescript}.csv")
10556
+ atlasDescript['Description'] = atlasDescript['Description'].str.lower()
10557
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace(" ", "_")
10558
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left_", "_")
10559
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right_", "_")
10560
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_left", "")
10561
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("_right", "")
10562
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("left_", "")
10563
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("right_", "")
10564
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("/",".")
10565
+ if myext == 'JHU_wm':
10566
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("fa-", "")
10567
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("-left-", "")
10568
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("-right-", "")
10569
+ if myext == 'cerebellum':
10570
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("l_", "")
10571
+ atlasDescript['Description'] = atlasDescript['Description'].str.replace("r_", "")
10572
+
10573
+ if verbose:
10574
+ print( atlasDescript )
10575
+ oglabelname = oglabelname.lower()
10576
+ oglabelname = re.sub(" ", "_",oglabelname)
10577
+ oglabelname = re.sub("_left_", "_",oglabelname)
10578
+ oglabelname = re.sub("_right_", "_",oglabelname)
10579
+ oglabelname = re.sub("_left", "",oglabelname)
10580
+ oglabelname = re.sub("_right", "",oglabelname)
10581
+ oglabelname = re.sub("t1hier_vol_", "",oglabelname)
10582
+ oglabelname = re.sub("t1hier_area_", "",oglabelname)
10583
+ oglabelname = re.sub("t1hier_thk_", "",oglabelname)
10584
+ oglabelname = re.sub("dktregions", "",oglabelname)
10585
+ oglabelname = re.sub("dktcortex", "",oglabelname)
10586
+ if myext == 'JHU_wm':
10587
+ oglabelname = re.sub("dti_mean_fa.", "",oglabelname)
10588
+ oglabelname = re.sub("dti_mean_md.", "",oglabelname)
10589
+ oglabelname = re.sub(".left.", "",oglabelname)
10590
+ oglabelname = re.sub(".right.", "",oglabelname)
10591
+ oglabelname = re.sub(".lravg.", "",oglabelname)
10592
+ oglabelname = re.sub(".asym.", "",oglabelname)
10593
+
10594
+ if verbose:
10595
+ print("oglabelname " + oglabelname )
10596
+
10108
10597
  if myext == 'cerebellum':
10109
10598
  atlasDescript['Description'] = atlasDescript['Description'].str.replace("l_", "")
10110
10599
  atlasDescript['Description'] = atlasDescript['Description'].str.replace("r_", "")
10111
- whichindex = atlasDescript.index[atlasDescript['Description'] == anattoshow[k]].values[0]
10600
+ whichindex = atlasDescript.index[atlasDescript['Description'] == oglabelname].values[0]
10112
10601
  else:
10113
- whichindex = atlasDescript.index[atlasDescript['Description'].str.contains(anattoshow[k])]
10602
+ whichindex = atlasDescript.index[atlasDescript['Description'].str.contains(oglabelname)]
10114
10603
 
10115
10604
  if type(whichindex) is np.int64:
10116
10605
  labelnums = atlasDescript.loc[whichindex, 'Label']
10117
10606
  else:
10118
10607
  labelnums = list(atlasDescript.loc[whichindex, 'Label'])
10608
+
10609
+ if myext == 'yeo':
10610
+ parts = re.findall(r'\D+', oglabelname)
10611
+ oglabelname = [part.replace('_', '') for part in parts if part.replace('_', '')]
10612
+ filtered_df = atlasDescript[atlasDescript['Description'].isin(oglabelname)]
10613
+ labelnums = filtered_df['Label'].tolist()
10614
+
10119
10615
  if not isinstance(labelnums, list):
10120
10616
  labelnums=[labelnums]
10121
10617
  addemiszero = ants.threshold_image(addem, 0, 0)
10122
10618
  temp = ants.image_read(locfilename)
10123
10619
  temp = ants.mask_image(temp, temp, level=labelnums, binarize=True)
10620
+ if verbose:
10621
+ print("DEBUG")
10622
+ print( temp.sum() )
10623
+ print( labelnums )
10124
10624
  temp[temp == 1] = (vals2viz)
10125
10625
  temp[addemiszero == 0] = 0
10126
10626
  addem = addem + temp
@@ -10129,8 +10629,8 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10129
10629
  print('Done Adding')
10130
10630
  for axx in axes:
10131
10631
  figfn=output_prefix+f"fig{col2viz}ax{axx}_py.jpg"
10132
- if crop:
10133
- cmask = ants.threshold_image( addem,1e-5, 1e9 ).iMath("MD",3) + ants.threshold_image( addem,-1e9, -1e-5 ).iMath("MD",3)
10632
+ if crop > 0:
10633
+ cmask = ants.threshold_image( addem,1e-5, 1e9 ).iMath("MD",crop) + ants.threshold_image( addem,-1e9, -1e-5 ).iMath("MD",crop)
10134
10634
  addemC = ants.crop_image( addem, cmask )
10135
10635
  edgeimgC = ants.crop_image( edgeimg, cmask )
10136
10636
  else:
@@ -10150,7 +10650,6 @@ def brainmap_figure(statistical_df, data_dictionary_path, output_prefix, brain_i
10150
10650
  print("DONE brain map figures")
10151
10651
  return addem
10152
10652
 
10153
-
10154
10653
  def filter_df(indf, myprefix):
10155
10654
  """
10156
10655
  Process and filter a pandas DataFrame, removing certain columns,
@@ -10324,6 +10823,27 @@ def aggregate_antspymm_results(input_csv, subject_col='subjectID', date_col='dat
10324
10823
  df=df.drop(badnames, axis=1)
10325
10824
  return( df )
10326
10825
 
10826
def find_most_recent_file(file_list):
    """
    Pick the most recently modified file among the given paths.

    Parameters:
    - file_list: An iterable of path strings. Entries that are not existing
      regular files (missing paths, directories) are ignored.

    Returns:
    - A one-element list holding the path with the latest modification time,
      or None when no entry is an existing file.
    """
    candidates = list(filter(os.path.isfile, file_list))
    if not candidates:
        return None
    # wrapped in a list: the result comes back as a one-element list, not a bare path
    return [max(candidates, key=os.path.getmtime)]
10846
+
10327
10847
  def aggregate_antspymm_results_sdf(
10328
10848
  study_df,
10329
10849
  project_col='projectID',
@@ -10402,8 +10922,6 @@ def aggregate_antspymm_results_sdf(
10402
10922
  # Warning message for untested function
10403
10923
  warnings.warn("Warning: This function is not well tested. Use with caution.")
10404
10924
 
10405
- # if valid_modalities is None:
10406
- valid_modalities = get_valid_modalities('long')
10407
10925
  vmoddict = {}
10408
10926
  # Add key-value pairs
10409
10927
  vmoddict['imageID'] = 'T1w'
@@ -10532,8 +11050,8 @@ def aggregate_antspymm_results_sdf(
10532
11050
  t1wfn = sorted( glob( modsearch ) )
10533
11051
  if len( t1wfn ) > 1:
10534
11052
  nlarge = len(t1wfn)
10535
- t1wfn = [ t1wfn[ len(t1wfn)-1 ] ]
10536
- warnings.warn("there are " + str( nlarge ) + " number of wide fns with search path " + modsearch + " we take the last of these " + t1wfn[0] )
11053
+ t1wfn = find_most_recent_file( t1wfn )
11054
+ warnings.warn("there are " + str( nlarge ) + " number of wide fns with search path " + modsearch + " we take the most recent of these " + t1wfn[0] )
10537
11055
  # raise ValueError("there are " + str( len( t1wfn ) ) + " number of wide fns with search path " + modsearch )
10538
11056
  if len( t1wfn ) == 1:
10539
11057
  if verbose:
@@ -10649,3 +11167,276 @@ def filter_image_files(image_paths, criteria='largest'):
10649
11167
  raise ValueError("Criteria must be 'smallest', 'largest', or 'brightest'.")
10650
11168
 
10651
11169
  return selected_image_path
11170
+
11171
+
11172
+
11173
def mm_match_by_qc_scoring(df_a, df_b, match_column, criteria, prefix='matched_', exclude_columns=None):
    """
    Attach to every row of df_a the best-scoring row of df_b that shares its
    value in ``match_column``.

    Each criterion column of df_b is z-scored (sign-flipped for 'max' so that
    lower is always better); the per-row sum is stored as 'best_score' and the
    candidate with the smallest sum wins. The 'score_<col>' and 'best_score'
    columns are added to the working copy of df_b, so they appear (prefixed)
    in the combined frame and (unprefixed) in the unmatched frame. A df_b row
    is not removed once chosen, so several df_a rows may select the same one.

    Parameters:
    - df_a: DataFrame whose rows are to be extended.
    - df_b: DataFrame providing the candidate rows.
    - match_column: column name that must be equal between the two frames.
    - criteria: dict mapping df_b column names to 'min' or 'max'.
    - prefix: string prepended to every df_b column name in the output,
      except ``match_column``.
    - exclude_columns: optional list of df_b column names to leave out of
      the combined output.

    Returns:
    - A tuple of two DataFrames:
        1. df_a (index reset) with the matched df_b columns appended; rows
           without a candidate get missing values in those columns.
        2. The rows of df_b (index reset) that were never selected.
    """
    from scipy.stats import zscore

    # discard leftover index columns such as 'Unnamed: 0'
    df_a = df_a.loc[:, ~df_a.columns.str.startswith('Unnamed:')]
    df_b = df_b.loc[:, ~df_b.columns.str.startswith('Unnamed:')].copy()

    # one standardized score per criterion, oriented so smaller is better
    for metric, direction in criteria.items():
        if direction == 'max':
            df_b.loc[df_b.index, f'score_{metric}'] = zscore(-df_b[metric])
        elif direction == 'min':
            df_b.loc[df_b.index, f'score_{metric}'] = zscore(df_b[metric])

    df_b['best_score'] = df_b[[f'score_{metric}' for metric in criteria.keys()]].sum(axis=1)

    chosen_labels = []   # index labels of df_b rows that were selected
    chosen_rows = []     # one entry per df_a row, empty Series when no candidate
    for _, a_row in df_a.iterrows():
        candidates = df_b[df_b[match_column] == a_row[match_column]]
        if candidates.empty:
            chosen_rows.append(pd.Series(dtype='float64'))
            continue
        winner = candidates['best_score'].idxmin()
        chosen_labels.append(winner)
        chosen_rows.append(candidates.loc[winner])

    df_matched = pd.DataFrame(chosen_rows).reset_index(drop=True)

    if exclude_columns is not None:
        df_matched = df_matched.drop(columns=exclude_columns, errors='ignore')

    def _with_prefix(label):
        # the key column keeps its name; everything else is namespaced
        return label if label == match_column else f"{prefix}{label}"

    df_matched = df_matched.rename(columns=_with_prefix)

    # side-by-side join on position
    result_df = pd.concat([df_a.reset_index(drop=True), df_matched], axis=1)

    unmatched_df_b = df_b.drop(index=chosen_labels).reset_index(drop=True)

    return result_df, unmatched_df_b
11237
+
11238
+
11239
def fix_LR_RL_stuff(df, col1, col2, size_col1, size_col2, id1, id2 ):
    """
    Reduce each (first, second) pair of entries to a single entry unless both
    carry an 'RL' or 'LR' substring.

    For every row, ``col1``/``col2`` are inspected for the substrings 'RL'
    and 'LR':

    - both contain one: the row is left unchanged;
    - only the second contains one: the second entry replaces the first
      unless its size is strictly smaller, in which case it is discarded;
    - otherwise (neither, or only the first): the second entry replaces the
      first only when the first size is strictly smaller, else the second
      is discarded.

    After a replacement or a discard, the second slot is cleared
    (``col2`` and ``id2`` set to None, ``size_col2`` set to 0).

    Parameters:
    - df: pandas.DataFrame to process. It is not modified.
    - col1, col2: columns holding the strings that are searched.
    - size_col1, size_col2: columns holding the values that are compared.
    - id1, id2: identifier columns moved/cleared together with col1/col2.

    Returns:
    - A modified copy of ``df``. col1, col2, id1 and id2 are converted to
      strings first, so missing values in them come back as text.
    """
    out = df.copy()
    # substring tests below need plain strings
    for name in (col1, col2, id1, id2):
        out[name] = out[name].astype(str)

    def _mentions_lr_rl(text):
        return 'RL' in text or 'LR' in text

    def _discard_second(label):
        out.at[label, col2] = None
        out.at[label, size_col2] = 0
        out.at[label, id2] = None

    def _promote_second(label):
        # copy the second slot into the first, then empty the second
        out.at[label, col1] = out.at[label, col2]
        out.at[label, size_col1] = out.at[label, size_col2]
        out.at[label, id1] = out.at[label, id2]
        _discard_second(label)

    for label, record in out.iterrows():
        first_tagged = _mentions_lr_rl(record[col1])
        second_tagged = _mentions_lr_rl(record[col2])
        first_size = record[size_col1]
        second_size = record[size_col2]

        if first_tagged and second_tagged:
            continue

        if second_tagged:
            # ties go to the tagged second entry
            take_second = not (second_size < first_size)
        else:
            take_second = first_size < second_size

        if take_second:
            _promote_second(label)
        else:
            _discard_second(label)
    return out
11293
+
11294
+
11295
def renameit(df, old_col_name, new_col_name):
    """
    Rename one column of a pandas DataFrame in place.

    When the column is absent nothing is renamed; a warning is issued
    instead of raising, so the call can be used for optional columns.

    Parameters:
    - df: pandas.DataFrame
        The DataFrame whose column is renamed; it is modified in place.
    - old_col_name: str
        The current name of the column.
    - new_col_name: str
        The name the column should get.

    Returns:
    None
    """
    import warnings
    if old_col_name in df.columns:
        df.rename(columns={old_col_name: new_col_name}, inplace=True)
        return

    # missing column: report it and leave the frame untouched
    warnings.warn(f"The column '{old_col_name}' does not exist in the DataFrame.")
11321
+
11322
+
11323
def mm_match_by_qc_scoring_all( qc_dataframe, fix_LRRL=True, verbose=True ):
    """
    Build one row per T1w entry with the best-scoring images of the other
    modalities attached, based on the metrics in a QC DataFrame.

    Parameters:
    ----------
    qc_dataframe : pandas.DataFrame
        QC table. The columns 'modality', 'filename', 'ol_loop', 'ol_lof' and
        'ol_lof_decision' are normalized in place (the caller's frame is
        modified); 'subjectIDdate' is used as the matching key.
    fix_LRRL : bool, optional
        If True, the first two rsfMRI matches are passed through
        fix_LR_RL_stuff. This is skipped with a warning when the
        'rsf1_*'/'rsf2_*' columns were not produced. If False, only a
        warning is issued.
    verbose : bool, optional
        If True, prints the shape of the remaining candidate pool before and
        after each matching pass.

    Process:
    -------
    1. Merges the 'DTIdwi' and 'DTIb0' modality labels into 'DTI' and casts
       the filename/outlier columns to str/float.
    2. Selects per-modality tables with best_mmm using the 'ol_loop'
       outlier column.
    3. Matches T2Flair to the T1w table once, then runs repeated passes for
       NM2DMT (prefixes NM1_..NM6_), DTI (DTI1_..DTI3_) and rsfMRI
       (rsf1_..rsf3_); each pass matches against the rows left unmatched by
       the previous one and is only run while more than 50 candidate rows
       remain.
    4. Renames the resulting '<prefix>imageID' / '<prefix>filename' columns
       to their short names (flairid, rsffn1, dtid2, nmid3, ...); a warning
       is issued by renameit for each column that does not exist.

    Returns:
    -------
    pandas.DataFrame
        The T1w table with the matched columns of the other modalities
        appended.
    """
    import warnings

    qc_dataframe['modality'] = qc_dataframe['modality'].replace(['DTIdwi', 'DTIb0'], 'DTI', regex=True)
    qc_dataframe['filename']=qc_dataframe['filename'].astype(str)
    qc_dataframe['ol_loop']=qc_dataframe['ol_loop'].astype(float)
    qc_dataframe['ol_lof']=qc_dataframe['ol_lof'].astype(float)
    qc_dataframe['ol_lof_decision']=qc_dataframe['ol_lof_decision'].astype(float)
    outlier_column='ol_loop'
    mmdf0 = best_mmm( qc_dataframe, 'T1w', outlier_column=outlier_column )['filt']
    fldf = best_mmm( qc_dataframe, 'T2Flair', outlier_column=outlier_column )['filt']
    nmdf = best_mmm( qc_dataframe, 'NM2DMT', outlier_column=outlier_column )['filt']
    rsdf = best_mmm( qc_dataframe, 'rsfMRI', outlier_column=outlier_column )['filt']
    dtdf = best_mmm( qc_dataframe, 'DTI', outlier_column=outlier_column )['filt']

    # columns of the matched tables that are not carried into the output
    xcl = [ 'mrimfg', 'mrimodel','mriMagneticFieldStrength', 'dti_failed', 'rsf_failed', 'subjectID', 'date', 'subjectIDdate','repeat']

    def _match_passes(matched, pool, prefixes, criteria, label):
        # One matching pass per prefix; each pass only sees the rows of
        # `pool` that earlier passes left unmatched.
        if verbose:
            print(label)
            print( pool.shape )
        for prefix in prefixes:
            if pool.shape[0] > 50:
                matched, pool = mm_match_by_qc_scoring(matched, pool, 'subjectIDdate', criteria, prefix=prefix, exclude_columns=xcl)
                if verbose:
                    print( prefix )
                    print( pool.shape )
        return matched

    structural_criteria = {'ol_loop': 'min', 'noise': 'min', 'snr': 'max', 'EVR': 'max', 'reflection_err':'min'}
    mmdf, undffl = mm_match_by_qc_scoring(mmdf0, fldf, 'subjectIDdate', structural_criteria,
        prefix='T2Flair_', exclude_columns=xcl )

    nm_prefixes = ['NM1_', 'NM2_', 'NM3_', 'NM4_', 'NM5_', 'NM6_']
    mmdf = _match_passes(mmdf, nmdf, nm_prefixes, structural_criteria, 'start NM')

    # higher bvalues lead to more noise ...
    timeseries_criteria = {'ol_loop': 'min', 'noise': 'min', 'dti_bvalueMax':'min', 'dimt':'max'}
    mmdf = _match_passes(mmdf, dtdf, ['DTI1_', 'DTI2_', 'DTI3_'], timeseries_criteria, 'start DT')

    # NOTE(review): the rsfMRI passes reuse the DTI criteria, including
    # 'dti_bvalueMax' - confirm that this is intended.
    mmdf = _match_passes(mmdf, rsdf, ['rsf1_', 'rsf2_', 'rsf3_'], timeseries_criteria, 'start rsf')

    if fix_LRRL:
        # NOTE: only the rsfMRI pair is reconciled; DTI pairs are left as matched.
        lrrl_columns = ['rsf1_filename', 'rsf2_filename', 'rsf1_dimt', 'rsf2_dimt', 'rsf1_imageID', 'rsf2_imageID']
        absent = [name for name in lrrl_columns if name not in mmdf.columns]
        if absent:
            # The rsf passes are skipped for small candidate pools, in which
            # case these columns never exist; warn instead of raising KeyError.
            warnings.warn("skipping LR/RL fix; missing columns: " + ", ".join(absent))
        else:
            mmdf=fix_LR_RL_stuff( mmdf, *lrrl_columns )
    else:
        warnings.warn("FIXME: should fix LR and RL situation for the DTI and rsfMRI")

    # now do the necessary replacements
    fixed_renames = [
        ('perf_imageID', 'perfid'),
        ('perf_filename', 'perffn'),
        ('T2Flair_imageID', 'flairid'),
        ('T2Flair_filename', 'flairfn'),
        ('rsf1_imageID', 'rsfid1'),
        ('rsf2_imageID', 'rsfid2'),
        ('rsf1_filename', 'rsffn1'),
        ('rsf2_filename', 'rsffn2'),
        ('DTI1_imageID', 'dtid1'),
        ('DTI2_imageID', 'dtid2'),
        ('DTI3_imageID', 'dtid3'),
        ('DTI1_filename', 'dtfn1'),
        ('DTI2_filename', 'dtfn2'),
        ('DTI3_filename', 'dtfn3'),
    ]
    for old_name, new_name in fixed_renames:
        renameit( mmdf, old_name, new_name )
    # cover every NM prefix used above (NM1_..NM6_), not just the first five
    for x in range(1, len(nm_prefixes) + 1):
        renameit( mmdf, "NM"+str(x)+"_imageID", "nmid"+str(x) )
        renameit( mmdf, "NM"+str(x)+"_filename", "nmfn"+str(x) )
    return mmdf