masster 0.4.20-py3-none-any.whl → 0.4.21-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

masster/study/merge.py CHANGED
@@ -400,7 +400,7 @@ def merge(self, **kwargs) -> None:
     # Feature maps will be generated on-demand within each merge method
 
     self.logger.info(
-        f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da, min_rel_cc_size={params.min_rel_cc_size}, max_pairwise_log_fc={params.max_pairwise_log_fc}, max_nr_conflicts={params.max_nr_conflicts}"
+        f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
     )
 
     # Initialize
@@ -446,7 +446,7 @@ def merge(self, **kwargs) -> None:
     # Note: _merge_qt_chunked populates consensus_df directly, no need to extract
 
     # Enhanced post-clustering to merge over-segmented features (for qt and kd methods)
-    if params.method in ['qt', 'sensitivity', 'qt_chunked', 'kd_chunked']:
+    if params.method in ['qt', 'sensitivity', 'qt_chunked', 'kd_chunked', 'quality']:
         self._consensus_cleanup(params.rt_tol, params.mz_tol)
 
     # Perform adduct grouping
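
The hunk above adds the new 'quality' method to the set that receives the _consensus_cleanup() pass. A minimal usage sketch, assuming a loaded Study instance named study; the keyword names mirror the log line in the first hunk, but the values are purely illustrative, not package defaults:

    # Hypothetical call; values are illustrative.
    study.merge(method="quality", min_samples=3, rt_tol=2.0, mz_tol=0.01)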
@@ -705,11 +705,11 @@ def _merge_kd_strict(self, params: merge_defaults) -> oms.ConsensusMap:
     optimized_params = params
 
     # Phase 1: Standard KD clustering
-    self.logger.info("Initial KD clustering")
+    self.logger.debug("Initial KD clustering")
     consensus_map = _merge_kd(self, optimized_params)
 
     # Phase 2: Post-processing quality control
-    self.logger.info("Post-processing quality control")
+    self.logger.debug("Post-processing quality control")
     consensus_map = _apply_kd_strict_postprocessing(self, consensus_map, optimized_params)
 
     return consensus_map
@@ -911,7 +911,7 @@ def _apply_kd_strict_postprocessing(self, consensus_map: oms.ConsensusMap, param
     final_feature_count = len(self.consensus_df)
     reduction_pct = ((initial_feature_count - final_feature_count) / initial_feature_count * 100) if initial_feature_count > 0 else 0
 
-    self.logger.info(f"Post-processing complete: {initial_feature_count} → {final_feature_count} features ({reduction_pct:.1f}% reduction)")
+    self.logger.info(f"Consensus cleanup complete: {initial_feature_count} → {final_feature_count} features ({reduction_pct:.1f}% reduction)")
 
     # Create a new consensus map for compatibility (the processed data is in consensus_df)
     processed_consensus_map = oms.ConsensusMap()
@@ -1691,8 +1691,12 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
             mz_min_local = mz_max_local = consensus_mz
 
         # Store chunk consensus with feature tracking
+        # Generate unique 16-character consensus_id string
+        import uuid
+        consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
+
         chunk_consensus_data = {
-            'consensus_id': consensus_id_counter,
+            'consensus_id': consensus_id_str,
             'chunk_idx': chunk_idx,
             'chunk_start_idx': chunk_start_idx,
             'mz': consensus_mz,
@@ -1710,7 +1714,6 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
         }
 
         all_chunk_consensus.append(chunk_consensus_data)
-        consensus_id_counter += 1
 
     if not all_chunk_consensus:
         # No valid consensus features found
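
The two hunks above replace the sequential consensus_id_counter with a random 16-character ID. A standalone sketch of the scheme: the slice keeps the first 16 hex characters of a UUID4, which carry roughly 60 random bits (the fixed version nibble falls inside the slice), so the birthday-collision probability stays below one in a million even at a million consensus features:

    import uuid

    def make_consensus_id() -> str:
        # First 16 hex chars of a UUID4 (~60 random bits).
        return str(uuid.uuid4()).replace('-', '')[:16]

    print(make_consensus_id())  # e.g. '3f9c2a1b7d0e4a16'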
@@ -2094,9 +2097,13 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
             ms2_count += len(ms2_scans)
 
     # Build consensus metadata
+    # Generate unique 16-character consensus_id string
+    import uuid
+    consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
+
     return {
         "consensus_uid": int(consensus_uid),
-        "consensus_id": str(consensus_uid),  # Use simple string ID
+        "consensus_id": consensus_id_str,  # Use unique 16-char string ID
         "quality": round(float(np.mean(quality_values)), 3) if len(quality_values) > 0 else 1.0,
         "number_samples": number_samples if number_samples is not None else len(feature_data_list),
         "rt": round(float(np.mean(rt_values)), 4) if len(rt_values) > 0 else 0.0,
@@ -2118,6 +2125,7 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
         "chrom_prominence_mean": round(float(np.mean(prominence_values)), 0) if len(prominence_values) > 0 else 0.0,
         "chrom_prominence_scaled_mean": round(float(np.mean(prominence_scaled_values)), 3) if len(prominence_scaled_values) > 0 else 0.0,
         "chrom_height_scaled_mean": round(float(np.mean(height_scaled_values)), 3) if len(height_scaled_values) > 0 else 0.0,
+        "iso": None,  # Will be filled by find_iso() function
         "iso_mean": round(float(np.mean(iso_values)), 2) if len(iso_values) > 0 else 0.0,
         "charge_mean": round(float(np.mean(charge_values)), 2) if len(charge_values) > 0 else 0.0,
         "number_ms2": int(ms2_count),
@@ -2509,10 +2517,14 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
                 if ms2_scans is not None:
                     ms2_count += len(ms2_scans)
 
+            # Generate unique 16-character consensus_id string (UUID-based)
+            import uuid
+            consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
+
             metadata_list.append(
                 {
                     "consensus_uid": int(i),  # "consensus_id": i,
-                    "consensus_id": str(feature.getUniqueId()),
+                    "consensus_id": consensus_id_str,  # Use unique 16-char string ID
                     "quality": round(float(feature.getQuality()), 3),
                     "number_samples": len(feature_data_list),
                     # "number_ext": int(len(features_list)),
@@ -2577,6 +2589,7 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
                     )
                     if len(height_scaled_values) > 0
                     else 0.0,
+                    "iso": None,  # Will be filled by find_iso() function
                     "iso_mean": round(float(np.mean(iso_values)), 2)
                     if len(iso_values) > 0
                     else 0.0,
@@ -3325,7 +3338,9 @@ def _finalize_merge(self, link_ms2, min_samples):
             f"Merging completed with empty result. Consensus features: {len(self.consensus_df)}. "
             f"This may be due to min_samples ({min_samples}) being too high for the available data.",
         )
-
+
+    # add iso data from raw files.
+    self.find_iso()
     if link_ms2:
         self.find_ms2()
 
masster/study/plot.py CHANGED
@@ -42,9 +42,10 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             from bokeh.io.export import export_png
             export_png(plot_object, filename=filename)
             logger.info(f"Plot saved to: {abs_filename}")
-        except Exception:
+        except Exception as e:
             # Fall back to HTML if PNG export not available
             html_filename = filename.replace('.png', '.html')
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.png', '.html')
             from bokeh.resources import Resources
             from bokeh.embed import file_html
@@ -54,7 +55,7 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             with open(html_filename, 'w', encoding='utf-8') as f:
                 f.write(html)
 
-            logger.warning(f"PNG export not available, saved as HTML instead: {html_filename}")
+            logger.warning(f"PNG export not available ({str(e)}). Use export_png. Saved as HTML instead: {abs_html_filename}")
     elif filename.endswith(".pdf"):
         # Try to save as PDF, fall back to HTML if not available
         try:
@@ -74,6 +75,26 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
                 f.write(html)
 
             logger.warning(f"PDF export not available, saved as HTML instead: {html_filename}")
+    elif filename.endswith(".svg"):
+        # Try to save as SVG, fall back to HTML if not available
+        try:
+            from bokeh.io.export import export_svg
+            export_svg(plot_object, filename=filename)
+            logger.info(f"Plot saved to: {abs_filename}")
+        except Exception as e:
+            # Fall back to HTML if SVG export not available
+            html_filename = filename.replace('.svg', '.html')
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.svg', '.html')
+            from bokeh.resources import Resources
+            from bokeh.embed import file_html
+
+            resources = Resources(mode='cdn')
+            html = file_html(plot_object, resources, title=plot_title)
+
+            with open(html_filename, 'w', encoding='utf-8') as f:
+                f.write(html)
+
+            logger.warning(f"SVG export not available ({str(e)}). Saved as HTML instead: {abs_html_filename}")
     else:
         # Default to HTML for unknown extensions using isolated approach
         from bokeh.resources import Resources
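
The new .svg branch mirrors the existing PNG/PDF fallbacks. One caveat worth knowing: bokeh's export_svg produces a true SVG only for models created with output_backend="svg", and it requires Selenium plus a browser webdriver, which is exactly why the HTML fallback exists. A self-contained sketch of the same try/fallback pattern (figure and filenames are illustrative):

    from bokeh.plotting import figure
    from bokeh.io.export import export_svg

    p = figure(output_backend="svg")  # required for genuine SVG rendering
    p.line([1, 2, 3], [4, 6, 5])
    try:
        export_svg(p, filename="plot.svg")  # needs selenium + a webdriver
    except Exception:
        # Same fallback idea as above: write a standalone HTML file instead.
        from bokeh.embed import file_html
        from bokeh.resources import Resources
        with open("plot.html", "w", encoding="utf-8") as f:
            f.write(file_html(p, Resources(mode="cdn"), title="plot"))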
@@ -181,6 +202,22 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
                 logger.warning(f"PDF export not available, saved as HTML instead: {abs_html_filename}")
             except Exception as e:
                 logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
+    elif filename.endswith(".svg"):
+        # Try to save as SVG, fall back to HTML if not available
+        try:
+            from bokeh.io.export import export_svg
+            bokeh_layout = panel_obj.get_root()
+            export_svg(bokeh_layout, filename=filename)
+            logger.info(f"{plot_title} saved to: {abs_filename}")
+        except Exception as e:
+            # Fall back to HTML if SVG export not available
+            html_filename = filename.replace('.svg', '.html')
+            abs_html_filename = os.path.abspath(html_filename)
+            try:
+                panel_obj.save(html_filename, embed=True)
+                logger.warning(f"SVG export not available ({str(e)}), saved as HTML instead: {abs_html_filename}")
+            except Exception as e:
+                logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
     else:
         # Default to HTML for unknown extensions
         try:
@@ -59,6 +59,17 @@ def align(self, **kwargs):
     """
     # parameters initialization
     params = align_defaults()
+
+    # Handle 'params' keyword argument specifically (like merge does)
+    if 'params' in kwargs:
+        provided_params = kwargs.pop('params')
+        if isinstance(provided_params, align_defaults):
+            params = provided_params
+            self.logger.debug("Using provided align_defaults parameters from 'params' argument")
+        else:
+            self.logger.warning("'params' argument is not an align_defaults instance, ignoring")
+
+    # Process remaining kwargs
     for key, value in kwargs.items():
         if isinstance(value, align_defaults):
             params = value
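
With this change align() accepts a ready-made parameter object under the explicit params keyword, matching merge(). A hedged usage sketch, assuming a Study instance named study and that align_defaults is importable from the module defining align() (the exact import path is not shown in this diff):

    params = align_defaults()       # constructor as used in the hunk above
    study.align(params=params)      # new: 'params' keyword consumed directly
    study.align(rt_tol=5.0)         # individual keywords still work as before
    study.align(bogus_option=1)     # now emits a warning instead of a debug line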
@@ -72,7 +83,7 @@ def align(self, **kwargs):
                 f"Failed to set parameter {key} = {value} (validation failed)",
             )
         else:
-            self.logger.debug(f"Unknown parameter {key} ignored")
+            self.logger.warning(f"Unknown parameter '{key}' ignored")
     # end of parameter initialization
 
     # Store parameters in the Study object
@@ -825,6 +836,11 @@ def _align_kd_algorithm(study_obj, fmaps, params):
         f"Align time axes with rt_tol={params.get('rt_tol')}, min_samples={params.get('min_samples')}, max_points={max_points}",
     )
 
+    # Check if feature maps are empty before proceeding
+    if not fmaps:
+        study_obj.logger.error("No feature maps available for alignment. Cannot proceed with alignment.")
+        raise ValueError("No feature maps available for alignment. This usually indicates that all samples failed to load properly.")
+
     # Choose reference map (largest number of features)
     ref_index = max(range(len(fmaps)), key=lambda i: fmaps[i].size())
     ref_map = fmaps[ref_index]
@@ -1003,3 +1019,243 @@ def _align_pose_clustering_fallback(study_obj, fmaps, params):
         transformer.transformRetentionTimes(fm, trafo, True)
 
     study_obj.alignment_ref_index = ref_index
+
+
+def find_iso(self, rt_tol=0.1, mz_tol=0.01):
+    """
+    Find isotope patterns for consensus features by searching raw MS1 data.
+    OPTIMIZED VERSION: Each sample file is loaded only once for maximum efficiency.
+
+    For each consensus feature:
+    1. Find the associated feature with highest intensity
+    2. Load the corresponding sample5 file to access raw MS1 data
+    3. Use original_rt (before alignment) to find the correct scan
+    4. Search for isotope patterns in raw MS1 spectra
+    5. Look for isotope patterns: 0.33, 0.50, 0.66, 1.00, 1.50, 2.00, 3.00, 4.00, 5.00 Da
+    6. Store results as numpy arrays with [mz, inty] in the iso column
+
+    Parameters:
+        rt_tol (float): RT tolerance for scan matching in seconds
+        mz_tol (float): Additional m/z tolerance for isotope matching in Da
+    """
+    if self.consensus_df is None or self.consensus_df.is_empty():
+        self.logger.error("No consensus features found. Please run merge() first.")
+        return
+
+    if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
+        self.logger.error("No consensus mapping found. Please run merge() first.")
+        return
+
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.error("No features found.")
+        return
+
+    if self.samples_df is None or self.samples_df.is_empty():
+        self.logger.error("No samples found.")
+        return
+
+    # Add iso column if it doesn't exist
+    if "iso" not in self.consensus_df.columns:
+        self.consensus_df = self.consensus_df.with_columns(
+            pl.lit(None, dtype=pl.Object).alias("iso")
+        )
+
+    self.logger.info("Extracting isotopomers from raw MS1 data...")
+
+    # Isotope mass shifts to search for (up to 7x 13C isotopes)
+    isotope_shifts = [
+        0.33,
+        0.50,
+        0.66,
+        1.00335,
+        1.50502,
+        2.00670,
+        3.01005,
+        4.01340,
+        5.01675,
+        6.02010,
+        7.02345,
+    ]
+
+    consensus_iso_data = {}
+
+    # SUPER OPTIMIZATION: Vectorized pre-calculation using joins (10-100x faster)
+    self.logger.debug("Building sample-to-consensus mapping using vectorized operations...")
+
+    # Step 1: Join consensus_mapping with features to get intensities in one operation
+    consensus_with_features = self.consensus_mapping_df.join(
+        self.features_df.select(['feature_uid', 'sample_uid', 'inty', 'mz', 'rt', 'rt_original']),
+        on=['feature_uid', 'sample_uid'],
+        how='left'
+    )
+
+    # Step 2: Find the best feature (highest intensity) for each consensus using window functions
+    best_features = consensus_with_features.with_columns(
+        pl.col('inty').fill_null(0)  # Handle null intensities
+    ).with_columns(
+        pl.col('inty').max().over('consensus_uid').alias('max_inty')
+    ).filter(
+        pl.col('inty') == pl.col('max_inty')
+    ).group_by('consensus_uid').first()  # Take first if there are ties
+
+    # Step 3: Join with samples to get sample paths in one operation
+    best_features_with_paths = best_features.join(
+        self.samples_df.select(['sample_uid', 'sample_path']),
+        on='sample_uid',
+        how='left'
+    ).filter(
+        pl.col('sample_path').is_not_null()
+    )
+
+    # Step 4: Group by sample path for batch processing (much faster than nested loops)
+    sample_to_consensus = {}
+    for row in best_features_with_paths.iter_rows(named=True):
+        sample_path = row['sample_path']
+        consensus_uid = row['consensus_uid']
+
+        # Create feature data dictionary for compatibility
+        feature_data = {
+            'mz': row['mz'],
+            'rt': row['rt'],
+            'rt_original': row.get('rt_original', row['rt']),
+            'inty': row['inty']
+        }
+
+        if sample_path not in sample_to_consensus:
+            sample_to_consensus[sample_path] = []
+
+        sample_to_consensus[sample_path].append((consensus_uid, feature_data))
+
+    # Initialize failed consensus features (those not in the mapping)
+    processed_consensus_uids = set(best_features_with_paths['consensus_uid'].to_list())
+    for consensus_row in self.consensus_df.iter_rows(named=True):
+        consensus_uid = consensus_row["consensus_uid"]
+        if consensus_uid not in processed_consensus_uids:
+            consensus_iso_data[consensus_uid] = None
+
+    self.logger.debug(f"Will read {len(sample_to_consensus)} unique sample files for {len(self.consensus_df)} consensus features")
+
+    tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
+
+    # OPTIMIZATION 2: Process by sample file (load each file only once)
+    for sample_path, consensus_list in tqdm(
+        sample_to_consensus.items(),
+        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Read files",
+        disable=tdqm_disable,
+    ):
+        try:
+            # Load MS1 data once per sample
+            ms1_df = self._load_ms1(sample_path)
+
+            if ms1_df is None or ms1_df.is_empty():
+                # Mark all consensus features from this sample as failed
+                for consensus_uid, _ in consensus_list:
+                    consensus_iso_data[consensus_uid] = None
+                continue
+
+            # Process all consensus features for this sample
+            for consensus_uid, best_feature in consensus_list:
+                # Get the original RT (before alignment correction)
+                base_mz = best_feature["mz"]
+                original_rt = best_feature.get("rt_original", best_feature["rt"])
+
+                # Find MS1 scans near the original RT
+                rt_min = original_rt - rt_tol
+                rt_max = original_rt + rt_tol
+
+                # Filter MS1 data for scans within RT window
+                ms1_window = ms1_df.filter(
+                    (pl.col("rt") >= rt_min) & (pl.col("rt") <= rt_max)
+                )
+
+                if ms1_window.is_empty():
+                    consensus_iso_data[consensus_uid] = None
+                    continue
+
+                isotope_matches = []
+
+                # Search for each isotope shift
+                for shift in isotope_shifts:
+                    target_mz = base_mz + shift
+                    mz_min_iso = target_mz - mz_tol
+                    mz_max_iso = target_mz + mz_tol
+
+                    # Find peaks in MS1 data within m/z tolerance
+                    isotope_peaks = ms1_window.filter(
+                        (pl.col("mz") >= mz_min_iso) & (pl.col("mz") <= mz_max_iso)
+                    )
+
+                    if not isotope_peaks.is_empty():
+                        # Get the peak with maximum intensity for this isotope
+                        max_peak = isotope_peaks.filter(
+                            pl.col("inty") == pl.col("inty").max()
+                        ).row(0, named=True)
+
+                        # Store as float with specific precision: m/z to 4 decimals, intensity rounded to integer
+                        mz_formatted = round(float(max_peak["mz"]), 4)
+                        inty_formatted = float(round(max_peak["inty"]))  # Round to integer, but keep as float
+                        isotope_matches.append([mz_formatted, inty_formatted])
+
+                # Store results as numpy array
+                if isotope_matches:
+                    consensus_iso_data[consensus_uid] = np.array(isotope_matches)
+                else:
+                    consensus_iso_data[consensus_uid] = None
+
+        except Exception as e:
+            self.logger.warning(f"Failed to load MS1 data from {sample_path}: {e}")
+            # Mark all consensus features from this sample as failed
+            for consensus_uid, _ in consensus_list:
+                consensus_iso_data[consensus_uid] = None
+            continue
+
+    # Update consensus_df with isotope data
+    # Create mapping function for update
+    def get_iso_data(uid):
+        return consensus_iso_data.get(uid, None)
+
+    # Update the iso column
+    self.consensus_df = self.consensus_df.with_columns(
+        pl.col("consensus_uid").map_elements(
+            lambda uid: get_iso_data(uid),
+            return_dtype=pl.Object
+        ).alias("iso")
+    )
+
+    # Count how many consensus features have isotope data
+    iso_count = sum(1 for data in consensus_iso_data.values() if data is not None and len(data) > 0)
+
+    self.logger.info(f"Optimized isotope detection completed. Found isotope patterns for {iso_count}/{len(self.consensus_df)} consensus features.")
+
+
+def reset_iso(self):
+    """
+    Reset the iso column in consensus_df to None, clearing all isotope data.
+
+    This function clears any previously computed isotope patterns from the
+    consensus_df, setting the 'iso' column to None for all features. This
+    is useful before re-running isotope detection with different parameters
+    or to clear isotope data entirely.
+
+    Returns:
+        None
+    """
+    if self.consensus_df is None:
+        self.logger.warning("No consensus_df found. Nothing to reset.")
+        return
+
+    if "iso" not in self.consensus_df.columns:
+        self.logger.warning("No 'iso' column found in consensus_df. Nothing to reset.")
+        return
+
+    # Count how many features currently have isotope data
+    iso_count = self.consensus_df.select(
+        pl.col("iso").is_not_null().sum().alias("count")
+    ).item(0, "count")
+
+    # Reset the iso column to None
+    self.consensus_df = self.consensus_df.with_columns(
+        pl.lit(None, dtype=pl.Object).alias("iso")
+    )
+
+    self.logger.info(f"Reset isotope data for {iso_count} features. All 'iso' values set to None.")
masster/study/save.py CHANGED
@@ -154,13 +154,56 @@ def save_samples(self, samples=None):
 
 
 def _save_consensusXML(self, filename: str):
-    if self.consensus_map is None:
-        self.logger.error("No consensus map found.")
+    if self.consensus_df is None or self.consensus_df.is_empty():
+        self.logger.error("No consensus features found.")
         return
-
+
+    # Build consensus map from consensus_df with proper consensus_id values
+    import pyopenms as oms
+    consensus_map = oms.ConsensusMap()
+
+    # Set up file descriptions for all samples
+    file_descriptions = consensus_map.getColumnHeaders()
+    if hasattr(self, 'samples_df') and not self.samples_df.is_empty():
+        for i, sample_row in enumerate(self.samples_df.iter_rows(named=True)):
+            file_description = file_descriptions.get(i, oms.ColumnHeader())
+            file_description.filename = sample_row.get("sample_name", f"sample_{i}")
+            file_description.size = 0  # Will be updated if needed
+            file_description.unique_id = i + 1
+            file_descriptions[i] = file_description
+        consensus_map.setColumnHeaders(file_descriptions)
+
+    # Add consensus features to the map (simplified version without individual features)
+    for consensus_row in self.consensus_df.iter_rows(named=True):
+        consensus_feature = oms.ConsensusFeature()
+
+        # Set basic properties
+        consensus_feature.setRT(float(consensus_row.get("rt", 0.0)))
+        consensus_feature.setMZ(float(consensus_row.get("mz", 0.0)))
+        consensus_feature.setIntensity(float(consensus_row.get("inty_mean", 0.0)))
+        consensus_feature.setQuality(float(consensus_row.get("quality", 1.0)))
+
+        # Set the unique consensus_id as the unique ID
+        consensus_id_str = consensus_row.get("consensus_id", "")
+        if consensus_id_str and len(consensus_id_str) == 16:
+            try:
+                # Convert 16-character hex string to integer for OpenMS
+                consensus_uid = int(consensus_id_str, 16)
+                consensus_feature.setUniqueId(consensus_uid)
+            except ValueError:
+                # Fallback to hash if not hex
+                consensus_feature.setUniqueId(hash(consensus_id_str) & 0x7FFFFFFFFFFFFFFF)
+        else:
+            # Fallback to consensus_uid
+            consensus_feature.setUniqueId(consensus_row.get("consensus_uid", 0))
+
+        consensus_map.push_back(consensus_feature)
+
+    # Save the consensus map
     fh = oms.ConsensusXMLFile()
-    fh.store(filename, self.consensus_map)
-    self.logger.debug(f"Saved consensus map to {filename}")
+    fh.store(filename, consensus_map)
+    self.logger.debug(f"Saved consensus map with {len(self.consensus_df)} features to {filename}")
+    self.logger.debug("Features use unique 16-character consensus_id strings")
 
 
 def save_consensus(self, **kwargs):
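
A standalone sketch of the ID round-trip this hunk relies on: a 16-character hex consensus_id fits exactly into the unsigned 64-bit unique ID an OpenMS consensus feature carries, and the hash fallback covers non-hex strings (the sample ID below is hypothetical):

    cid = "3f9c2a1b7d0e4a16"  # hypothetical 16-char consensus_id
    try:
        uid = int(cid, 16)                    # 16 hex chars -> fits in 64 bits
    except ValueError:
        uid = hash(cid) & 0x7FFFFFFFFFFFFFFF  # non-hex fallback, clamped to 63 bits
    assert f"{uid:016x}" == cid               # hex IDs survive the round-trip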
masster/study/study.py CHANGED
@@ -55,6 +55,7 @@ import polars as pl
 from masster.study.h5 import _load_study5
 from masster.study.h5 import _save_study5
 from masster.study.h5 import _save_study5_compressed
+from masster.study.h5 import _load_ms1
 from masster.study.helpers import _get_consensus_uids
 from masster.study.helpers import _get_feature_uids
 from masster.study.helpers import _get_sample_uids
@@ -126,6 +127,8 @@ from masster.study.merge import _finalize_merge
 from masster.study.merge import _count_tight_clusters
 from masster.study.processing import integrate
 from masster.study.processing import find_ms2
+from masster.study.processing import find_iso
+from masster.study.processing import reset_iso
 from masster.study.parameters import store_history
 from masster.study.parameters import get_parameters
 from masster.study.parameters import update_parameters
@@ -385,6 +388,9 @@ class Study:
     merge = merge
     find_consensus = merge  # Backward compatibility alias
     find_ms2 = find_ms2
+    find_iso = find_iso
+    reset_iso = reset_iso
+    iso_reset = reset_iso
     integrate = integrate
     integrate_chrom = integrate  # Backward compatibility alias
     fill = fill
@@ -421,9 +427,11 @@ class Study:
     set_source = set_source
     sample_color = sample_color
     sample_color_reset = sample_color_reset
+    reset_sample_color = sample_color_reset
     name_replace = sample_name_replace
     name_reset = sample_name_reset
-
+    reset_name = sample_name_reset
+
     # === Data Compression and Storage ===
     compress = compress
     compress_features = compress_features
@@ -436,8 +444,10 @@ class Study:
 
     # === Reset Operations ===
     fill_reset = fill_reset
+    reset_fill = fill_reset
     align_reset = align_reset
-
+    reset_align = align_reset
+
     # === Plotting and Visualization ===
     plot_alignment = plot_alignment
     plot_chrom = plot_chrom
@@ -461,8 +471,10 @@ class Study:
     identify = identify
     get_id = get_id
     id_reset = id_reset
+    reset_id = id_reset
     lib_reset = lib_reset
-
+    reset_lib = lib_reset
+
     # === Parameter Management ===
     store_history = store_history
     get_parameters = get_parameters
@@ -478,6 +490,7 @@
     _load_study5 = _load_study5
     _save_study5 = _save_study5
     _save_study5_compressed = _save_study5_compressed
+    _load_ms1 = _load_ms1
     _get_consensus_uids = _get_consensus_uids
     _get_feature_uids = _get_feature_uids
     _get_sample_uids = _get_sample_uids
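
The new reset_* names in the hunks above are plain class-level aliases: each is bound to the same function object as the existing *_reset method, so both spellings are interchangeable:

    study.id_reset()    # existing name
    study.reset_id()    # alias added in 0.4.21, same underlying method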
@@ -70,6 +70,9 @@
     "chrom_height_scaled_mean": {
         "dtype": "pl.Float64"
     },
+    "iso": {
+        "dtype": "pl.Object"
+    },
     "iso_mean": {
         "dtype": "pl.Float64"
     },
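
Declaring "iso" as pl.Object lets each cell hold an arbitrary Python object, here the per-feature numpy array written by find_iso(). A minimal polars sketch of the same pattern:

    import polars as pl

    df = pl.DataFrame({"consensus_uid": [1, 2]})
    df = df.with_columns(pl.lit(None, dtype=pl.Object).alias("iso"))
    print(df.schema)  # consensus_uid: Int64, iso: Object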
@@ -7,8 +7,11 @@ alignment, merging, plotting, and export.
 
 The create_script() function allows immediate generation of standalone analysis
 scripts without creating a Wizard instance first.
+
+The execute() function combines create_script() with immediate execution of the
+generated script for fully automated processing.
 """
 
-from .wizard import Wizard, wizard_def, create_script
+from .wizard import Wizard, wizard_def, create_script, execute
 
-__all__ = ["Wizard", "wizard_def", "create_script"]
+__all__ = ["Wizard", "wizard_def", "create_script", "execute"]
+ __all__ = ["Wizard", "wizard_def", "create_script", "execute"]