masster 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of masster might be problematic.

masster/study/plot.py CHANGED
@@ -42,9 +42,10 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             from bokeh.io.export import export_png
             export_png(plot_object, filename=filename)
             logger.info(f"Plot saved to: {abs_filename}")
-        except Exception:
+        except Exception as e:
             # Fall back to HTML if PNG export not available
             html_filename = filename.replace('.png', '.html')
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.png', '.html')
             from bokeh.resources import Resources
             from bokeh.embed import file_html
 
@@ -54,7 +55,7 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             with open(html_filename, 'w', encoding='utf-8') as f:
                 f.write(html)
 
-            logger.warning(f"PNG export not available, saved as HTML instead: {html_filename}")
+            logger.warning(f"PNG export not available ({str(e)}). Use export_png. Saved as HTML instead: {abs_html_filename}")
     elif filename.endswith(".pdf"):
         # Try to save as PDF, fall back to HTML if not available
         try:
@@ -74,6 +75,26 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
                 f.write(html)
 
             logger.warning(f"PDF export not available, saved as HTML instead: {html_filename}")
+    elif filename.endswith(".svg"):
+        # Try to save as SVG, fall back to HTML if not available
+        try:
+            from bokeh.io.export import export_svg
+            export_svg(plot_object, filename=filename)
+            logger.info(f"Plot saved to: {abs_filename}")
+        except Exception as e:
+            # Fall back to HTML if SVG export not available
+            html_filename = filename.replace('.svg', '.html')
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.svg', '.html')
+            from bokeh.resources import Resources
+            from bokeh.embed import file_html
+
+            resources = Resources(mode='cdn')
+            html = file_html(plot_object, resources, title=plot_title)
+
+            with open(html_filename, 'w', encoding='utf-8') as f:
+                f.write(html)
+
+            logger.warning(f"SVG export not available ({str(e)}). Saved as HTML instead: {abs_html_filename}")
     else:
         # Default to HTML for unknown extensions using isolated approach
         from bokeh.resources import Resources
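All three export branches (PNG, PDF, and now SVG) share one fallback pattern: try the native bokeh exporter, and on any failure write a standalone HTML document instead. A minimal sketch of that pattern outside masster — the figure and filename are placeholders, and export_png additionally requires selenium plus a browser driver:

    from bokeh.plotting import figure
    from bokeh.resources import Resources
    from bokeh.embed import file_html

    fig = figure(width=300, height=200)  # bokeh 3.x sizing properties
    fig.scatter([1, 2, 3], [4, 5, 6])

    filename = "plot.png"  # placeholder output name
    try:
        from bokeh.io.export import export_png  # needs selenium + a webdriver
        export_png(fig, filename=filename)
    except Exception as e:
        # Same fallback as above: self-contained HTML served from the CDN
        html = file_html(fig, Resources(mode="cdn"), title="fallback")
        html_filename = filename.replace(".png", ".html")
        with open(html_filename, "w", encoding="utf-8") as f:
            f.write(html)
        print(f"PNG export not available ({e}); saved {html_filename} instead")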
@@ -181,6 +202,22 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
                 logger.warning(f"PDF export not available, saved as HTML instead: {abs_html_filename}")
             except Exception as e:
                 logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
+    elif filename.endswith(".svg"):
+        # Try to save as SVG, fall back to HTML if not available
+        try:
+            from bokeh.io.export import export_svg
+            bokeh_layout = panel_obj.get_root()
+            export_svg(bokeh_layout, filename=filename)
+            logger.info(f"{plot_title} saved to: {abs_filename}")
+        except Exception as e:
+            # Fall back to HTML if SVG export not available
+            html_filename = filename.replace('.svg', '.html')
+            abs_html_filename = os.path.abspath(html_filename)
+            try:
+                panel_obj.save(html_filename, embed=True)
+                logger.warning(f"SVG export not available ({str(e)}), saved as HTML instead: {abs_html_filename}")
+            except Exception as e:
+                logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
     else:
         # Default to HTML for unknown extensions
         try:
@@ -512,9 +549,9 @@ def plot_consensus_2d(
     filename=None,
     colorby="number_samples",
     cmap=None,
-    markersize=4,
+    markersize=8,
     sizeby="inty_mean",
-    scaling="dynamic",
+    scaling="static",
     alpha=0.7,
     width=600,
     height=450,
@@ -529,7 +566,7 @@ def plot_consensus_2d(
         colorby (str): Column name to use for color mapping (default: "number_samples")
         sizeby (str): Column name to use for size mapping (default: "inty_mean")
         markersize (int): Base marker size (default: 6)
-        size (str): Controls whether points scale with zoom. Options:
+        scaling (str): Controls whether points scale with zoom. Options:
             'dynamic' - points use circle() and scale with zoom
             'static' - points use scatter() and maintain fixed pixel size
         alpha (float): Transparency level (default: 0.7)
@@ -553,7 +590,7 @@ def plot_consensus_2d(
     if colorby not in data.columns:
         self.logger.error(f"Column {colorby} not found in consensus_df.")
         return
-    if sizeby not in data.columns:
+    if sizeby is not None and sizeby not in data.columns:
         self.logger.warning(f"Column {sizeby} not found in consensus_df.")
         sizeby = None
     # if sizeby is not None, set markersize to sizeby
@@ -673,10 +710,16 @@ def plot_consensus_2d(
     p.yaxis.axis_label = "m/z"
     scatter_renderer: Any = None
     if scaling.lower() in ["dyn", "dynamic"]:
+        # Calculate appropriate radius for dynamic scaling based on data range
+        rt_range = data["rt"].max() - data["rt"].min()
+        mz_range = data["mz"].max() - data["mz"].min()
+        # Use a fraction of the smaller dimension for radius, similar to sample plotting
+        dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
+
         scatter_renderer = p.circle(
             x="rt",
             y="mz",
-            radius=markersize,
+            radius=dynamic_radius,
             fill_color={"field": colorby, "transform": color_mapper},
             line_color=None,
             alpha=alpha,
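Two behavioral changes land in plot_consensus_2d: fixed-pixel ("static") rendering becomes the default, and dynamic mode now derives the circle radius from the data extent instead of passing markersize straight to radius (which bokeh interprets in data units). The added arithmetic, with illustrative numbers:

    # Illustrative values only; the formula matches the added lines above.
    markersize = 8                # new default
    rt_range = 1200.0 - 60.0      # RT span of the plotted data, in seconds
    mz_range = 1500.0 - 100.0     # m/z span of the plotted data
    dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
    print(dynamic_radius)         # 4.56 data units, passed to p.circle(radius=...)

The old zoom-scaling behaviour is now opt-in, e.g. study.plot_consensus_2d(scaling="dynamic") on an existing Study (call form inferred from the signature above).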
@@ -59,6 +59,17 @@ def align(self, **kwargs):
     """
     # parameters initialization
     params = align_defaults()
+
+    # Handle 'params' keyword argument specifically (like merge does)
+    if 'params' in kwargs:
+        provided_params = kwargs.pop('params')
+        if isinstance(provided_params, align_defaults):
+            params = provided_params
+            self.logger.debug("Using provided align_defaults parameters from 'params' argument")
+        else:
+            self.logger.warning("'params' argument is not an align_defaults instance, ignoring")
+
+    # Process remaining kwargs
     for key, value in kwargs.items():
         if isinstance(value, align_defaults):
             params = value
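align() now mirrors merge(): a 'params' keyword carrying an align_defaults instance is consumed before the generic kwargs loop. A hedged usage sketch — study is an existing masster Study, and the exact import path of align_defaults is not shown in this diff:

    params = align_defaults()   # start from the defaults object used above
    study.align(params=params)  # consumed by the new 'params' branch

    # The per-key form still works and goes through the remaining-kwargs loop;
    # rt_tol is a parameter name that appears in the alignment log message below.
    study.align(rt_tol=5.0)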
@@ -72,7 +83,7 @@ def align(self, **kwargs):
                     f"Failed to set parameter {key} = {value} (validation failed)",
                 )
         else:
-            self.logger.debug(f"Unknown parameter {key} ignored")
+            self.logger.warning(f"Unknown parameter '{key}' ignored")
     # end of parameter initialization
 
     # Store parameters in the Study object
@@ -825,6 +836,11 @@ def _align_kd_algorithm(study_obj, fmaps, params):
         f"Align time axes with rt_tol={params.get('rt_tol')}, min_samples={params.get('min_samples')}, max_points={max_points}",
     )
 
+    # Check if feature maps are empty before proceeding
+    if not fmaps:
+        study_obj.logger.error("No feature maps available for alignment. Cannot proceed with alignment.")
+        raise ValueError("No feature maps available for alignment. This usually indicates that all samples failed to load properly.")
+
     # Choose reference map (largest number of features)
     ref_index = max(range(len(fmaps)), key=lambda i: fmaps[i].size())
     ref_map = fmaps[ref_index]
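Without this guard, an empty fmaps list would only surface at the max() call below, with a generic message. A short illustration of the failure mode the new check replaces:

    fmaps = []  # e.g. every sample failed to load
    try:
        ref_index = max(range(len(fmaps)), key=lambda i: fmaps[i].size())
    except ValueError as e:
        print(e)  # generic "empty sequence" error; exact wording varies by Python version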
@@ -1003,3 +1019,243 @@ def _align_pose_clustering_fallback(study_obj, fmaps, params):
         transformer.transformRetentionTimes(fm, trafo, True)
 
     study_obj.alignment_ref_index = ref_index
+
+
+def find_iso(self, rt_tol=0.1, mz_tol=0.01):
+    """
+    Find isotope patterns for consensus features by searching raw MS1 data.
+    OPTIMIZED VERSION: Each sample file is loaded only once for maximum efficiency.
+
+    For each consensus feature:
+    1. Find the associated feature with highest intensity
+    2. Load the corresponding sample5 file to access raw MS1 data
+    3. Use original_rt (before alignment) to find the correct scan
+    4. Search for isotope patterns in raw MS1 spectra
+    5. Look for isotope patterns: 0.33, 0.50, 0.66, 1.00, 1.50, 2.00, 3.00, 4.00, 5.00 Da
+    6. Store results as numpy arrays with [mz, inty] in the iso column
+
+    Parameters:
+        rt_tol (float): RT tolerance for scan matching in seconds
+        mz_tol (float): Additional m/z tolerance for isotope matching in Da
+    """
+    if self.consensus_df is None or self.consensus_df.is_empty():
+        self.logger.error("No consensus features found. Please run merge() first.")
+        return
+
+    if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
+        self.logger.error("No consensus mapping found. Please run merge() first.")
+        return
+
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.error("No features found.")
+        return
+
+    if self.samples_df is None or self.samples_df.is_empty():
+        self.logger.error("No samples found.")
+        return
+
+    # Add iso column if it doesn't exist
+    if "iso" not in self.consensus_df.columns:
+        self.consensus_df = self.consensus_df.with_columns(
+            pl.lit(None, dtype=pl.Object).alias("iso")
+        )
+
+    self.logger.info("Extracting isotopomers from raw MS1 data...")
+
+    # Isotope mass shifts to search for (up to 7x 13C isotopes)
+    isotope_shifts = [
+        0.33,
+        0.50,
+        0.66,
+        1.00335,
+        1.50502,
+        2.00670,
+        3.01005,
+        4.01340,
+        5.01675,
+        6.02010,
+        7.02345,
+    ]
+
+    consensus_iso_data = {}
+
+    # SUPER OPTIMIZATION: Vectorized pre-calculation using joins (10-100x faster)
+    self.logger.debug("Building sample-to-consensus mapping using vectorized operations...")
+
+    # Step 1: Join consensus_mapping with features to get intensities in one operation
+    consensus_with_features = self.consensus_mapping_df.join(
+        self.features_df.select(['feature_uid', 'sample_uid', 'inty', 'mz', 'rt', 'rt_original']),
+        on=['feature_uid', 'sample_uid'],
+        how='left'
+    )
+
+    # Step 2: Find the best feature (highest intensity) for each consensus using window functions
+    best_features = consensus_with_features.with_columns(
+        pl.col('inty').fill_null(0)  # Handle null intensities
+    ).with_columns(
+        pl.col('inty').max().over('consensus_uid').alias('max_inty')
+    ).filter(
+        pl.col('inty') == pl.col('max_inty')
+    ).group_by('consensus_uid').first()  # Take first if there are ties
+
+    # Step 3: Join with samples to get sample paths in one operation
+    best_features_with_paths = best_features.join(
+        self.samples_df.select(['sample_uid', 'sample_path']),
+        on='sample_uid',
+        how='left'
+    ).filter(
+        pl.col('sample_path').is_not_null()
+    )
+
+    # Step 4: Group by sample path for batch processing (much faster than nested loops)
+    sample_to_consensus = {}
+    for row in best_features_with_paths.iter_rows(named=True):
+        sample_path = row['sample_path']
+        consensus_uid = row['consensus_uid']
+
+        # Create feature data dictionary for compatibility
+        feature_data = {
+            'mz': row['mz'],
+            'rt': row['rt'],
+            'rt_original': row.get('rt_original', row['rt']),
+            'inty': row['inty']
+        }
+
+        if sample_path not in sample_to_consensus:
+            sample_to_consensus[sample_path] = []
+
+        sample_to_consensus[sample_path].append((consensus_uid, feature_data))
+
+    # Initialize failed consensus features (those not in the mapping)
+    processed_consensus_uids = set(best_features_with_paths['consensus_uid'].to_list())
+    for consensus_row in self.consensus_df.iter_rows(named=True):
+        consensus_uid = consensus_row["consensus_uid"]
+        if consensus_uid not in processed_consensus_uids:
+            consensus_iso_data[consensus_uid] = None
+
+    self.logger.debug(f"Will read {len(sample_to_consensus)} unique sample files for {len(self.consensus_df)} consensus features")
+
+    tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
+
+    # OPTIMIZATION 2: Process by sample file (load each file only once)
+    for sample_path, consensus_list in tqdm(
+        sample_to_consensus.items(),
+        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Read files",
+        disable=tdqm_disable,
+    ):
+        try:
+            # Load MS1 data once per sample
+            ms1_df = self._load_ms1(sample_path)
+
+            if ms1_df is None or ms1_df.is_empty():
+                # Mark all consensus features from this sample as failed
+                for consensus_uid, _ in consensus_list:
+                    consensus_iso_data[consensus_uid] = None
+                continue
+
+            # Process all consensus features for this sample
+            for consensus_uid, best_feature in consensus_list:
+                # Get the original RT (before alignment correction)
+                base_mz = best_feature["mz"]
+                original_rt = best_feature.get("rt_original", best_feature["rt"])
+
+                # Find MS1 scans near the original RT
+                rt_min = original_rt - rt_tol
+                rt_max = original_rt + rt_tol
+
+                # Filter MS1 data for scans within RT window
+                ms1_window = ms1_df.filter(
+                    (pl.col("rt") >= rt_min) & (pl.col("rt") <= rt_max)
+                )
+
+                if ms1_window.is_empty():
+                    consensus_iso_data[consensus_uid] = None
+                    continue
+
+                isotope_matches = []
+
+                # Search for each isotope shift
+                for shift in isotope_shifts:
+                    target_mz = base_mz + shift
+                    mz_min_iso = target_mz - mz_tol
+                    mz_max_iso = target_mz + mz_tol
+
+                    # Find peaks in MS1 data within m/z tolerance
+                    isotope_peaks = ms1_window.filter(
+                        (pl.col("mz") >= mz_min_iso) & (pl.col("mz") <= mz_max_iso)
+                    )
+
+                    if not isotope_peaks.is_empty():
+                        # Get the peak with maximum intensity for this isotope
+                        max_peak = isotope_peaks.filter(
+                            pl.col("inty") == pl.col("inty").max()
+                        ).row(0, named=True)
+
+                        # Store as float with specific precision: m/z to 4 decimals, intensity rounded to integer
+                        mz_formatted = round(float(max_peak["mz"]), 4)
+                        inty_formatted = float(round(max_peak["inty"]))  # Round to integer, but keep as float
+                        isotope_matches.append([mz_formatted, inty_formatted])
+
+                # Store results as numpy array
+                if isotope_matches:
+                    consensus_iso_data[consensus_uid] = np.array(isotope_matches)
+                else:
+                    consensus_iso_data[consensus_uid] = None
+
+        except Exception as e:
+            self.logger.warning(f"Failed to load MS1 data from {sample_path}: {e}")
+            # Mark all consensus features from this sample as failed
+            for consensus_uid, _ in consensus_list:
+                consensus_iso_data[consensus_uid] = None
+            continue
+
+    # Update consensus_df with isotope data
+    # Create mapping function for update
+    def get_iso_data(uid):
+        return consensus_iso_data.get(uid, None)
+
+    # Update the iso column
+    self.consensus_df = self.consensus_df.with_columns(
+        pl.col("consensus_uid").map_elements(
+            lambda uid: get_iso_data(uid),
+            return_dtype=pl.Object
+        ).alias("iso")
+    )
+
+    # Count how many consensus features have isotope data
+    iso_count = sum(1 for data in consensus_iso_data.values() if data is not None and len(data) > 0)
+
+    self.logger.info(f"Optimized isotope detection completed. Found isotope patterns for {iso_count}/{len(self.consensus_df)} consensus features.")
+
+
+def reset_iso(self):
+    """
+    Reset the iso column in consensus_df to None, clearing all isotope data.
+
+    This function clears any previously computed isotope patterns from the
+    consensus_df, setting the 'iso' column to None for all features. This
+    is useful before re-running isotope detection with different parameters
+    or to clear isotope data entirely.
+
+    Returns:
+        None
+    """
+    if self.consensus_df is None:
+        self.logger.warning("No consensus_df found. Nothing to reset.")
+        return
+
+    if "iso" not in self.consensus_df.columns:
+        self.logger.warning("No 'iso' column found in consensus_df. Nothing to reset.")
+        return
+
+    # Count how many features currently have isotope data
+    iso_count = self.consensus_df.select(
+        pl.col("iso").is_not_null().sum().alias("count")
+    ).item(0, "count")
+
+    # Reset the iso column to None
+    self.consensus_df = self.consensus_df.with_columns(
+        pl.lit(None, dtype=pl.Object).alias("iso")
+    )
+
+    self.logger.info(f"Reset isotope data for {iso_count} features. All 'iso' values set to None.")
masster/study/save.py CHANGED
@@ -154,13 +154,56 @@ def save_samples(self, samples=None):
 
 
 def _save_consensusXML(self, filename: str):
-    if self.consensus_map is None:
-        self.logger.error("No consensus map found.")
+    if self.consensus_df is None or self.consensus_df.is_empty():
+        self.logger.error("No consensus features found.")
         return
-
+
+    # Build consensus map from consensus_df with proper consensus_id values
+    import pyopenms as oms
+    consensus_map = oms.ConsensusMap()
+
+    # Set up file descriptions for all samples
+    file_descriptions = consensus_map.getColumnHeaders()
+    if hasattr(self, 'samples_df') and not self.samples_df.is_empty():
+        for i, sample_row in enumerate(self.samples_df.iter_rows(named=True)):
+            file_description = file_descriptions.get(i, oms.ColumnHeader())
+            file_description.filename = sample_row.get("sample_name", f"sample_{i}")
+            file_description.size = 0  # Will be updated if needed
+            file_description.unique_id = i + 1
+            file_descriptions[i] = file_description
+        consensus_map.setColumnHeaders(file_descriptions)
+
+    # Add consensus features to the map (simplified version without individual features)
+    for consensus_row in self.consensus_df.iter_rows(named=True):
+        consensus_feature = oms.ConsensusFeature()
+
+        # Set basic properties
+        consensus_feature.setRT(float(consensus_row.get("rt", 0.0)))
+        consensus_feature.setMZ(float(consensus_row.get("mz", 0.0)))
+        consensus_feature.setIntensity(float(consensus_row.get("inty_mean", 0.0)))
+        consensus_feature.setQuality(float(consensus_row.get("quality", 1.0)))
+
+        # Set the unique consensus_id as the unique ID
+        consensus_id_str = consensus_row.get("consensus_id", "")
+        if consensus_id_str and len(consensus_id_str) == 16:
+            try:
+                # Convert 16-character hex string to integer for OpenMS
+                consensus_uid = int(consensus_id_str, 16)
+                consensus_feature.setUniqueId(consensus_uid)
+            except ValueError:
+                # Fallback to hash if not hex
+                consensus_feature.setUniqueId(hash(consensus_id_str) & 0x7FFFFFFFFFFFFFFF)
+        else:
+            # Fallback to consensus_uid
+            consensus_feature.setUniqueId(consensus_row.get("consensus_uid", 0))
+
+        consensus_map.push_back(consensus_feature)
+
+    # Save the consensus map
     fh = oms.ConsensusXMLFile()
-    fh.store(filename, self.consensus_map)
-    self.logger.debug(f"Saved consensus map to {filename}")
+    fh.store(filename, consensus_map)
+    self.logger.debug(f"Saved consensus map with {len(self.consensus_df)} features to {filename}")
+    self.logger.debug("Features use unique 16-character consensus_id strings")
 
 
 def save_consensus(self, **kwargs):
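The rewritten writer no longer requires a live consensus_map; it rebuilds one from consensus_df and encodes each 16-character hex consensus_id as the OpenMS unique ID. The conversion logic in isolation (pure Python, no pyopenms required):

    def to_openms_uid(consensus_id: str, fallback_uid: int = 0) -> int:
        """Map a 16-char hex string to a 64-bit ID, as the writer above does."""
        if consensus_id and len(consensus_id) == 16:
            try:
                return int(consensus_id, 16)
            except ValueError:
                # Not valid hex: hash, masked to a non-negative signed 64-bit value
                return hash(consensus_id) & 0x7FFFFFFFFFFFFFFF
        return fallback_uid

    print(hex(to_openms_uid("00ff00ff00ff00ff")))  # 0xff00ff00ff00ff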
masster/study/study.py CHANGED
@@ -55,6 +55,7 @@ import polars as pl
 from masster.study.h5 import _load_study5
 from masster.study.h5 import _save_study5
 from masster.study.h5 import _save_study5_compressed
+from masster.study.h5 import _load_ms1
 from masster.study.helpers import _get_consensus_uids
 from masster.study.helpers import _get_feature_uids
 from masster.study.helpers import _get_sample_uids
@@ -120,9 +121,14 @@ from masster.study.merge import merge
 from masster.study.merge import _reset_consensus_data
 from masster.study.merge import _extract_consensus_features
 from masster.study.merge import _perform_adduct_grouping
+from masster.study.merge import _consensus_cleanup
+from masster.study.merge import _identify_adduct_by_mass_shift
 from masster.study.merge import _finalize_merge
+from masster.study.merge import _count_tight_clusters
 from masster.study.processing import integrate
 from masster.study.processing import find_ms2
+from masster.study.processing import find_iso
+from masster.study.processing import reset_iso
 from masster.study.parameters import store_history
 from masster.study.parameters import get_parameters
 from masster.study.parameters import update_parameters
@@ -382,6 +388,9 @@ class Study:
     merge = merge
     find_consensus = merge  # Backward compatibility alias
     find_ms2 = find_ms2
+    find_iso = find_iso
+    reset_iso = reset_iso
+    iso_reset = reset_iso
     integrate = integrate
     integrate_chrom = integrate  # Backward compatibility alias
     fill = fill
@@ -418,9 +427,11 @@ class Study:
     set_source = set_source
     sample_color = sample_color
     sample_color_reset = sample_color_reset
+    reset_sample_color = sample_color_reset
     name_replace = sample_name_replace
     name_reset = sample_name_reset
-
+    reset_name = sample_name_reset
+
     # === Data Compression and Storage ===
     compress = compress
     compress_features = compress_features
@@ -433,8 +444,10 @@ class Study:
 
     # === Reset Operations ===
     fill_reset = fill_reset
+    reset_fill = fill_reset
     align_reset = align_reset
-
+    reset_align = align_reset
+
     # === Plotting and Visualization ===
     plot_alignment = plot_alignment
     plot_chrom = plot_chrom
@@ -458,8 +471,10 @@ class Study:
     identify = identify
     get_id = get_id
     id_reset = id_reset
+    reset_id = id_reset
     lib_reset = lib_reset
-
+    reset_lib = lib_reset
+
     # === Parameter Management ===
     store_history = store_history
     get_parameters = get_parameters
@@ -475,6 +490,7 @@ class Study:
     _load_study5 = _load_study5
     _save_study5 = _save_study5
     _save_study5_compressed = _save_study5_compressed
+    _load_ms1 = _load_ms1
     _get_consensus_uids = _get_consensus_uids
     _get_feature_uids = _get_feature_uids
     _get_sample_uids = _get_sample_uids
@@ -496,6 +512,8 @@ class Study:
     _reset_consensus_data = _reset_consensus_data
     _extract_consensus_features = _extract_consensus_features
     _perform_adduct_grouping = _perform_adduct_grouping
+    _consensus_cleanup = _consensus_cleanup
+    _identify_adduct_by_mass_shift = _identify_adduct_by_mass_shift
     _finalize_merge = _finalize_merge
 
     # === Default Parameters ===
@@ -873,6 +891,15 @@ class Study:
             + (self.consensus_mapping_df.estimated_size() if self.consensus_mapping_df is not None else 0)
         )
 
+        # Calculate tight clusters count
+        tight_clusters_count = 0
+        if consensus_df_len > 0:
+            try:
+                tight_clusters_count = _count_tight_clusters(self, mz_tol=0.04, rt_tol=0.3)
+            except Exception as e:
+                # If tight clusters calculation fails, just use 0
+                tight_clusters_count = 0
+
         # Add warning symbols for out-of-range values
         consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
 
@@ -901,6 +928,9 @@ class Study:
         elif max_samples < samples_df_len * 0.8:
             max_samples_warning = f" {_WARNING_SYMBOL}"
 
+        # Add warning for tight clusters
+        tight_clusters_warning = f" {_WARNING_SYMBOL}" if tight_clusters_count > 10 else ""
+
         summary = (
             f"Study folder: {self.folder}\n"
            f"Last save: {self.filename}\n"
@@ -910,6 +940,7 @@ class Study:
             f"- not in consensus: {ratio_not_in_consensus_to_total:.0f}%\n"
             f"Consensus: {consensus_df_len}{consensus_warning}\n"
             f"- RT spread: {rt_spread_text}{rt_spread_warning}\n"
+            f"- Tight clusters: {tight_clusters_count}{tight_clusters_warning}\n"
             f"- Min samples count: {min_samples:.0f}\n"
             f"- Mean samples count: {mean_samples:.0f}\n"
             f"- Max samples count: {max_samples:.0f}{max_samples_warning}\n"
@@ -70,6 +70,9 @@
     "chrom_height_scaled_mean": {
         "dtype": "pl.Float64"
     },
+    "iso": {
+        "dtype": "pl.Object"
+    },
     "iso_mean": {
         "dtype": "pl.Float64"
     },
@@ -4,8 +4,14 @@ Wizard module for automated processing of mass spectrometry studies.
 
 This module provides the Wizard class for fully automated processing of MS data
 from raw files to final study results, including batch conversion, assembly,
 alignment, merging, plotting, and export.
+
+The create_script() function allows immediate generation of standalone analysis
+scripts without creating a Wizard instance first.
+
+The execute() function combines create_script() with immediate execution of the
+generated script for fully automated processing.
 """
 
-from .wizard import Wizard, wizard_def
+from .wizard import Wizard, wizard_def, create_script, execute
 
-__all__ = ["Wizard", "wizard_def"]
+__all__ = ["Wizard", "wizard_def", "create_script", "execute"]
+ __all__ = ["Wizard", "wizard_def", "create_script", "execute"]