masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

@@ -519,6 +519,10 @@ def find_features(self, **kwargs):
519
519
  low-quality peaks), lower values make it more permissive. Typical tuning range: ~3 (relaxed) to >10
520
520
  (stringent). Default: 10.0.
521
521
 
522
+ - isotope_filtering_model (str):
523
+ Isotope filtering model ('metabolites (2% RMS)', 'metabolites (5% RMS)', 'peptides', 'none').
524
+ Default: 'metabolites (5% RMS)'.
525
+
522
526
  Tuning recommendation: first set ``chrom_fwhm`` to match your LC peak shape, then set ``noise`` to a baseline
523
527
  intensity filter for your data, and finally adjust ``chrom_peak_snr`` to reach the desired balance between
524
528
  sensitivity and specificity.
@@ -556,24 +560,25 @@ def find_features(self, **kwargs):
556
560
  self.logger.warning(f"Unknown parameter {key} ignored")
557
561
 
558
562
  # Set global parameters
559
- if hasattr(params, 'threads') and params.threads is not None:
563
+ if hasattr(params, "threads") and params.threads is not None:
560
564
  try:
561
565
  # Try setting via OpenMP environment variable first (newer approach)
562
566
  import os
563
- os.environ['OMP_NUM_THREADS'] = str(params.threads)
567
+
568
+ os.environ["OMP_NUM_THREADS"] = str(params.threads)
564
569
  self.logger.debug(f"Set thread count to {params.threads} via OMP_NUM_THREADS")
565
570
  except Exception:
566
571
  self.logger.warning(f"Could not set thread count to {params.threads} - using default")
567
-
572
+
568
573
  # Set debug mode if enabled
569
- if hasattr(params, 'debug') and params.debug:
574
+ if hasattr(params, "debug") and params.debug:
570
575
  self.logger.debug("Debug mode enabled")
571
- elif hasattr(params, 'no_progress') and params.no_progress:
576
+ elif hasattr(params, "no_progress") and params.no_progress:
572
577
  self.logger.debug("No progress mode enabled")
573
-
578
+
574
579
  self.logger.info("Starting feature detection...")
575
580
  self.logger.debug(
576
- f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}",
581
+ f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}, isotope_filtering_model={params.get('isotope_filtering_model')}",
577
582
  )
578
583
 
579
584
  exp = oms.MSExperiment()
@@ -602,7 +607,8 @@ def find_features(self, **kwargs):
602
607
  # Apply MTD parameters
603
608
  mtd_par.setValue("mass_error_ppm", float(params.get("tol_ppm")))
604
609
  mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
605
- mtd_par.setValue("min_trace_length",
610
+ mtd_par.setValue(
611
+ "min_trace_length",
606
612
  float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
607
613
  )
608
614
  mtd_par.setValue(
@@ -610,7 +616,7 @@ def find_features(self, **kwargs):
610
616
  int(params.get("trace_termination_outliers")),
611
617
  )
612
618
  mtd_par.setValue("chrom_peak_snr", float(params.get("chrom_peak_snr")))
613
-
619
+
614
620
  # Additional MTD parameters
615
621
  mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
616
622
  mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
@@ -636,10 +642,10 @@ def find_features(self, **kwargs):
636
642
  epd_par.setValue("masstrace_snr_filtering", "true")
637
643
  if params.get("mz_scoring_13C"):
638
644
  epd_par.setValue("mz_scoring_13C", "true")
639
-
645
+
640
646
  # Additional EPD parameters
641
647
  epd_par.setValue("enabled", "true" if params.get("enabled") else "false")
642
-
648
+
643
649
  epd.setParameters(epd_par)
644
650
  epd.detectPeaks(mass_traces, mass_traces_deconvol)
645
651
 
@@ -675,7 +681,7 @@ def find_features(self, **kwargs):
675
681
  ffm_par.setValue("local_mz_range", float(params.get("local_mz_range")))
676
682
  ffm_par.setValue("charge_lower_bound", int(params.get("charge_lower_bound")))
677
683
  ffm_par.setValue("charge_upper_bound", int(params.get("charge_upper_bound")))
678
-
684
+ ffm_par.setValue("isotope_filtering_model", params.get("isotope_filtering_model"))
679
685
 
680
686
  ffm.setParameters(ffm_par)
681
687
 
masster/sample/sample.py CHANGED
@@ -62,6 +62,7 @@ from masster.sample.helpers import select_closest_scan
62
62
  from masster.sample.helpers import get_dda_stats
63
63
  from masster.sample.helpers import get_feature
64
64
  from masster.sample.helpers import get_scan
65
+ from masster.sample.helpers import get_eic
65
66
  from masster.sample.helpers import set_source
66
67
  from masster.sample.load import _load_featureXML
67
68
  from masster.sample.load import _load_ms2data
@@ -80,6 +81,8 @@ from masster.sample.plot import plot_feature_stats
80
81
  from masster.sample.plot import plot_ms2_cycle
81
82
  from masster.sample.plot import plot_ms2_eic
82
83
  from masster.sample.plot import plot_ms2_q1
84
+ from masster.sample.plot import plot_bpc
85
+ from masster.sample.plot import plot_tic
83
86
  from masster.sample.processing import _clean_features_df
84
87
  from masster.sample.processing import _features_deisotope
85
88
  from masster.sample.processing import _get_ztscan_stats
@@ -229,6 +232,9 @@ class Sample:
229
232
  plot_ms2_cycle = plot_ms2_cycle
230
233
  plot_ms2_eic = plot_ms2_eic
231
234
  plot_ms2_q1 = plot_ms2_q1
235
+ plot_bpc = plot_bpc
236
+ plot_tic = plot_tic
237
+ get_eic = get_eic
232
238
  get_feature = get_feature
233
239
  get_scan = get_scan
234
240
  get_dda_stats = get_dda_stats
@@ -333,15 +339,15 @@ class Sample:
333
339
  if module_name.startswith(study_module_prefix) and module_name != current_module:
334
340
  study_modules.append(module_name)
335
341
 
336
- ''' # Add parameters submodules
342
+ """ # Add parameters submodules
337
343
  parameters_modules = []
338
344
  parameters_module_prefix = f"{base_modname}.parameters."
339
345
  for module_name in sys.modules:
340
346
  if module_name.startswith(parameters_module_prefix) and module_name != current_module:
341
347
  parameters_modules.append(module_name)
342
- '''
343
-
344
- all_modules_to_reload = core_modules + sample_modules + study_modules #+ parameters_modules
348
+ """
349
+
350
+ all_modules_to_reload = core_modules + sample_modules + study_modules # + parameters_modules
345
351
 
346
352
  # Reload all discovered modules
347
353
  for full_module_name in all_modules_to_reload:
@@ -7,6 +7,9 @@
7
7
  "feature_id": {
8
8
  "dtype": "pl.Utf8"
9
9
  },
10
+ "sample_uid": {
11
+ "dtype": "pl.Int32"
12
+ },
10
13
  "mz": {
11
14
  "dtype": "pl.Float64"
12
15
  },
@@ -46,18 +49,24 @@
46
49
  "iso_of": {
47
50
  "dtype": "pl.Int64"
48
51
  },
49
- "adduct_group": {
50
- "dtype": "pl.Int64"
51
- },
52
52
  "adduct": {
53
53
  "dtype": "pl.Utf8"
54
54
  },
55
55
  "adduct_mass": {
56
56
  "dtype": "pl.Float64"
57
57
  },
58
+ "adduct_group": {
59
+ "dtype": "pl.Int64"
60
+ },
58
61
  "chrom": {
59
62
  "dtype": "pl.Object"
60
63
  },
64
+ "filled": {
65
+ "dtype": "pl.Boolean"
66
+ },
67
+ "chrom_area": {
68
+ "dtype": "pl.Float64"
69
+ },
61
70
  "chrom_coherence": {
62
71
  "dtype": "pl.Float64"
63
72
  },
@@ -100,64 +109,64 @@
100
109
  },
101
110
  "scans_df": {
102
111
  "columns": {
103
- "scan_uid": {
104
- "dtype": "pl.Int64"
112
+ "bl": {
113
+ "dtype": "pl.Float64"
114
+ },
115
+ "comment": {
116
+ "dtype": "pl.Utf8"
105
117
  },
106
118
  "cycle": {
107
119
  "dtype": "pl.Int64"
108
120
  },
109
- "ms_level": {
121
+ "energy": {
122
+ "dtype": "pl.Float64"
123
+ },
124
+ "feature_uid": {
110
125
  "dtype": "pl.Int64"
111
126
  },
112
- "rt": {
113
- "dtype": "pl.Float64"
127
+ "id": {
128
+ "dtype": "pl.Utf8"
114
129
  },
115
- "inty_tot": {
130
+ "inty_max": {
116
131
  "dtype": "pl.Float64"
117
132
  },
118
133
  "inty_min": {
119
134
  "dtype": "pl.Float64"
120
135
  },
121
- "inty_max": {
136
+ "inty_tot": {
122
137
  "dtype": "pl.Float64"
123
138
  },
124
- "bl": {
125
- "dtype": "pl.Float64"
139
+ "ms2_n": {
140
+ "dtype": "pl.Int64"
126
141
  },
127
- "mz_min": {
128
- "dtype": "pl.Float64"
142
+ "ms_level": {
143
+ "dtype": "pl.Int64"
129
144
  },
130
145
  "mz_max": {
131
146
  "dtype": "pl.Float64"
132
147
  },
133
- "comment": {
134
- "dtype": "pl.Utf8"
148
+ "mz_min": {
149
+ "dtype": "pl.Float64"
135
150
  },
136
151
  "name": {
137
152
  "dtype": "pl.Utf8"
138
153
  },
139
- "id": {
140
- "dtype": "pl.Utf8"
141
- },
142
- "prec_mz": {
154
+ "prec_inty": {
143
155
  "dtype": "pl.Float64"
144
156
  },
145
- "prec_mz_min": {
157
+ "prec_mz": {
146
158
  "dtype": "pl.Float64"
147
159
  },
148
160
  "prec_mz_max": {
149
161
  "dtype": "pl.Float64"
150
162
  },
151
- "prec_inty": {
163
+ "prec_mz_min": {
152
164
  "dtype": "pl.Float64"
153
165
  },
154
- "energy": {
166
+ "rt": {
155
167
  "dtype": "pl.Float64"
156
168
  },
157
- "feature_uid": {
158
- "dtype": "pl.Int64"
159
- },
160
- "ms2_n": {
169
+ "scan_uid": {
161
170
  "dtype": "pl.Int64"
162
171
  },
163
172
  "time_cycle": {
@@ -169,10 +178,10 @@
169
178
  "time_ms1_to_ms2": {
170
179
  "dtype": "pl.Float64"
171
180
  },
172
- "time_ms2_to_ms2": {
181
+ "time_ms2_to_ms1": {
173
182
  "dtype": "pl.Float64"
174
183
  },
175
- "time_ms2_to_ms1": {
184
+ "time_ms2_to_ms2": {
176
185
  "dtype": "pl.Float64"
177
186
  }
178
187
  }
masster/sample/save.py CHANGED
@@ -134,10 +134,10 @@ def export_features(self, filename="features.csv"):
134
134
  # clone df
135
135
  clean_df = self.features_df.clone()
136
136
  filename = os.path.abspath(filename)
137
- # add a column has_ms2=True if colum ms2_scans is not None
137
+ # add a column has_ms2=True if column ms2_scans is not None
138
138
  if "ms2_scans" in clean_df.columns:
139
139
  clean_df = clean_df.with_columns(
140
- (pl.col("ms2_scans").is_not_null()).alias("has_ms2")
140
+ (pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
141
141
  )
142
142
  clean_df = self.features_df.select([
143
143
  col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
@@ -215,7 +215,7 @@ def export_mgf(
215
215
  return
216
216
  else:
217
217
  self.features_df = self.features.get_df()
218
-
218
+
219
219
  # Apply filtering at DataFrame level for better performance
220
220
  features = self.features_df
221
221
  if mz_start is not None:
@@ -228,7 +228,7 @@ def export_mgf(
228
228
  features = features.filter(pl.col("rt") <= rt_end)
229
229
  if not include_all_ms1:
230
230
  features = features.filter(pl.col("ms2_scans").is_not_null())
231
-
231
+
232
232
  # Convert to list of dictionaries for faster iteration
233
233
  features_list = features.to_dicts()
234
234
 
@@ -286,7 +286,10 @@ def export_mgf(
286
286
  centroid_algo = "cr"
287
287
 
288
288
  # count how many features have charge < 0
289
- if self.features_df.filter(pl.col("charge") < 0).shape[0]- self.features_df.filter(pl.col("charge") > 0).shape[0] > 0:
289
+ if (
290
+ self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
291
+ > 0
292
+ ):
290
293
  preferred_charge = -1
291
294
  else:
292
295
  preferred_charge = 1
@@ -312,7 +315,7 @@ def export_mgf(
312
315
  rt = row["rt"]
313
316
  rt_str = f"{rt:.2f}"
314
317
  mz_str = f"{mz:.4f}"
315
-
318
+
316
319
  # Filtering is now done at DataFrame level, so we can skip these checks
317
320
  if row["ms2_scans"] is None and not include_all_ms1:
318
321
  skip = skip + 1
@@ -338,7 +341,7 @@ def export_mgf(
338
341
 
339
342
  charge = preferred_charge
340
343
  if row["charge"] is not None and row["charge"] != 0:
341
- charge = row["charge"]
344
+ charge = row["charge"]
342
345
 
343
346
  write_ion(
344
347
  f,
@@ -397,7 +400,7 @@ def export_mgf(
397
400
  )
398
401
  c += 1
399
402
  continue # Skip the rest of the processing for this feature
400
-
403
+
401
404
  # If we reach here, either use_cache=False or no cached spectra were available
402
405
  if split_energy:
403
406
  # get energy of all scans with scan_uid in ms2_scans by fetching them
@@ -408,20 +411,20 @@ def export_mgf(
408
411
  for scan_uid in ms2_scan_uids:
409
412
  spec = self.get_spectrum(scan_uid)
410
413
  if spec is not None:
411
- spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, 'energy') else 0))
412
-
414
+ spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, "energy") else 0))
415
+
413
416
  # Group by energy
414
417
  energy_groups: dict[float, list[int]] = {}
415
418
  for scan_uid, energy in spectra_with_energy:
416
419
  if energy not in energy_groups:
417
420
  energy_groups[energy] = []
418
421
  energy_groups[energy].append(scan_uid)
419
-
422
+
420
423
  for energy, scan_uids_for_energy in energy_groups.items():
421
424
  if selection == "best":
422
425
  # Keep only the first scan for this energy
423
426
  scan_uids_for_energy = [scan_uids_for_energy[0]]
424
-
427
+
425
428
  for scan_uid in scan_uids_for_energy:
426
429
  spect = self.get_spectrum(
427
430
  scan_uid,
@@ -556,7 +559,7 @@ def export_mgf(
556
559
  inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
557
560
  q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
558
561
  eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
559
-
562
+
560
563
  self.logger.debug(
561
564
  f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
562
565
  )