masster 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/save.py CHANGED
@@ -139,9 +139,13 @@ def export_features(self, filename="features.csv"):
139
139
  clean_df = clean_df.with_columns(
140
140
  (pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
141
141
  )
142
- clean_df = self.features_df.select([
143
- col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
144
- ])
142
+ clean_df = self.features_df.select(
143
+ [
144
+ col
145
+ for col in self.features_df.columns
146
+ if self.features_df[col].dtype not in (pl.List, pl.Object)
147
+ ],
148
+ )
145
149
  if filename.lower().endswith((".xls", ".xlsx")):
146
150
  clean_df.to_pandas().to_excel(filename, index=False)
147
151
  self.logger.info(f"Features exported to {filename} (Excel format)")
@@ -275,7 +279,10 @@ def export_mgf(
275
279
  if spect.ms_level > 1 and hasattr(spect, "energy"):
276
280
  f.write(f"ENERGY={spect.energy}\n")
277
281
  # Use list comprehension for better performance
278
- peak_lines = [f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)]
282
+ peak_lines = [
283
+ f"{mz_val:.5f} {inty_val:.0f}\n"
284
+ for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)
285
+ ]
279
286
  f.writelines(peak_lines)
280
287
  f.write("END IONS\n\n")
281
288
 
@@ -287,7 +294,8 @@ def export_mgf(
287
294
 
288
295
  # count how many features have charge < 0
289
296
  if (
290
- self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
297
+ self.features_df.filter(pl.col("charge") < 0).shape[0]
298
+ - self.features_df.filter(pl.col("charge") > 0).shape[0]
291
299
  > 0
292
300
  ):
293
301
  preferred_charge = -1
@@ -388,7 +396,9 @@ def export_mgf(
388
396
  q1_max=q1_ratio_max,
389
397
  )
390
398
  # Get the corresponding scan_uid from the list
391
- current_scan_uid = scan_uids[i] if i < len(scan_uids) else "unknown"
399
+ current_scan_uid = (
400
+ scan_uids[i] if i < len(scan_uids) else "unknown"
401
+ )
392
402
  write_ion(
393
403
  f,
394
404
  f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{current_scan_uid}",
@@ -411,7 +421,12 @@ def export_mgf(
411
421
  for scan_uid in ms2_scan_uids:
412
422
  spec = self.get_spectrum(scan_uid)
413
423
  if spec is not None:
414
- spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, "energy") else 0))
424
+ spectra_with_energy.append(
425
+ (
426
+ scan_uid,
427
+ spec.energy if hasattr(spec, "energy") else 0,
428
+ ),
429
+ )
415
430
 
416
431
  # Group by energy
417
432
  energy_groups: dict[float, list[int]] = {}
@@ -496,14 +511,18 @@ def export_mgf(
496
511
  spect = spect.centroid(
497
512
  tolerance=self.parameters["mz_tol_ms1_da"],
498
513
  ppm=self.parameters["mz_tol_ms1_ppm"],
499
- min_points=self.parameters["centroid_min_points_ms1"],
514
+ min_points=self.parameters[
515
+ "centroid_min_points_ms1"
516
+ ],
500
517
  algo=centroid_algo,
501
518
  )
502
519
  elif spect.ms_level == 2:
503
520
  spect = spect.centroid(
504
521
  tolerance=self.parameters["mz_tol_ms2_da"],
505
522
  ppm=self.parameters["mz_tol_ms2_ppm"],
506
- min_points=self.parameters["centroid_min_points_ms2"],
523
+ min_points=self.parameters[
524
+ "centroid_min_points_ms2"
525
+ ],
507
526
  algo=centroid_algo,
508
527
  )
509
528
  if deisotope:
@@ -595,7 +614,9 @@ def export_dda_stats(self, filename="stats.csv"):
595
614
  ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
596
615
  features_count = len(self.features_df) if self.features_df is not None else 0
597
616
  features_with_ms2 = (
598
- self.features_df.filter(pl.col("ms2_scans").is_not_null()).height if self.features_df is not None else 0
617
+ self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
618
+ if self.features_df is not None
619
+ else 0
599
620
  )
600
621
 
601
622
  # Initialize a dictionary to hold statistics
@@ -610,7 +631,9 @@ def export_dda_stats(self, filename="stats.csv"):
610
631
  if "time_cycle" in self.scans_df.columns:
611
632
  ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
612
633
  avg_cycle_time = ms1_df["time_cycle"].mean()
613
- stats["Average_cycle_time"] = avg_cycle_time if avg_cycle_time is not None else ""
634
+ stats["Average_cycle_time"] = (
635
+ avg_cycle_time if avg_cycle_time is not None else ""
636
+ )
614
637
  else:
615
638
  stats["Average_cycle_time"] = 0
616
639
 
masster/spectrum.py CHANGED
@@ -138,10 +138,10 @@ class Spectrum:
138
138
 
139
139
  Example Usage:
140
140
  >>> import numpy as np
141
- >>> from masster import spec
141
+ >>> from masster import Spectrum
142
142
  >>> mz = np.array([100.0, 150.0, 200.0, 250.0])
143
143
  >>> intensity = np.array([1000, 5000, 3000, 800])
144
- >>> spectrum = spec(mz=mz, inty=intensity, ms_level=1)
144
+ >>> spectrum = Spectrum(mz=mz, inty=intensity, ms_level=1)
145
145
  >>> spectrum.find_peaks()
146
146
  >>> spectrum.plot()
147
147
 
@@ -298,7 +298,11 @@ class align_defaults:
298
298
  "dtype": str,
299
299
  "description": "Method to use for extrapolation outside the data range in LOWESS",
300
300
  "default": "four-point-linear",
301
- "allowed_values": ["two-point-linear", "four-point-linear", "global-linear"],
301
+ "allowed_values": [
302
+ "two-point-linear",
303
+ "four-point-linear",
304
+ "global-linear",
305
+ ],
302
306
  },
303
307
  },
304
308
  repr=False,
@@ -158,7 +158,9 @@ class identify_defaults:
158
158
  if not isinstance(value, list):
159
159
  return False
160
160
  # For heteroatoms, ensure all elements are strings
161
- if param_name == "heteroatoms" and not all(isinstance(item, str) for item in value):
161
+ if param_name == "heteroatoms" and not all(
162
+ isinstance(item, str) for item in value
163
+ ):
162
164
  return False
163
165
 
164
166
  # Range validation for numeric types
@@ -33,7 +33,7 @@ class study_defaults:
33
33
 
34
34
  eic_mz_tol: float = 0.01
35
35
  eic_rt_tol: float = 10.0
36
-
36
+
37
37
  polarity: str = "positive"
38
38
  adducts: list[str] | None = None
39
39
  adduct_min_probability: float = 0.04
@@ -54,7 +54,14 @@ class study_defaults:
54
54
  "dtype": str,
55
55
  "description": "Logging level to be set for the logger",
56
56
  "default": "INFO",
57
- "allowed_values": ["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
57
+ "allowed_values": [
58
+ "TRACE",
59
+ "DEBUG",
60
+ "INFO",
61
+ "WARNING",
62
+ "ERROR",
63
+ "CRITICAL",
64
+ ],
58
65
  },
59
66
  "log_label": {
60
67
  "dtype": "Optional[str]",
@@ -92,14 +99,19 @@ class study_defaults:
92
99
  "default": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
93
100
  "examples": {
94
101
  "positive": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
95
- "negative": ["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2", "H-2-O:0:0.2"]
102
+ "negative": [
103
+ "H-1:-:0.95",
104
+ "Cl:-:0.05",
105
+ "CH2O2:0:0.2",
106
+ "H-2-O:0:0.2",
107
+ ],
96
108
  },
97
109
  "validation_rules": [
98
110
  "Format: element:charge:probability",
99
111
  "Charge must be +, -, or 0 (neutral)",
100
112
  "Probability must be between 0.0 and 1.0",
101
- "Sum of all charged adduct probabilities must equal 1.0"
102
- ]
113
+ "Sum of all charged adduct probabilities must equal 1.0",
114
+ ],
103
115
  },
104
116
  "adduct_min_probability": {
105
117
  "dtype": float,
@@ -116,54 +128,71 @@ class study_defaults:
116
128
  """Set polarity-specific defaults for adducts if not explicitly provided."""
117
129
  # If adducts is None, set based on polarity
118
130
  if self.adducts is None:
119
- if self.polarity.lower() in ['positive', 'pos']:
120
- self.adducts = ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"]
121
- elif self.polarity.lower() in ['negative', 'neg']:
122
- self.adducts = ["-H:-1:0.9", "+Cl:-1:0.1", "+CH2O2:0:0.15", "-H2O:0:0.15"]
131
+ if self.polarity.lower() in ["positive", "pos"]:
132
+ self.adducts = [
133
+ "+H:1:0.65",
134
+ "+Na:1:0.15",
135
+ "+NH4:1:0.15",
136
+ "+K:1:0.05",
137
+ "-H2O:0:0.15",
138
+ ]
139
+ elif self.polarity.lower() in ["negative", "neg"]:
140
+ self.adducts = [
141
+ "-H:-1:0.9",
142
+ "+Cl:-1:0.1",
143
+ "+CH2O2:0:0.15",
144
+ "-H2O:0:0.15",
145
+ ]
123
146
  else:
124
147
  # Default to positive if polarity is not recognized
125
- self.adducts = ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"]
148
+ self.adducts = [
149
+ "+H:1:0.65",
150
+ "+Na:1:0.15",
151
+ "+NH4:1:0.15",
152
+ "+K:1:0.05",
153
+ "-H2O:0:0.15",
154
+ ]
126
155
 
127
156
  def _validate_adducts(self, adduct_list: list[str]) -> bool:
128
157
  """
129
158
  Validate adducts according to OpenMS convention.
130
-
159
+
131
160
  Format: element:charge:probability
132
161
  - Elements can be molecular formulas (e.g., H, Na, NH4, H-1, CH2O2)
133
162
  - Charge must be +, -, or 0 (for neutral)
134
163
  - Probability must be a float between 0 and 1
135
164
  - Total probability of all charged adducts should sum to 1.0
136
-
165
+
137
166
  Args:
138
167
  adduct_list: List of adduct strings in OpenMS format
139
-
168
+
140
169
  Returns:
141
170
  True if all adducts are valid, False otherwise
142
171
  """
143
172
  if not adduct_list: # Empty list is valid
144
173
  return True
145
-
174
+
146
175
  charged_total_prob = 0.0
147
176
  neutral_total_prob = 0.0
148
-
177
+
149
178
  for adduct in adduct_list:
150
179
  if not isinstance(adduct, str):
151
180
  return False
152
-
181
+
153
182
  parts = adduct.split(":")
154
183
  if len(parts) != 3:
155
184
  return False
156
-
185
+
157
186
  element, charge, prob_str = parts
158
-
187
+
159
188
  # Validate element (non-empty string)
160
189
  if not element:
161
190
  return False
162
-
191
+
163
192
  # Validate charge
164
193
  if charge not in ["+", "-", "0"]:
165
194
  return False
166
-
195
+
167
196
  # Validate probability
168
197
  try:
169
198
  probability = float(prob_str)
@@ -171,20 +200,20 @@ class study_defaults:
171
200
  return False
172
201
  except (ValueError, TypeError):
173
202
  return False
174
-
203
+
175
204
  # Sum probabilities by charge type
176
205
  if charge in ["+", "-"]:
177
206
  charged_total_prob += probability
178
207
  else: # charge == "0" (neutral)
179
208
  neutral_total_prob += probability
180
-
209
+
181
210
  # Validate probability constraints
182
211
  # Charged adducts should sum to 1.0 (within tolerance)
183
212
  if charged_total_prob > 0 and abs(charged_total_prob - 1.0) > 1e-6:
184
213
  return False
185
-
214
+
186
215
  # Neutral adducts can have any total probability (they're optional)
187
-
216
+
188
217
  return True
189
218
 
190
219
  def get_info(self, param_name: str) -> dict[str, Any]:
@@ -316,7 +345,11 @@ class study_defaults:
316
345
  expected_dtype = self._param_metadata[param_name]["dtype"]
317
346
 
318
347
  # Handle optional types
319
- if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
348
+ if (
349
+ isinstance(expected_dtype, str)
350
+ and expected_dtype.startswith("Optional")
351
+ and value is not None
352
+ ):
320
353
  if "int" in expected_dtype and not isinstance(value, int):
321
354
  try:
322
355
  value = int(value)