masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/helpers.py CHANGED
@@ -113,9 +113,7 @@ def _get_scan_uids(self, scans=None, verbose=True):
113
113
  scans_uids = self.scans_df.get_column("scan_uid").to_list()
114
114
  elif isinstance(scans, list):
115
115
  # if scans is a list, ensure all elements are valid scan_uids
116
- scans_uids = [
117
- s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()
118
- ]
116
+ scans_uids = [s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()]
119
117
  if verbose and not scans_uids:
120
118
  self.logger.error("No valid scan_uids provided.")
121
119
 
@@ -340,9 +338,7 @@ def get_eic(self, mz, mz_tol=None):
340
338
  return None
341
339
 
342
340
  # Aggregate intensities per retention time. Use sum in case multiple points per rt.
343
- chrom = (
344
- matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
345
- )
341
+ chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
346
342
 
347
343
  # Attach to Sample
348
344
  self.chrom_df = chrom
@@ -401,16 +397,14 @@ def features_select(
401
397
  # self.logger.info("No features found. R")
402
398
  return
403
399
  feats = self.features_df.clone()
404
-
400
+
405
401
  # Filter by feature UIDs if provided
406
402
  if uid is not None:
407
403
  if isinstance(uid, tuple) and len(uid) == 2:
408
404
  # Handle tuple as range of feature UIDs
409
405
  min_uid, max_uid = uid
410
406
  feats_len_before_filter = len(feats)
411
- feats = feats.filter(
412
- (pl.col("feature_uid") >= min_uid) & (pl.col("feature_uid") <= max_uid)
413
- )
407
+ feats = feats.filter((pl.col("feature_uid") >= min_uid) & (pl.col("feature_uid") <= max_uid))
414
408
  self.logger.debug(
415
409
  f"Selected features by UID range ({min_uid}-{max_uid}). Features removed: {feats_len_before_filter - len(feats)}",
416
410
  )
@@ -420,13 +414,13 @@ def features_select(
420
414
  if not feature_uids_to_keep:
421
415
  self.logger.warning("No valid feature UIDs provided.")
422
416
  return feats.limit(0) # Return empty DataFrame with same structure
423
-
417
+
424
418
  feats_len_before_filter = len(feats)
425
419
  feats = feats.filter(pl.col("feature_uid").is_in(feature_uids_to_keep))
426
420
  self.logger.debug(
427
421
  f"Selected features by UIDs. Features removed: {feats_len_before_filter - len(feats)}",
428
422
  )
429
-
423
+
430
424
  if coherence is not None:
431
425
  has_coherence = "chrom_coherence" in self.features_df.columns
432
426
  if not has_coherence:
@@ -437,8 +431,7 @@ def features_select(
437
431
  if isinstance(coherence, tuple) and len(coherence) == 2:
438
432
  min_coherence, max_coherence = coherence
439
433
  feats = feats.filter(
440
- (pl.col("chrom_coherence") >= min_coherence)
441
- & (pl.col("chrom_coherence") <= max_coherence),
434
+ (pl.col("chrom_coherence") >= min_coherence) & (pl.col("chrom_coherence") <= max_coherence),
442
435
  )
443
436
  else:
444
437
  feats = feats.filter(pl.col("chrom_coherence") >= coherence)
@@ -489,8 +482,7 @@ def features_select(
489
482
  if isinstance(rt_delta, tuple) and len(rt_delta) == 2:
490
483
  min_rt_delta, max_rt_delta = rt_delta
491
484
  feats = feats.filter(
492
- (pl.col("rt_delta") >= min_rt_delta)
493
- & (pl.col("rt_delta") <= max_rt_delta),
485
+ (pl.col("rt_delta") >= min_rt_delta) & (pl.col("rt_delta") <= max_rt_delta),
494
486
  )
495
487
  else:
496
488
  feats = feats.filter(pl.col("rt_delta") >= rt_delta)
@@ -567,8 +559,7 @@ def features_select(
567
559
  if isinstance(prominence, tuple) and len(prominence) == 2:
568
560
  min_prominence, max_prominence = prominence
569
561
  feats = feats.filter(
570
- (pl.col("chrom_prominence") >= min_prominence)
571
- & (pl.col("chrom_prominence") <= max_prominence),
562
+ (pl.col("chrom_prominence") >= min_prominence) & (pl.col("chrom_prominence") <= max_prominence),
572
563
  )
573
564
  else:
574
565
  feats = feats.filter(pl.col("chrom_prominence") >= prominence)
@@ -579,9 +570,7 @@ def features_select(
579
570
  if height is not None:
580
571
  feats_len_before_filter = len(feats)
581
572
  # Check if chrom_height column exists, if not use chrom_height_scaled
582
- height_col = (
583
- "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
584
- )
573
+ height_col = "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
585
574
  if isinstance(height, tuple) and len(height) == 2:
586
575
  min_height, max_height = height
587
576
  feats = feats.filter(
@@ -617,6 +606,7 @@ def features_select(
617
606
  self.logger.info(f"Selected features. Features remaining: {len(feats)}")
618
607
  return feats
619
608
 
609
+
620
610
  '''
621
611
  def _features_sync(self):
622
612
  """
@@ -725,6 +715,7 @@ def _features_sync(self):
725
715
  self.logger.error(f"Error during feature synchronization: {e}")
726
716
  '''
727
717
 
718
+
728
719
  def features_delete(self, features: list | None = None):
729
720
  """
730
721
  Delete features from both self.features_df and self._oms_features_map based on a list of feature UIDs.
@@ -841,10 +832,7 @@ def _delete_ms2(self):
841
832
 
842
833
  # Update scans_df to remove feature_uid association for linked MS2 spectra
843
834
  self.scans_df = self.scans_df.with_columns(
844
- pl.when(pl.col("ms_level") == 2)
845
- .then(None)
846
- .otherwise(pl.col("feature_uid"))
847
- .alias("feature_uid"),
835
+ pl.when(pl.col("ms_level") == 2).then(None).otherwise(pl.col("feature_uid")).alias("feature_uid"),
848
836
  )
849
837
  self.logger.info("MS2 spectra unlinked from features.")
850
838
 
@@ -930,11 +918,7 @@ def features_filter(self, features):
930
918
  )
931
919
 
932
920
  # Update scans_df to remove feature_uid associations for deleted features
933
- if (
934
- hasattr(self, "scans_df")
935
- and self.scans_df is not None
936
- and feature_uids_to_delete
937
- ):
921
+ if hasattr(self, "scans_df") and self.scans_df is not None and feature_uids_to_delete:
938
922
  self.scans_df = self.scans_df.with_columns(
939
923
  pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
940
924
  .then(None)
masster/sample/lib.py CHANGED
@@ -221,11 +221,11 @@ def lib_load(self, csvfile=None, polarity=None):
221
221
  self.lib = self.lib.where(pd.notnull(self.lib), None)
222
222
  # find all elements == nan and replace them with None
223
223
  self.lib = self.lib.replace({np.nan: None})
224
-
224
+
225
225
  # Use sample.polarity if polarity parameter is None
226
226
  if polarity is None:
227
- polarity = getattr(self, 'polarity', 'positive')
228
-
227
+ polarity = getattr(self, "polarity", "positive")
228
+
229
229
  if polarity is not None:
230
230
  if polarity.lower() == "positive":
231
231
  self.lib = self.lib[self.lib["z"] > 0]
masster/sample/load.py CHANGED
@@ -13,11 +13,6 @@ Key Features:
13
13
  - **Error Handling**: Comprehensive error reporting for file loading issues.
14
14
  - **Raw Data Processing**: Handle centroided and profile data with signal smoothing.
15
15
 
16
- Dependencies:
17
- - `pyopenms`: For standard mass spectrometry file format support.
18
- - `polars` and `pandas`: For efficient data handling and manipulation.
19
- - `numpy`: For numerical array operations.
20
-
21
16
  Functions:
22
17
  - `load()`: Main file loading function with format detection.
23
18
  - `_load_mzML()`: Specialized mzML file loader.
@@ -55,7 +50,9 @@ warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", categor
55
50
 
56
51
  # Import pyopenms with suppressed warnings
57
52
  with warnings.catch_warnings():
58
- warnings.filterwarnings("ignore", message=".*OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
53
+ warnings.filterwarnings(
54
+ "ignore", message=".*OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning
55
+ )
59
56
  warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH.*", category=UserWarning)
60
57
  import pyopenms as oms
61
58
 
@@ -163,6 +160,7 @@ def load_noms1(
163
160
  _load_raw(self, filename)
164
161
  elif filename.lower().endswith(".sample5"):
165
162
  from masster.sample.h5 import _load_sample5_study
163
+
166
164
  _load_sample5_study(self, filename) # Use optimized version for study loading
167
165
  else:
168
166
  raise ValueError("File must be .mzML, .wiff, *.raw, or .sample5")
@@ -286,12 +284,8 @@ def _load_mzML(
286
284
  if len(prec_mz) == 0:
287
285
  continue
288
286
  prec_mz = prec_mz[0].getMZ()
289
- precursorIsolationWindowLowerMZ = s.getPrecursors()[
290
- 0
291
- ].getIsolationWindowLowerOffset()
292
- precursorIsolationWindowUpperMZ = s.getPrecursors()[
293
- 0
294
- ].getIsolationWindowUpperOffset()
287
+ precursorIsolationWindowLowerMZ = s.getPrecursors()[0].getIsolationWindowLowerOffset()
288
+ precursorIsolationWindowUpperMZ = s.getPrecursors()[0].getIsolationWindowUpperOffset()
295
289
  prec_inty = s.getPrecursors()[0].getIntensity()
296
290
  # Try to get collision energy from meta values first, fallback to getActivationEnergy()
297
291
  try:
@@ -416,7 +410,7 @@ def _load_raw(
416
410
  - Updates instance attributes including self.file_path, self.file_obj, self.file_interface, and self.label.
417
411
  - Initiates further analysis by invoking analyze_dda().
418
412
  """
419
- #from alpharaw.thermo import ThermoRawData
413
+ # from alpharaw.thermo import ThermoRawData
420
414
  from masster.sample.thermo import ThermoRawData
421
415
 
422
416
  if not filename:
@@ -482,11 +476,11 @@ def _load_raw(
482
476
 
483
477
  # try to get polarity
484
478
  if self.polarity is None:
485
- if s['polarity'] == 'positive':
486
- self.polarity = 'positive'
487
- elif s['polarity'] == 'negative':
488
- self.polarity = 'negative'
489
-
479
+ if s["polarity"] == "positive":
480
+ self.polarity = "positive"
481
+ elif s["polarity"] == "negative":
482
+ self.polarity = "negative"
483
+
490
484
  peak_start_idx = s["peak_start_idx"]
491
485
  peak_stop_idx = s["peak_stop_idx"]
492
486
  peaks = raw_data.peak_df.loc[peak_start_idx : peak_stop_idx - 1]
@@ -639,11 +633,11 @@ def _load_wiff(
639
633
  ms_level = s["ms_level"]
640
634
  # try to get polarity
641
635
  if polarity is None:
642
- if s['polarity'] == 'positive':
643
- polarity = 'positive'
644
- elif s['polarity'] == 'negative':
645
- polarity = 'negative'
646
-
636
+ if s["polarity"] == "positive":
637
+ polarity = "positive"
638
+ elif s["polarity"] == "negative":
639
+ polarity = "negative"
640
+
647
641
  if ms_level == 1:
648
642
  cycle += 1
649
643
  prec_mz = None
@@ -745,6 +739,7 @@ def _load_wiff(
745
739
  self.file_source = filename
746
740
  self.file_obj = raw_data
747
741
  self.file_interface = "alpharaw"
742
+ self.polarity = polarity
748
743
  self.label = os.path.basename(filename)
749
744
  self.ms1_df = pl.DataFrame(ms1_df_records, schema=schema)
750
745
  if self.type != "ztscan":
@@ -775,6 +770,7 @@ def _load_featureXML(
775
770
  fh.load(filename, fm)
776
771
  self._oms_features_map = fm
777
772
 
773
+
778
774
  def _wiff_to_dict(
779
775
  filename=None,
780
776
  ):
@@ -1161,9 +1157,7 @@ def chrom_extract(
1161
1157
  scan_uid = trace["scan_uid"]
1162
1158
  # find all ms1 data with scan_uid and mz between q1-mz_tol and q1+mz_tol
1163
1159
  d = self.ms1_df.filter(
1164
- (pl.col("scan_uid").is_in(scan_uid))
1165
- & (pl.col("mz") >= q1 - mz_tol)
1166
- & (pl.col("mz") <= q1 + mz_tol),
1160
+ (pl.col("scan_uid").is_in(scan_uid)) & (pl.col("mz") >= q1 - mz_tol) & (pl.col("mz") <= q1 + mz_tol),
1167
1161
  )
1168
1162
  # for all unique rt values, find the maximum inty
1169
1163
  eic_rt = d.group_by("rt").agg(pl.col("inty").max())
@@ -1182,9 +1176,7 @@ def chrom_extract(
1182
1176
  scan_uid = trace["scan_uid"]
1183
1177
  # find all ms2 data with scan_uid and mz between q3-mz_tol and q3+mz_tol
1184
1178
  d = self.ms2data.filter(
1185
- (pl.col("scan_uid").is_in(scan_uid))
1186
- & (pl.col("mz") >= q3 - mz_tol)
1187
- & (pl.col("mz") <= q3 + mz_tol),
1179
+ (pl.col("scan_uid").is_in(scan_uid)) & (pl.col("mz") >= q3 - mz_tol) & (pl.col("mz") <= q3 + mz_tol),
1188
1180
  )
1189
1181
  # for all unique rt values, find the maximum inty
1190
1182
  eic_rt = d.group_by("rt").agg(pl.col("inty").max())