masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


masster/sample/helpers.py CHANGED
@@ -9,81 +9,81 @@ import polars as pl
 def _estimate_memory_usage(self):
     """
     Estimate the memory usage of all dataframes in the Sample object.
-
+
     Returns:
         dict: A dictionary containing memory usage estimates for each dataframe
         and the total memory usage in bytes and MB.
     """
     memory_usage = {}
     total_bytes = 0
-
+
     # Check features_df
     if self.features_df is not None and len(self.features_df) > 0:
         features_bytes = self.features_df.estimated_size()
-        memory_usage['features_df'] = {
-            'rows': len(self.features_df),
-            'columns': len(self.features_df.columns),
-            'bytes': features_bytes,
-            'mb': features_bytes / (1024 * 1024)
+        memory_usage["features_df"] = {
+            "rows": len(self.features_df),
+            "columns": len(self.features_df.columns),
+            "bytes": features_bytes,
+            "mb": features_bytes / (1024 * 1024),
         }
         total_bytes += features_bytes
     else:
-        memory_usage['features_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["features_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Check scans_df
     if self.scans_df is not None and len(self.scans_df) > 0:
         scans_bytes = self.scans_df.estimated_size()
-        memory_usage['scans_df'] = {
-            'rows': len(self.scans_df),
-            'columns': len(self.scans_df.columns),
-            'bytes': scans_bytes,
-            'mb': scans_bytes / (1024 * 1024)
+        memory_usage["scans_df"] = {
+            "rows": len(self.scans_df),
+            "columns": len(self.scans_df.columns),
+            "bytes": scans_bytes,
+            "mb": scans_bytes / (1024 * 1024),
         }
         total_bytes += scans_bytes
     else:
-        memory_usage['scans_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["scans_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Check ms1_df
     if self.ms1_df is not None and len(self.ms1_df) > 0:
         ms1_bytes = self.ms1_df.estimated_size()
-        memory_usage['ms1_df'] = {
-            'rows': len(self.ms1_df),
-            'columns': len(self.ms1_df.columns),
-            'bytes': ms1_bytes,
-            'mb': ms1_bytes / (1024 * 1024)
+        memory_usage["ms1_df"] = {
+            "rows": len(self.ms1_df),
+            "columns": len(self.ms1_df.columns),
+            "bytes": ms1_bytes,
+            "mb": ms1_bytes / (1024 * 1024),
         }
         total_bytes += ms1_bytes
     else:
-        memory_usage['ms1_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["ms1_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Check chrom_df
     if self.chrom_df is not None and len(self.chrom_df) > 0:
         chrom_bytes = self.chrom_df.estimated_size()
-        memory_usage['chrom_df'] = {
-            'rows': len(self.chrom_df),
-            'columns': len(self.chrom_df.columns),
-            'bytes': chrom_bytes,
-            'mb': chrom_bytes / (1024 * 1024)
+        memory_usage["chrom_df"] = {
+            "rows": len(self.chrom_df),
+            "columns": len(self.chrom_df.columns),
+            "bytes": chrom_bytes,
+            "mb": chrom_bytes / (1024 * 1024),
         }
         total_bytes += chrom_bytes
     else:
-        memory_usage['chrom_df'] = {'rows': 0, 'columns': 0, 'bytes': 0, 'mb': 0}
-
+        memory_usage["chrom_df"] = {"rows": 0, "columns": 0, "bytes": 0, "mb": 0}
+
     # Add total memory usage
-    memory_usage['total'] = {
-        'bytes': total_bytes,
-        'mb': total_bytes / (1024 * 1024),
-        'gb': total_bytes / (1024 * 1024 * 1024)
+    memory_usage["total"] = {
+        "bytes": total_bytes,
+        "mb": total_bytes / (1024 * 1024),
+        "gb": total_bytes / (1024 * 1024 * 1024),
     }
-
+
     # Log the memory usage summary
-    if hasattr(self, 'logger'):
+    if hasattr(self, "logger"):
         self.logger.debug(f"Total DataFrame memory usage: {memory_usage['total']['mb']:.2f} MB")
         for df_name, stats in memory_usage.items():
-            if df_name != 'total' and stats['bytes'] > 0:
+            if df_name != "total" and stats["bytes"] > 0:
                 self.logger.debug(f"{df_name}: {stats['rows']} rows, {stats['mb']:.2f} MB")
-
-    return memory_usage['total']['mb']
+
+    return memory_usage["total"]["mb"]
 
 
 def get_dda_stats(self):
@@ -92,9 +92,6 @@ def get_dda_stats(self):
     return ms1
 
 
-# TODO
-
-
 def get_feature(self, feature_uid):
     # get the feature with feature_uid == feature_uid
     feature = self.features_df.filter(pl.col("feature_uid") == feature_uid)
@@ -121,7 +118,7 @@ def _get_scan_uids(self, scans=None, verbose=True):
 def _get_feature_uids(self, features=None, verbose=True):
     """
     Get feature UIDs from various input types.
-
+
     Parameters:
         features: Can be one of the following:
             - None: Returns all feature UIDs from self.features_df
@@ -129,7 +126,7 @@ def _get_feature_uids(self, features=None, verbose=True):
             - polars.DataFrame: Extracts unique values from 'feature_uid' or 'feature_id' column
             - pandas.DataFrame: Extracts unique values from 'feature_uid' or 'feature_id' column
         verbose (bool): Whether to log errors for invalid inputs
-
+
     Returns:
         list: List of feature UIDs
     """
@@ -146,7 +143,7 @@ def _get_feature_uids(self, features=None, verbose=True):
             if verbose:
                 self.logger.warning("No features_df available to validate feature UIDs.")
             return []
-
+
         valid_feature_uids = self.features_df.get_column("feature_uid").to_list()
         feature_uids = [f for f in features if f in valid_feature_uids]
         if verbose and not feature_uids:
@@ -155,50 +152,53 @@ def _get_feature_uids(self, features=None, verbose=True):
     # Handle polars and pandas DataFrames
     try:
         # Check if it's a polars DataFrame
-        if hasattr(features, 'columns') and hasattr(features, 'get_column'):
+        if hasattr(features, "columns") and hasattr(features, "get_column"):
             # Polars DataFrame
             feature_column = None
-            if 'feature_uid' in features.columns:
-                feature_column = 'feature_uid'
-            elif 'feature_id' in features.columns:
-                feature_column = 'feature_id'
-
+            if "feature_uid" in features.columns:
+                feature_column = "feature_uid"
+            elif "feature_id" in features.columns:
+                feature_column = "feature_id"
+
             if feature_column is None:
                 if verbose:
                     self.logger.error("No 'feature_uid' or 'feature_id' column found in polars DataFrame.")
                 return []
-
+
             # Get unique values from the column
             feature_uids = features.get_column(feature_column).unique().to_list()
-
+
         # Check if it's a pandas DataFrame
-        elif hasattr(features, 'columns') and hasattr(features, 'iloc'):
+        elif hasattr(features, "columns") and hasattr(features, "iloc"):
             # Pandas DataFrame
             import pandas as pd
+
             if not isinstance(features, pd.DataFrame):
                 if verbose:
-                    self.logger.error("Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.")
+                    self.logger.error(
+                        "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame."
+                    )
                 return []
-
+
             feature_column = None
-            if 'feature_uid' in features.columns:
-                feature_column = 'feature_uid'
-            elif 'feature_id' in features.columns:
-                feature_column = 'feature_id'
-
+            if "feature_uid" in features.columns:
+                feature_column = "feature_uid"
+            elif "feature_id" in features.columns:
+                feature_column = "feature_id"
+
             if feature_column is None:
                 if verbose:
                     self.logger.error("No 'feature_uid' or 'feature_id' column found in pandas DataFrame.")
                 return []
-
+
             # Get unique values from the column
             feature_uids = features[feature_column].unique().tolist()
-
+
         else:
             if verbose:
                 self.logger.error("Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.")
             return []
-
+
     except Exception as e:
         if verbose:
             self.logger.error(f"Error processing DataFrame input: {e}")
@@ -281,7 +281,59 @@ def select_closest_scan(
     return scan
 
 
-# TODO the variables here do not follow the rest (mz, rt being tuples, etc.)
+def get_eic(self, mz, mz_tol=0.01):
+    """
+    Extract an extracted ion chromatogram (EIC) from `ms1_df` for a target m/z ± mz_tol.
+
+    The function filters `self.ms1_df` for rows with `mz` within the tolerance, aggregates
+    intensities per retention time (summing intensities for the same `rt`), sorts by `rt`,
+    stores the resulting chromatogram in `self.chrom_df` and returns it.
+
+    Parameters:
+        mz (float): target m/z value
+        mz_tol (float): tolerance around mz (default 0.01)
+
+    Returns:
+        polars.DataFrame or None: chromatogram with columns ['rt', 'inty'] or None if not available
+    """
+    # Validate ms1_df
+    if not hasattr(self, "ms1_df") or self.ms1_df is None:
+        if hasattr(self, "logger"):
+            self.logger.warning("No ms1_df available to build EIC.")
+        return None
+
+    try:
+        # Filter by mz window
+        mz_min = mz - mz_tol
+        mz_max = mz + mz_tol
+        matches = self.ms1_df.filter((pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max))
+
+        if len(matches) == 0:
+            if hasattr(self, "logger"):
+                self.logger.debug(f"No ms1 points found for mz={mz} ± {mz_tol}.")
+            # ensure chrom_df is None when nothing found
+            self.chrom_df = None
+            return None
+
+        # Aggregate intensities per retention time. Use sum in case multiple points per rt.
+        chrom = (
+            matches.group_by("rt")
+            .agg([pl.col("inty").sum().alias("inty")])
+            .sort("rt")
+        )
+
+        # Attach to Sample
+        self.chrom_df = chrom
+
+        if hasattr(self, "logger"):
+            self.logger.debug(f"Built EIC for mz={mz} ± {mz_tol}: {len(chrom)} points.")
+
+        return chrom
+
+    except Exception as e:
+        if hasattr(self, "logger"):
+            self.logger.error(f"Error building EIC for mz={mz}: {e}")
+        return None
 
 
 def select(
@@ -301,7 +353,7 @@ def select(
 ):
     """
     Select features based on specified criteria and return the filtered DataFrame.
-
+
     Parameters:
         mz: m/z range filter (tuple for range, single value for minimum)
         rt: retention time range filter (tuple for range, single value for minimum)
@@ -315,7 +367,7 @@ def select(
         height_scaled: scaled height filter (tuple for range, single value for minimum)
         prominence: prominence filter (tuple for range, single value for minimum)
         height: height filter (tuple for range, single value for minimum)
-
+
     Returns:
         polars.DataFrame: Filtered features DataFrame
     """
@@ -491,24 +543,22 @@ def select(
     return feats
 
 
-
-
 def _features_sync(self):
     """
-    Synchronizes the OpenMS FeatureMap and features_df by removing features that exist in one
+    Synchronizes the OpenMS FeatureMap and features_df by removing features that exist in one
     but not the other, using feature_id for mapping between them.
-
+
     This function ensures that:
     - Features in the FeatureMap that don't have corresponding entries in features_df are removed
    - Features in features_df that don't have corresponding entries in the FeatureMap are removed
-
+
     Returns:
         None
-
+
     Side Effects:
         Updates self.features (OpenMS FeatureMap) by creating a new FeatureMap with synchronized features
         Updates self.features_df by filtering to only include features present in the FeatureMap
-
+
     Note:
         Uses feature_id as the mapping key. feature_id contains OpenMS unique IDs that correspond
         to the unique IDs of features in the FeatureMap.
@@ -516,34 +566,34 @@ def _features_sync(self):
     if self.features_df is None or self.features is None:
         self.logger.warning("Cannot sync: features_df or FeatureMap is None.")
         return
-
+
     try:
         # Import pyopenms
         import pyopenms as oms
-
+
         # Get feature_ids from features_df
         df_feature_ids = set(self.features_df.get_column("feature_id").to_list())
-
+
         # Get feature unique IDs from FeatureMap
         feature_map_ids = set()
         for i in range(self.features.size()):
             feature = self.features[i]
             unique_id = str(feature.getUniqueId())  # Convert to string to match DataFrame
             feature_map_ids.add(unique_id)
-
+
         # Find features that exist in both
         common_feature_ids = df_feature_ids & feature_map_ids
-
+
         # Safety check: log error and exit if no features are matching
         if not common_feature_ids:
             self.logger.error(
                 f"No matching features found between FeatureMap and features_df. "
                 f"FeatureMap has {len(feature_map_ids)} features, "
                 f"features_df has {len(df_feature_ids)} features. "
-                f"Cannot synchronize - this indicates a data inconsistency. Exiting without changes."
+                f"Cannot synchronize - this indicates a data inconsistency. Exiting without changes.",
             )
             return
-
+
         # Create new synchronized FeatureMap with only common features
         synced_feature_map = oms.FeatureMap()
         for i in range(self.features.size()):
@@ -551,19 +601,19 @@ def _features_sync(self):
             unique_id = str(feature.getUniqueId())
             if unique_id in common_feature_ids:
                 synced_feature_map.push_back(feature)
-
+
         # Filter features_df to only include features that exist in FeatureMap
         synced_features_df = self.features_df.filter(
-            pl.col("feature_id").is_in(list(common_feature_ids))
+            pl.col("feature_id").is_in(list(common_feature_ids)),
         )
-
+
         # Update the objects
         original_map_size = self.features.size()
         original_df_size = len(self.features_df)
-
+
         self.features = synced_feature_map
         self.features_df = synced_features_df
-
+
         # Log the synchronization results
         map_removed = original_map_size - self.features.size()
         df_removed = original_df_size - len(self.features_df)
@@ -573,36 +623,36 @@ def _features_sync(self):
             self.logger.info(
                 f"Features synchronized. FeatureMap: {original_map_size} -> {self.features.size()} "
                 f"({map_removed} removed), DataFrame: {original_df_size} -> {len(self.features_df)} "
-                f"({df_removed} removed)"
+                f"({df_removed} removed)",
             )
         else:
             self.logger.debug(
                 f"Features synchronized. FeatureMap: {original_map_size} -> {self.features.size()} "
                 f"({map_removed} removed), DataFrame: {original_df_size} -> {len(self.features_df)} "
-                f"({df_removed} removed)"
+                f"({df_removed} removed)",
             )
-
+
     except ImportError:
         self.logger.warning("PyOpenMS not available, cannot sync FeatureMap")
     except Exception as e:
         self.logger.error(f"Error during feature synchronization: {e}")
 
 
-def features_delete(self, features: list|None=None):
+def features_delete(self, features: list | None = None):
     """
     Delete features from both self.features_df and self.features based on a list of feature UIDs.
-
+
     Parameters:
         features (list, optional): List of feature UIDs to delete. If None, all features will be deleted.
-
+
     Returns:
         None
-
+
     Side Effects:
         Updates self.features_df by removing specified features.
         Updates self.features (OpenMS FeatureMap) by creating a new FeatureMap with only the remaining features.
         Updates self.scans_df by removing feature_uid associations for deleted features.
-
+
     Note:
         The function preserves all OpenMS FeatureMap information by creating a new FeatureMap
         containing only the features that should remain after deletion.
@@ -610,33 +660,33 @@ def features_delete(self, features: list | None = None):
     if self.features_df is None:
         self.logger.warning("No features found.")
         return
-
+
     # Get the feature UIDs to delete
     feature_uids_to_delete = self._get_feature_uids(features=features, verbose=True)
-
+
     if not feature_uids_to_delete:
         self.logger.warning("No valid feature UIDs provided for deletion.")
         return
-
+
     original_count = len(self.features_df)
-
+
     # Update features_df by filtering out the features to delete
     self.features_df = self.features_df.filter(
-        ~pl.col("feature_uid").is_in(feature_uids_to_delete)
+        ~pl.col("feature_uid").is_in(feature_uids_to_delete),
     )
-
+
     # Update the OpenMS FeatureMap by creating a new one with only features to keep
     if self.features is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
-
+
             # Create new FeatureMap with only features to keep
             filtered_map = oms.FeatureMap()
-
+
             # Get the feature UIDs that should remain after deletion
             remaining_feature_uids = self.features_df.get_column("feature_uid").to_list()
-
+
             # Iterate through existing features and keep only those not in deletion list
             for i in range(self.features.size()):
                 feature = self.features[i]
@@ -644,25 +694,25 @@ def features_delete(self, features: list | None = None):
                 # we can check if the current index is in the remaining UIDs
                 if i in remaining_feature_uids:
                     filtered_map.push_back(feature)
-
+
             # Replace the original FeatureMap with the filtered one
             self.features = filtered_map
             self.logger.debug(f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.")
-
+
         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
             self.logger.warning(f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.")
-
+
     # Update scans_df to remove feature_uid associations for deleted features
-    if hasattr(self, 'scans_df') and self.scans_df is not None:
+    if hasattr(self, "scans_df") and self.scans_df is not None:
         self.scans_df = self.scans_df.with_columns(
             pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
             .then(None)
             .otherwise(pl.col("feature_uid"))
-            .alias("feature_uid")
+            .alias("feature_uid"),
         )
-
+
     deleted_count = original_count - len(self.features_df)
     self.logger.info(f"Deleted {deleted_count} features. Remaining features: {len(self.features_df)}")
 
@@ -702,21 +752,21 @@ def _delete_ms2(self):
 def features_filter(self, features):
     """
     Keep only the specified features and delete all others. This is the opposite of features_delete().
-
+
     Parameters:
         features: Can be one of the following:
             - list: List of feature UIDs to keep
             - polars.DataFrame: DataFrame with 'feature_uid' or 'feature_id' column - extracts unique values to keep
             - pandas.DataFrame: DataFrame with 'feature_uid' or 'feature_id' column - extracts unique values to keep
-
+
     Returns:
         None
-
+
     Side Effects:
         Updates self.features_df by keeping only the specified features.
         Updates self.features (OpenMS FeatureMap) by creating a new FeatureMap with only the specified features.
         Updates self.scans_df by removing feature_uid associations for deleted features.
-
+
     Note:
         The function preserves all OpenMS FeatureMap information by creating a new FeatureMap
         containing only the features that should be kept.
@@ -724,38 +774,38 @@ def features_filter(self, features):
     if self.features_df is None:
         self.logger.warning("No features found.")
         return
-
+
     if features is None:
         self.logger.warning("No features specified to keep. Use features_delete() to delete all features.")
         return
-
+
     # Get the feature UIDs to keep
     feature_uids_to_keep = self._get_feature_uids(features=features, verbose=True)
-
+
     if not feature_uids_to_keep:
         self.logger.warning("No valid feature UIDs provided to keep.")
         return
-
+
     original_count = len(self.features_df)
-
+
     # Update features_df by keeping only the specified features
     self.features_df = self.features_df.filter(
-        pl.col("feature_uid").is_in(feature_uids_to_keep)
+        pl.col("feature_uid").is_in(feature_uids_to_keep),
     )
-
+
     # Calculate which features were deleted (all except the ones to keep)
     all_feature_uids = set(range(original_count))  # Assuming sequential UIDs
     feature_uids_to_delete = list(all_feature_uids - set(feature_uids_to_keep))
-
+
     # Update the OpenMS FeatureMap by creating a new one with only features to keep
     if self.features is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
-
+
             # Create new FeatureMap with only features to keep
             filtered_map = oms.FeatureMap()
-
+
             # Iterate through existing features and keep only those in the keep list
             for i in range(self.features.size()):
                 feature = self.features[i]
@@ -763,25 +813,25 @@ def features_filter(self, features):
                 # we can check if the current index is in the keep UIDs
                 if i in feature_uids_to_keep:
                     filtered_map.push_back(feature)
-
+
             # Replace the original FeatureMap with the filtered one
             self.features = filtered_map
             self.logger.debug(f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.")
-
+
         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
             self.logger.warning(f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.")
-
+
     # Update scans_df to remove feature_uid associations for deleted features
-    if hasattr(self, 'scans_df') and self.scans_df is not None and feature_uids_to_delete:
+    if hasattr(self, "scans_df") and self.scans_df is not None and feature_uids_to_delete:
         self.scans_df = self.scans_df.with_columns(
             pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
             .then(None)
             .otherwise(pl.col("feature_uid"))
-            .alias("feature_uid")
+            .alias("feature_uid"),
         )
-
+
     kept_count = len(self.features_df)
     deleted_count = original_count - kept_count
     self.logger.info(f"Kept {kept_count} features, deleted {deleted_count} features. Remaining features: {kept_count}")
@@ -789,27 +839,27 @@ def features_filter(self, features):
 
 def set_source(self, filename):
     """
-    Reassign file_source. If filename contains only a path, keep the current basename
-    and build an absolute path. Check that the new file exists before overwriting
+    Reassign file_source. If filename contains only a path, keep the current basename
+    and build an absolute path. Check that the new file exists before overwriting
     the old file_source.
-
+
     Parameters:
         filename (str): New file path or directory path
-
+
     Returns:
         None
     """
     import os
-
+
     # Store the old file_source for logging
-    old_file_source = getattr(self, 'file_source', None)
-
+    old_file_source = getattr(self, "file_source", None)
+
     # Check if filename is just a directory path
     if os.path.isdir(filename):
         if old_file_source is None:
             self.logger.error("Cannot build path: no current file_source available")
             return
-
+
         # Get the basename from current file_source
         current_basename = os.path.basename(old_file_source)
         # Build new absolute path
@@ -817,15 +867,15 @@ def set_source(self, filename):
     else:
         # filename is a full path, make it absolute
        new_file_path = os.path.abspath(filename)
-
+
     # Check if the new file exists
     if not os.path.exists(new_file_path):
         self.logger.error(f"File does not exist: {new_file_path}")
         return
-
+
     # Update file_source
     self.file_source = new_file_path
-
+
     # Log the change
     if old_file_source is not None:
         self.logger.info(f"Updated file_source from {old_file_source} to {self.file_source}")