masster-0.4.0-py3-none-any.whl → masster-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. masster/__init__.py +8 -8
  2. masster/_version.py +1 -1
  3. masster/chromatogram.py +3 -9
  4. masster/data/libs/README.md +1 -1
  5. masster/data/libs/ccm.csv +120 -120
  6. masster/data/libs/ccm.py +116 -62
  7. masster/data/libs/central_carbon_README.md +1 -1
  8. masster/data/libs/urine.py +161 -65
  9. masster/data/libs/urine_metabolites.csv +4693 -4693
  10. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +2 -2
  11. masster/logger.py +43 -78
  12. masster/sample/__init__.py +1 -1
  13. masster/sample/adducts.py +264 -338
  14. masster/sample/defaults/find_adducts_def.py +8 -21
  15. masster/sample/defaults/find_features_def.py +1 -6
  16. masster/sample/defaults/get_spectrum_def.py +1 -5
  17. masster/sample/defaults/sample_def.py +1 -5
  18. masster/sample/h5.py +282 -561
  19. masster/sample/helpers.py +75 -131
  20. masster/sample/lib.py +17 -42
  21. masster/sample/load.py +17 -31
  22. masster/sample/parameters.py +2 -6
  23. masster/sample/plot.py +27 -88
  24. masster/sample/processing.py +87 -117
  25. masster/sample/quant.py +51 -57
  26. masster/sample/sample.py +90 -103
  27. masster/sample/sample5_schema.json +44 -44
  28. masster/sample/save.py +12 -35
  29. masster/sample/sciex.py +19 -66
  30. masster/spectrum.py +20 -58
  31. masster/study/__init__.py +1 -1
  32. masster/study/defaults/align_def.py +1 -5
  33. masster/study/defaults/fill_chrom_def.py +1 -5
  34. masster/study/defaults/fill_def.py +1 -5
  35. masster/study/defaults/integrate_chrom_def.py +1 -5
  36. masster/study/defaults/integrate_def.py +1 -5
  37. masster/study/defaults/study_def.py +25 -58
  38. masster/study/export.py +207 -233
  39. masster/study/h5.py +136 -470
  40. masster/study/helpers.py +202 -495
  41. masster/study/helpers_optimized.py +13 -40
  42. masster/study/id.py +110 -213
  43. masster/study/load.py +143 -230
  44. masster/study/plot.py +257 -518
  45. masster/study/processing.py +257 -469
  46. masster/study/save.py +5 -15
  47. masster/study/study.py +276 -379
  48. masster/study/study5_schema.json +96 -96
  49. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/METADATA +1 -1
  50. masster-0.4.1.dist-info/RECORD +67 -0
  51. masster-0.4.0.dist-info/RECORD +0 -67
  52. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/WHEEL +0 -0
  53. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/entry_points.txt +0 -0
  54. {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/licenses/LICENSE +0 -0
masster/sample/helpers.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import polars as pl
+import numpy as np
 
 
 # Parameters removed - using hardcoded defaults
@@ -78,14 +79,10 @@ def _estimate_memory_usage(self):
 
     # Log the memory usage summary
    if hasattr(self, "logger"):
-        self.logger.debug(
-            f"Total DataFrame memory usage: {memory_usage['total']['mb']:.2f} MB",
-        )
+        self.logger.debug(f"Total DataFrame memory usage: {memory_usage['total']['mb']:.2f} MB")
         for df_name, stats in memory_usage.items():
             if df_name != "total" and stats["bytes"] > 0:
-                self.logger.debug(
-                    f"{df_name}: {stats['rows']} rows, {stats['mb']:.2f} MB",
-                )
+                self.logger.debug(f"{df_name}: {stats['rows']} rows, {stats['mb']:.2f} MB")
 
     return memory_usage["total"]["mb"]
 
@@ -113,9 +110,7 @@ def _get_scan_uids(self, scans=None, verbose=True):
         scans_uids = self.scans_df.get_column("scan_uid").to_list()
     elif isinstance(scans, list):
         # if scans is a list, ensure all elements are valid scan_uids
-        scans_uids = [
-            s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()
-        ]
+        scans_uids = [s for s in scans if s in self.scans_df.get_column("scan_uid").to_list()]
         if verbose and not scans_uids:
             self.logger.error("No valid scan_uids provided.")
 
@@ -148,9 +143,7 @@ def _get_feature_uids(self, features=None, verbose=True):
         # If features is a list, ensure all elements are valid feature_uids
         if self.features_df is None:
             if verbose:
-                self.logger.warning(
-                    "No features_df available to validate feature UIDs.",
-                )
+                self.logger.warning("No features_df available to validate feature UIDs.")
             return []
 
         valid_feature_uids = self.features_df.get_column("feature_uid").to_list()
@@ -171,9 +164,7 @@ def _get_feature_uids(self, features=None, verbose=True):
 
         if feature_column is None:
             if verbose:
-                self.logger.error(
-                    "No 'feature_uid' or 'feature_id' column found in polars DataFrame.",
-                )
+                self.logger.error("No 'feature_uid' or 'feature_id' column found in polars DataFrame.")
             return []
 
         # Get unique values from the column
@@ -199,9 +190,7 @@ def _get_feature_uids(self, features=None, verbose=True):
 
         if feature_column is None:
             if verbose:
-                self.logger.error(
-                    "No 'feature_uid' or 'feature_id' column found in pandas DataFrame.",
-                )
+                self.logger.error("No 'feature_uid' or 'feature_id' column found in pandas DataFrame.")
             return []
 
         # Get unique values from the column
@@ -209,9 +198,7 @@ def _get_feature_uids(self, features=None, verbose=True):
 
         else:
             if verbose:
-                self.logger.error(
-                    "Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.",
-                )
+                self.logger.error("Invalid input type. Expected None, list, polars DataFrame, or pandas DataFrame.")
             return []
 
     except Exception as e:
@@ -328,9 +315,7 @@ def get_eic(self, mz, mz_tol=None):
     # Filter by mz window
     mz_min = mz - mz_tol
     mz_max = mz + mz_tol
-    matches = self.ms1_df.filter(
-        (pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max),
-    )
+    matches = self.ms1_df.filter((pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max))
 
     if len(matches) == 0:
         if hasattr(self, "logger"):
@@ -340,9 +325,7 @@ def get_eic(self, mz, mz_tol=None):
         return None
 
     # Aggregate intensities per retention time. Use sum in case multiple points per rt.
-    chrom = (
-        matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
-    )
+    chrom = matches.group_by("rt").agg([pl.col("inty").sum().alias("inty")]).sort("rt")
 
     # Attach to Sample
     self.chrom_df = chrom
@@ -408,8 +391,7 @@ def select(
         if isinstance(coherence, tuple) and len(coherence) == 2:
             min_coherence, max_coherence = coherence
             feats = feats.filter(
-                (pl.col("chrom_coherence") >= min_coherence)
-                & (pl.col("chrom_coherence") <= max_coherence),
+                (pl.col("chrom_coherence") >= min_coherence) & (pl.col("chrom_coherence") <= max_coherence),
             )
         else:
             feats = feats.filter(pl.col("chrom_coherence") >= coherence)
@@ -460,8 +442,7 @@ def select(
         if isinstance(rt_delta, tuple) and len(rt_delta) == 2:
             min_rt_delta, max_rt_delta = rt_delta
             feats = feats.filter(
-                (pl.col("rt_delta") >= min_rt_delta)
-                & (pl.col("rt_delta") <= max_rt_delta),
+                (pl.col("rt_delta") >= min_rt_delta) & (pl.col("rt_delta") <= max_rt_delta),
             )
         else:
             feats = feats.filter(pl.col("rt_delta") >= rt_delta)
@@ -538,8 +519,7 @@ def select(
         if isinstance(prominence, tuple) and len(prominence) == 2:
             min_prominence, max_prominence = prominence
             feats = feats.filter(
-                (pl.col("chrom_prominence") >= min_prominence)
-                & (pl.col("chrom_prominence") <= max_prominence),
+                (pl.col("chrom_prominence") >= min_prominence) & (pl.col("chrom_prominence") <= max_prominence),
             )
         else:
             feats = feats.filter(pl.col("chrom_prominence") >= prominence)
@@ -550,9 +530,7 @@ def select(
     if height is not None:
         feats_len_before_filter = len(feats)
         # Check if chrom_height column exists, if not use chrom_height_scaled
-        height_col = (
-            "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
-        )
+        height_col = "chrom_height" if "chrom_height" in feats.columns else "chrom_height_scaled"
         if isinstance(height, tuple) and len(height) == 2:
             min_height, max_height = height
             feats = feats.filter(
@@ -573,23 +551,23 @@
 def _features_sync(self):
     """
     Synchronizes the cached FeatureMap with features_df.
-
-    This ensures that the cached FeatureMap (_oms_features_map) contains only features
-    that exist in both the FeatureMap and the features_df. This is important
+    
+    This ensures that the cached FeatureMap (_oms_features_map) contains only features 
+    that exist in both the FeatureMap and the features_df. This is important 
     after operations that modify features_df but not the FeatureMap (like filtering).
-
+    
     Side Effects:
         Updates self._oms_features_map and self.features_df to contain only common features.
         Logs information about removed features.
     """
     if self.features_df is None or len(self.features_df) == 0:
         self.logger.debug("No features_df to synchronize")
-        if hasattr(self, "_oms_features_map"):
+        if hasattr(self, '_oms_features_map'):
             self._oms_features_map = None
         return
 
     # Check if we have a cached feature map
-    if not hasattr(self, "_oms_features_map") or self._oms_features_map is None:
+    if not hasattr(self, '_oms_features_map') or self._oms_features_map is None:
         self.logger.debug("No cached feature map to synchronize")
         return
 
@@ -598,26 +576,20 @@ def _features_sync(self):
     except ImportError:
         self.logger.warning("PyOpenMS not available, cannot sync FeatureMap")
         return
-
+    
     try:
         # Get feature IDs from both sources
         if "feature_id" in self.features_df.columns:
-            df_feature_ids = set(
-                self.features_df.get_column("feature_id").cast(str).to_list(),
-            )
+            df_feature_ids = set(self.features_df.get_column("feature_id").cast(str).to_list())
         else:
-            self.logger.warning(
-                "No feature_id column in features_df, cannot synchronize",
-            )
+            self.logger.warning("No feature_id column in features_df, cannot synchronize")
             return
 
         # Get feature IDs from FeatureMap
         feature_map_ids = set()
         for i in range(self._oms_features_map.size()):
             feature = self._oms_features_map[i]
-            unique_id = str(
-                feature.getUniqueId(),
-            )  # Convert to string to match DataFrame
+            unique_id = str(feature.getUniqueId())  # Convert to string to match DataFrame
             feature_map_ids.add(unique_id)
 
         # Find features that exist in both
@@ -715,7 +687,7 @@ def features_delete(self, features: list | None = None):
     )
 
     # Update the OpenMS FeatureMap by creating a new one with only features to keep
-    if hasattr(self, "_oms_features_map") and self._oms_features_map is not None:
+    if hasattr(self, '_oms_features_map') and self._oms_features_map is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
@@ -724,9 +696,7 @@ def features_delete(self, features: list | None = None):
             filtered_map = oms.FeatureMap()
 
             # Get the feature UIDs that should remain after deletion
-            remaining_feature_uids = self.features_df.get_column(
-                "feature_uid",
-            ).to_list()
+            remaining_feature_uids = self.features_df.get_column("feature_uid").to_list()
 
             # Iterate through existing features and keep only those not in deletion list
             for i in range(self._oms_features_map.size()):
@@ -738,16 +708,12 @@ def features_delete(self, features: list | None = None):
 
             # Replace the original FeatureMap with the filtered one
             self._oms_features_map = filtered_map
-            self.logger.debug(
-                f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.",
-            )
+            self.logger.debug(f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.")
 
         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
-            self.logger.warning(
-                f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.",
-            )
+            self.logger.warning(f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.")
 
     # Update scans_df to remove feature_uid associations for deleted features
     if hasattr(self, "scans_df") and self.scans_df is not None:
@@ -759,9 +725,7 @@ def features_delete(self, features: list | None = None):
         )
 
     deleted_count = original_count - len(self.features_df)
-    self.logger.info(
-        f"Deleted {deleted_count} features. Remaining features: {len(self.features_df)}",
-    )
+    self.logger.info(f"Deleted {deleted_count} features. Remaining features: {len(self.features_df)}")
 
 
 def _delete_ms2(self):
@@ -784,19 +748,14 @@ def _delete_ms2(self):
     self.logger.debug("Unlinking MS2 spectra from features...")
 
     # Set ms2_scans and ms2_specs to None using Polars syntax
-    self.features_df = self.features_df.with_columns(
-        [
-            pl.lit(None).alias("ms2_scans"),
-            pl.lit(None).alias("ms2_specs"),
-        ],
-    )
+    self.features_df = self.features_df.with_columns([
+        pl.lit(None).alias("ms2_scans"),
+        pl.lit(None).alias("ms2_specs"),
+    ])
 
     # Update scans_df to remove feature_uid association for linked MS2 spectra
     self.scans_df = self.scans_df.with_columns(
-        pl.when(pl.col("ms_level") == 2)
-        .then(None)
-        .otherwise(pl.col("feature_uid"))
-        .alias("feature_uid"),
+        pl.when(pl.col("ms_level") == 2).then(None).otherwise(pl.col("feature_uid")).alias("feature_uid"),
     )
     self.logger.info("MS2 spectra unlinked from features.")
 
@@ -828,9 +787,7 @@ def features_filter(self, features):
         return
 
     if features is None:
-        self.logger.warning(
-            "No features specified to keep. Use features_delete() to delete all features.",
-        )
+        self.logger.warning("No features specified to keep. Use features_delete() to delete all features.")
         return
 
     # Get the feature UIDs to keep
@@ -852,7 +809,7 @@ def features_filter(self, features):
     feature_uids_to_delete = list(all_feature_uids - set(feature_uids_to_keep))
 
     # Update the OpenMS FeatureMap by creating a new one with only features to keep
-    if hasattr(self, "_oms_features_map") and self._oms_features_map is not None:
+    if hasattr(self, '_oms_features_map') and self._oms_features_map is not None:
         try:
             # Import pyopenms
             import pyopenms as oms
@@ -870,23 +827,15 @@ def features_filter(self, features):
 
             # Replace the original FeatureMap with the filtered one
             self._oms_features_map = filtered_map
-            self.logger.debug(
-                f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.",
-            )
+            self.logger.debug(f"OpenMS FeatureMap updated with {filtered_map.size()} remaining features.")
 
         except ImportError:
             self.logger.warning("PyOpenMS not available, only updating features_df")
         except Exception as e:
-            self.logger.warning(
-                f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.",
-            )
+            self.logger.warning(f"Could not update OpenMS FeatureMap: {e}. FeatureMap may be out of sync.")
 
     # Update scans_df to remove feature_uid associations for deleted features
-    if (
-        hasattr(self, "scans_df")
-        and self.scans_df is not None
-        and feature_uids_to_delete
-    ):
+    if hasattr(self, "scans_df") and self.scans_df is not None and feature_uids_to_delete:
         self.scans_df = self.scans_df.with_columns(
             pl.when(pl.col("feature_uid").is_in(feature_uids_to_delete))
             .then(None)
@@ -896,9 +845,7 @@ def features_filter(self, features):
 
     kept_count = len(self.features_df)
     deleted_count = original_count - kept_count
-    self.logger.info(
-        f"Kept {kept_count} features, deleted {deleted_count} features. Remaining features: {kept_count}",
-    )
+    self.logger.info(f"Kept {kept_count} features, deleted {deleted_count} features. Remaining features: {kept_count}")
 
 
 def set_source(self, filename):
@@ -942,9 +889,7 @@ def set_source(self, filename):
 
     # Log the change
     if old_file_source is not None:
-        self.logger.info(
-            f"Updated file_source from {old_file_source} to {self.file_source}",
-        )
+        self.logger.info(f"Updated file_source from {old_file_source} to {self.file_source}")
     else:
         self.logger.info(f"Set file_source to {self.file_source}")
 
@@ -952,90 +897,89 @@
 def _recreate_feature_map(self):
     """
     Recreate OpenMS FeatureMap from features_df.
-
+    
     This helper function creates a new OpenMS FeatureMap using the data from features_df.
     This allows us to avoid storing and loading featureXML files by default, while still
     being able to recreate the feature map when needed for OpenMS operations like
     find_features() or saving to featureXML format.
-
+    
     Returns:
         oms.FeatureMap: A new FeatureMap with features from features_df, or None if no features
-
+    
     Side Effects:
         Caches the created feature map in self._oms_features_map for reuse
     """
     if self.features_df is None or len(self.features_df) == 0:
         self.logger.debug("No features_df available to recreate feature map")
         return None
-
+    
     try:
         import pyopenms as oms
     except ImportError:
         self.logger.warning("PyOpenMS not available, cannot recreate feature map")
         return None
-
+    
     # Create new FeatureMap
     feature_map = oms.FeatureMap()
-
+    
     # Set the primary MS run path if available
-    if hasattr(self, "file_path") and self.file_path:
+    if hasattr(self, 'file_path') and self.file_path:
         feature_map.setPrimaryMSRunPath([self.file_path.encode()])
-
+    
     # Convert DataFrame features to OpenMS Features
     for i, feature_row in enumerate(self.features_df.iter_rows(named=True)):
         feature = oms.Feature()
-
+        
         # Set basic properties from DataFrame (handle missing values gracefully)
         try:
-            if feature_row.get("feature_id") is not None:
-                feature.setUniqueId(int(feature_row["feature_id"]))
+            if feature_row.get('feature_id') is not None:
+                feature.setUniqueId(int(feature_row['feature_id']))
             else:
                 feature.setUniqueId(i)  # Use index as fallback
-
-            if feature_row.get("mz") is not None:
-                feature.setMZ(float(feature_row["mz"]))
-            if feature_row.get("rt") is not None:
-                feature.setRT(float(feature_row["rt"]))
-            if feature_row.get("inty") is not None:
-                feature.setIntensity(float(feature_row["inty"]))
-            if feature_row.get("quality") is not None:
-                feature.setOverallQuality(float(feature_row["quality"]))
-            if feature_row.get("charge") is not None:
-                feature.setCharge(int(feature_row["charge"]))
-
+            
+            if feature_row.get('mz') is not None:
+                feature.setMZ(float(feature_row['mz']))
+            if feature_row.get('rt') is not None:
+                feature.setRT(float(feature_row['rt']))
+            if feature_row.get('inty') is not None:
+                feature.setIntensity(float(feature_row['inty']))
+            if feature_row.get('quality') is not None:
+                feature.setOverallQuality(float(feature_row['quality']))
+            if feature_row.get('charge') is not None:
+                feature.setCharge(int(feature_row['charge']))
+            
             # Add to feature map
             feature_map.push_back(feature)
-
+        
         except (ValueError, TypeError) as e:
             self.logger.warning(f"Skipping feature due to conversion error: {e}")
             continue
-
+    
     # Ensure unique IDs
     feature_map.ensureUniqueId()
-
+    
     # Cache the feature map
     self._oms_features_map = feature_map
-
-    self.logger.debug(
-        f"Recreated FeatureMap with {feature_map.size()} features from features_df",
-    )
+    
+    self.logger.debug(f"Recreated FeatureMap with {feature_map.size()} features from features_df")
     return feature_map
 
 
 def _get_feature_map(self):
     """
     Get the OpenMS FeatureMap, creating it from features_df if needed.
-
+    
     This property-like method returns the cached feature map if available,
-    or recreates it from features_df if not. This allows lazy loading of
+    or recreates it from features_df if not. This allows lazy loading of 
     feature maps only when needed for OpenMS operations.
-
+    
     Returns:
         oms.FeatureMap or None: The feature map, or None if not available
     """
     # Return cached feature map if available
-    if hasattr(self, "_oms_features_map") and self._oms_features_map is not None:
+    if hasattr(self, '_oms_features_map') and self._oms_features_map is not None:
         return self._oms_features_map
-
+    
     # Otherwise recreate from features_df
     return self._recreate_feature_map()
+
masster/sample/lib.py CHANGED
@@ -1,8 +1,8 @@
 """
 lib.py
 
-This module provides the Lib class and utility functions for mass spectrometry compound library
-management and feature annotation. It contains core functionality for compound library management,
+This module provides the Lib class and utility functions for mass spectrometry compound library 
+management and feature annotation. It contains core functionality for compound library management, 
 target identification, adduct handling, and various analytical operations.
 
 Key Features:
@@ -34,7 +34,7 @@ Supported Adducts:
 
 Example Usage:
     ```python
-    from master.sample.lib import Lib
+    from masster.sample.lib import Lib
 
     # Create library instance
     lib = Lib()
@@ -63,7 +63,7 @@ import pyopenms as oms
 
 from tqdm import tqdm
 
-from master.chromatogram import Chromatogram
+from masster.chromatogram import Chromatogram
 # Parameters removed - using hardcoded defaults
 
 
@@ -251,9 +251,7 @@ def lib_link(
 
     for _index, row in self.lib.iterrows():
         # find all features that match the mz and rt is not None
-        mask = (self.features_df["mz"] >= row["mz"] - mz_tol_lib) & (
-            self.features_df["mz"] <= row["mz"] + mz_tol_lib
-        )
+        mask = (self.features_df["mz"] >= row["mz"] - mz_tol_lib) & (self.features_df["mz"] <= row["mz"] + mz_tol_lib)
         if row["rt"] is not None and rt_tol_lib is not np.nan:
             mask &= (self.features_df["rt"] >= row["rt"] - rt_tol_lib) & (
                 self.features_df["rt"] <= row["rt"] + rt_tol_lib
@@ -280,12 +278,8 @@ def lib_link(
             "mz": f["mz"].values[0],
             "delta_mz": row["mz"] - f["mz"].values[0],
             "rt": f["rt"].values[0],
-            "delta_rt": row["rt"] - f["rt"].values[0]
-            if row["rt"] is not None
-            else None,
-            "ms2_scans": f["ms2_scans"].values[0]
-            if "ms2_scans" in self.features_df.columns
-            else None,
+            "delta_rt": row["rt"] - f["rt"].values[0] if row["rt"] is not None else None,
+            "ms2_scans": f["ms2_scans"].values[0] if "ms2_scans" in self.features_df.columns else None,
             "eic": None,
         }
         lib_matches.append(new_match)
@@ -418,9 +412,7 @@ def save_lib_mgf(
         desc="Export MGF",
     ):
         # find the feature with feature_uid == matchrow["feature_uid"]
-        row = self.features_df[
-            self.features_df["feature_uid"] == matchrow["feature_uid"]
-        ].iloc[0]
+        row = self.features_df[self.features_df["feature_uid"] == matchrow["feature_uid"]].iloc[0]
         if row["ms2_scans"] is None:
             skip = skip + 1
             continue
@@ -546,9 +538,7 @@ def save_lib_mgf(
             d = {
                 "PEPMASS": row["mz"],
                 "RTINSECONDS": row["rt"],
-                "IONMODE": "positive"
-                if matchrow["adduct"][-1] == "+"
-                else "negative",
+                "IONMODE": "positive" if matchrow["adduct"][-1] == "+" else "negative",
                 "CHARGE": "1" + matchrow["adduct"].split("]")[1],
                 "NAME": f"{matchrow['name']}",
                 "SMILES": matchrow["smiles"],
@@ -616,9 +606,7 @@ def save_lib_mgf(
             d = {
                 "PEPMASS": row["mz"],
                 "RTINSECONDS": row["rt"],
-                "IONMODE": "positive"
-                if matchrow["adduct"][-1] == "+"
-                else "negative",
+                "IONMODE": "positive" if matchrow["adduct"][-1] == "+" else "negative",
                 "CHARGE": "1" + matchrow["adduct"].split("]")[1],
                 "NAME": f"{matchrow['name']}",
                 "SMILES": matchrow["smiles"],
@@ -653,18 +641,14 @@ def save_lib_mgf(
                     spec = spec.centroid(
                         tolerance=self.parameters["mz_tol_ms1_da"],
                         ppm=self.parameters["mz_tol_ms1_ppm"],
-                        min_points=self.parameters[
-                            "centroid_min_points_ms1"
-                        ],
+                        min_points=self.parameters["centroid_min_points_ms1"],
                         algo=centroid_algo,
                     )
                 elif spec.ms_level == 2:
                     spec = spec.centroid(
                         tolerance=self.parameters["mz_tol_ms2_da"],
                         ppm=self.parameters["mz_tol_ms2_ppm"],
-                        min_points=self.parameters[
-                            "centroid_min_points_ms2"
-                        ],
+                        min_points=self.parameters["centroid_min_points_ms2"],
                         algo=centroid_algo,
                     )
                 if deisotope:
@@ -699,9 +683,7 @@ def save_lib_mgf(
             d = {
                 "PEPMASS": row["mz"],
                 "RTINSECONDS": row["rt"],
-                "IONMODE": "positive"
-                if matchrow["adduct"][-1] == "+"
-                else "negative",
+                "IONMODE": "positive" if matchrow["adduct"][-1] == "+" else "negative",
                 "CHARGE": "1" + matchrow["adduct"].split("]")[1],
                 "NAME": f"{matchrow['name']}",
                 "SMILES": matchrow["smiles"],
@@ -739,8 +721,7 @@ def save_lib_mgf(
                 kineticenergy = None
                 if mslevel > 1:
                     if (
-                        "CID" in filename.upper()
-                        or "ZTS" in filename.upper()
+                        "CID" in filename.upper() or "ZTS" in filename.upper()
                     ) and "EAD" in filename.upper():
                         activation = "CID-EAD"
                         match = re.search(r"(\d+)KE", filename.upper())
@@ -752,17 +733,13 @@ def save_lib_mgf(
                            kineticenergy = int(match.group(1))
                     else:
                         activation = "CID"
-                        energy = (
-                            spec.energy if hasattr(spec, "energy") else None
-                        )
+                        energy = spec.energy if hasattr(spec, "energy") else None
 
                     spec = filter_peaks(spec, inty_min=inty_min)
                     d = {
                         "PEPMASS": row["mz"],
                         "RTINSECONDS": row["rt"],
-                        "IONMODE": "positive"
-                        if matchrow["adduct"][-1] == "+"
-                        else "negative",
+                        "IONMODE": "positive" if matchrow["adduct"][-1] == "+" else "negative",
                         "CHARGE": "1" + matchrow["adduct"].split("]")[1],
                         "NAME": f"{matchrow['name']}",
                         "SMILES": matchrow["smiles"],
@@ -775,9 +752,7 @@ def save_lib_mgf(
                         "FILENAME": filename,
                         "SCANS": ms1_scan_uid,
                         "FID": row["fid"],
-                        "MSLEVEL": 1
-                        if spec.ms_level is None
-                        else spec.ms_level,
+                        "MSLEVEL": 1 if spec.ms_level is None else spec.ms_level,
                     }
                     write_ion(f, d, spec)