masster 0.4.4-py3-none-any.whl → 0.4.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster has been flagged as potentially problematic.

Files changed (39)
  1. masster/__init__.py +8 -8
  2. masster/chromatogram.py +1 -1
  3. masster/data/libs/urine.csv +3 -3
  4. masster/logger.py +11 -11
  5. masster/sample/__init__.py +1 -1
  6. masster/sample/adducts.py +338 -264
  7. masster/sample/defaults/find_adducts_def.py +21 -8
  8. masster/sample/h5.py +561 -282
  9. masster/sample/helpers.py +131 -75
  10. masster/sample/lib.py +4 -4
  11. masster/sample/load.py +31 -17
  12. masster/sample/parameters.py +1 -1
  13. masster/sample/plot.py +7 -7
  14. masster/sample/processing.py +117 -87
  15. masster/sample/sample.py +103 -90
  16. masster/sample/sample5_schema.json +44 -44
  17. masster/sample/save.py +35 -12
  18. masster/spectrum.py +1 -1
  19. masster/study/__init__.py +1 -1
  20. masster/study/defaults/align_def.py +5 -1
  21. masster/study/defaults/identify_def.py +3 -1
  22. masster/study/defaults/study_def.py +58 -25
  23. masster/study/export.py +360 -210
  24. masster/study/h5.py +560 -158
  25. masster/study/helpers.py +496 -203
  26. masster/study/helpers_optimized.py +1 -1
  27. masster/study/id.py +538 -349
  28. masster/study/load.py +233 -143
  29. masster/study/plot.py +71 -71
  30. masster/study/processing.py +456 -254
  31. masster/study/save.py +15 -5
  32. masster/study/study.py +213 -131
  33. masster/study/study5_schema.json +149 -149
  34. {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/METADATA +3 -1
  35. {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/RECORD +39 -39
  36. {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/WHEEL +0 -0
  37. {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/entry_points.txt +0 -0
  38. {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/licenses/LICENSE +0 -0
  39. {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/top_level.txt +0 -0
masster/sample/plot.py CHANGED
@@ -144,7 +144,7 @@ def _display_plot(plot_object, layout=None):
 def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
     """
     Helper function to handle consistent save/display behavior for sample plots.
-
+
     Parameters:
         plot_obj: The plot object (bokeh figure, holoviews layout, or panel object)
         filename: Optional filename to save the plot
@@ -155,10 +155,10 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
     import os
     if hasattr(self, 'folder') and self.folder and not os.path.isabs(filename):
         filename = os.path.join(self.folder, filename)
-
+
     # Convert to absolute path for logging
     abs_filename = os.path.abspath(filename)
-
+
     if filename.endswith(".html"):
         if plot_type == "panel":
             plot_obj.save(filename, embed=True)  # type: ignore[attr-defined]
@@ -375,7 +375,7 @@ def plot_chrom(
 
     layout = layout.cols(1)
     layout = panel.Column(layout)
-
+
     # Use consistent save/display behavior
     self._handle_sample_plot_output(layout, filename, "panel")
 
@@ -927,7 +927,7 @@ def plot_2d(
     layout = panel.Column(overlay)
 
     if filename is not None:
-        # Use consistent save/display behavior
+        # Use consistent save/display behavior
         self._handle_sample_plot_output(layout, filename, "panel")
         return None
     else:
@@ -2073,7 +2073,7 @@ def plot_tic(
         return
 
     # Import helper locally to avoid circular imports
-    from masster.study.helpers import get_tic
+    from master.study.helpers import get_tic
 
     # Delegate TIC computation to study helper which handles ms1_df and scans_df fallbacks
     try:
@@ -2128,7 +2128,7 @@ def plot_bpc(
         return
 
     # Import helper locally to avoid circular imports
-    from masster.study.helpers import get_bpc
+    from master.study.helpers import get_bpc
 
     # Delegate BPC computation to study helper
     try:
masster/sample/processing.py CHANGED
@@ -8,13 +8,14 @@ import pyopenms as oms
 
 from tqdm import tqdm
 
-from masster.spectrum import Spectrum
+from master.spectrum import Spectrum
 from .defaults.find_features_def import find_features_defaults
 from .defaults.find_ms2_def import find_ms2_defaults
 from .defaults.get_spectrum_def import get_spectrum_defaults
 
 
-from masster.chromatogram import Chromatogram
+from master.chromatogram import Chromatogram
+
 
 def get_spectrum(self, scan, **kwargs):
     """Retrieve a single spectrum and optionally post-process it.
@@ -252,7 +253,8 @@ def get_spectrum(self, scan, **kwargs):
         spec=spect,
         scan_uid=scan_uid,
         feature_uid=scan_info["feature_uid"][0]
-        if "feature_uid" in scan_info and scan_info["feature_uid"][0] is not None
+        if "feature_uid" in scan_info
+        and scan_info["feature_uid"][0] is not None
         else feature_uid,
         q1_step=2,
         deisotope=deisotope,
@@ -445,7 +447,9 @@ def _spec_to_mat(
         closest_index = np.argmin(np.abs(ar2 - val1))
         closest_indices.append((i, closest_index))
     # filter out pairs that are not within the specified tolerance
-    closest_indices = [(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol]
+    closest_indices = [
+        (i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol
+    ]
     # remove duplicates from the list of indices
     closest_indices = list(set(closest_indices))
     # sort the list of indices by the first element (i) in ascending order
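
The logic this hunk reformats is a nearest-neighbor match between two m/z arrays with a tolerance cutoff. A minimal standalone sketch of the same idea, with illustrative names rather than package code:

import numpy as np

def match_within_tol(ar1, ar2, tol):
    # pair each value in ar1 with the index of the nearest value in ar2
    pairs = [(i, int(np.argmin(np.abs(ar2 - v)))) for i, v in enumerate(ar1)]
    # keep only pairs within the tolerance, deduplicate, sort by the first index
    pairs = [(i, j) for i, j in pairs if abs(ar1[i] - ar2[j]) <= tol]
    return sorted(set(pairs))

print(match_within_tol(np.array([100.0, 200.0]), np.array([100.004, 350.0]), 0.01))
# -> [(0, 0)]: only the first pair survives the 0.01 tolerance
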
@@ -564,9 +568,13 @@ def find_features(self, **kwargs):
         import os
 
         os.environ["OMP_NUM_THREADS"] = str(params.threads)
-        self.logger.debug(f"Set thread count to {params.threads} via OMP_NUM_THREADS")
+        self.logger.debug(
+            f"Set thread count to {params.threads} via OMP_NUM_THREADS",
+        )
     except Exception:
-        self.logger.warning(f"Could not set thread count to {params.threads} - using default")
+        self.logger.warning(
+            f"Could not set thread count to {params.threads} - using default",
+        )
 
     # Set debug mode if enabled
     if hasattr(params, "debug") and params.debug:
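
The try/except above only sets an environment variable: OpenMP-based code such as OpenMS reads OMP_NUM_THREADS at run time, so it must be exported before the heavy calls. A minimal sketch of the same pattern, with an arbitrary example value:

import os

threads = 4  # example value; the package takes this from params.threads
try:
    os.environ["OMP_NUM_THREADS"] = str(threads)
    print(f"Set thread count to {threads} via OMP_NUM_THREADS")
except Exception:
    print(f"Could not set thread count to {threads} - using default")
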
@@ -607,7 +615,8 @@ def find_features(self, **kwargs):
     mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
     mtd_par.setValue(
         "min_trace_length",
-        float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
+        float(params.get("min_trace_length_multiplier"))
+        * float(params.get("chrom_fwhm_min")),
     )
     mtd_par.setValue(
         "trace_termination_outliers",
@@ -618,8 +627,14 @@ def find_features(self, **kwargs):
     # Additional MTD parameters
     mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
     mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
-    mtd_par.setValue("trace_termination_criterion", params.get("trace_termination_criterion"))
-    mtd_par.setValue("reestimate_mt_sd", "true" if params.get("reestimate_mt_sd") else "false")
+    mtd_par.setValue(
+        "trace_termination_criterion",
+        params.get("trace_termination_criterion"),
+    )
+    mtd_par.setValue(
+        "reestimate_mt_sd",
+        "true" if params.get("reestimate_mt_sd") else "false",
+    )
     mtd_par.setValue("quant_method", params.get("quant_method"))
 
     mtd.setParameters(mtd_par)  # set the new parameters
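
These setValue calls follow the usual pyopenms parameter pattern: fetch the algorithm's defaults, override selected entries, and hand the Param object back. A hedged sketch with arbitrary example values (parameter names taken from this hunk):

import pyopenms as oms

mtd = oms.MassTraceDetection()
par = mtd.getDefaults()                      # copy of the default Param set
par.setValue("noise_threshold_int", 1000.0)  # override selected entries
par.setValue("reestimate_mt_sd", "true")
mtd.setParameters(par)                       # apply the modified parameters
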
@@ -688,7 +703,7 @@ def find_features(self, **kwargs):
     df = feature_map.get_df(export_peptide_identifications=False)  # type: ignore[attr-defined]
     # Sets the file path to the primary MS run (usually the mzML file)
     feature_map.setPrimaryMSRunPath([self.file_path.encode()])
-
+
     # Store feature map in both attributes for compatibility
     self.features = feature_map
     self._oms_features_map = feature_map
@@ -769,13 +784,15 @@ def find_features(self, **kwargs):
             height_scaleds.append(None)
 
     # Add the computed columns to the dataframe
-    df = df.with_columns([
-        pl.Series("chrom", chroms, dtype=pl.Object),
-        pl.Series("chrom_coherence", coherences, dtype=pl.Float64),
-        pl.Series("chrom_prominence", prominences, dtype=pl.Float64),
-        pl.Series("chrom_prominence_scaled", prominence_scaleds, dtype=pl.Float64),
-        pl.Series("chrom_height_scaled", height_scaleds, dtype=pl.Float64),
-    ])
+    df = df.with_columns(
+        [
+            pl.Series("chrom", chroms, dtype=pl.Object),
+            pl.Series("chrom_coherence", coherences, dtype=pl.Float64),
+            pl.Series("chrom_prominence", prominences, dtype=pl.Float64),
+            pl.Series("chrom_prominence_scaled", prominence_scaleds, dtype=pl.Float64),
+            pl.Series("chrom_height_scaled", height_scaleds, dtype=pl.Float64),
+        ],
+    )
 
     self.features_df = df
     self._features_sync()
@@ -796,10 +813,10 @@ def find_features(self, **kwargs):
 def _clean_features_df(self, df):
     """Clean and standardize features DataFrame."""
     # Convert pandas DataFrame to polars if needed
-    if hasattr(df, 'index'):  # pandas DataFrame
+    if hasattr(df, "index"):  # pandas DataFrame
         df = df.copy()
         df["feature_id"] = df.index
-
+
     if hasattr(df, "columns") and not isinstance(df, pl.DataFrame):
         df_pl = pl.from_pandas(df)
     else:
@@ -809,35 +826,37 @@ def _clean_features_df(self, df):
     df2 = df_pl.filter(pl.col("quality") != 0)
 
     # Create new dataframe with required columns and transformations
-    df_result = df2.select([
-        pl.int_range(pl.len()).alias("feature_uid"),
-        pl.col("feature_id").cast(pl.String).alias("feature_id"),
-        pl.col("mz").round(5),
-        pl.col("RT").round(3).alias("rt"),
-        pl.col("RT").round(3).alias("rt_original"),
-        pl.col("RTstart").round(3).alias("rt_start"),
-        pl.col("RTend").round(3).alias("rt_end"),
-        (pl.col("RTend") - pl.col("RTstart")).round(3).alias("rt_delta"),
-        pl.col("MZstart").round(5).alias("mz_start"),
-        pl.col("MZend").round(5).alias("mz_end"),
-        pl.col("intensity").alias("inty"),
-        pl.col("quality"),
-        pl.col("charge"),
-        pl.lit(0).alias("iso"),
-        pl.lit(None, dtype=pl.Int64).alias("iso_of"),
-        pl.lit(None, dtype=pl.Utf8).alias("adduct"),
-        pl.lit(None, dtype=pl.Float64).alias("adduct_charge"),
-        pl.lit(None, dtype=pl.Float64).alias("adduct_mass_shift"),
-        pl.lit(None, dtype=pl.Float64).alias("adduct_mass_neutral"),
-        pl.lit(None, dtype=pl.Int64).alias("adduct_group"),
-        pl.lit(None, dtype=pl.Object).alias("chrom"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_coherence"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_prominence"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_prominence_scaled"),
-        pl.lit(None, dtype=pl.Float64).alias("chrom_height_scaled"),
-        pl.lit(None, dtype=pl.Object).alias("ms2_scans"),
-        pl.lit(None, dtype=pl.Object).alias("ms2_specs"),
-    ])
+    df_result = df2.select(
+        [
+            pl.int_range(pl.len()).alias("feature_uid"),
+            pl.col("feature_id").cast(pl.String).alias("feature_id"),
+            pl.col("mz").round(5),
+            pl.col("RT").round(3).alias("rt"),
+            pl.col("RT").round(3).alias("rt_original"),
+            pl.col("RTstart").round(3).alias("rt_start"),
+            pl.col("RTend").round(3).alias("rt_end"),
+            (pl.col("RTend") - pl.col("RTstart")).round(3).alias("rt_delta"),
+            pl.col("MZstart").round(5).alias("mz_start"),
+            pl.col("MZend").round(5).alias("mz_end"),
+            pl.col("intensity").alias("inty"),
+            pl.col("quality"),
+            pl.col("charge"),
+            pl.lit(0).alias("iso"),
+            pl.lit(None, dtype=pl.Int64).alias("iso_of"),
+            pl.lit(None, dtype=pl.Utf8).alias("adduct"),
+            pl.lit(None, dtype=pl.Float64).alias("adduct_charge"),
+            pl.lit(None, dtype=pl.Float64).alias("adduct_mass_shift"),
+            pl.lit(None, dtype=pl.Float64).alias("adduct_mass_neutral"),
+            pl.lit(None, dtype=pl.Int64).alias("adduct_group"),
+            pl.lit(None, dtype=pl.Object).alias("chrom"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_coherence"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_prominence"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_prominence_scaled"),
+            pl.lit(None, dtype=pl.Float64).alias("chrom_height_scaled"),
+            pl.lit(None, dtype=pl.Object).alias("ms2_scans"),
+            pl.lit(None, dtype=pl.Object).alias("ms2_specs"),
+        ],
+    )
 
     return df_result
 
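The select above builds the normalized features frame in one pass: existing columns are rounded or renamed, and empty placeholder columns are created with an explicit dtype via pl.lit(None, dtype=...). A small self-contained polars sketch of the pattern, with toy data:

import polars as pl

df = pl.DataFrame({"mz": [100.123456], "RT": [12.3456], "intensity": [1.0e6]})
out = df.select(
    [
        pl.col("mz").round(5),
        pl.col("RT").round(3).alias("rt"),
        pl.col("intensity").alias("inty"),
        pl.lit(None, dtype=pl.Utf8).alias("adduct"),  # typed, all-null placeholder
        pl.lit(None, dtype=pl.Float64).alias("chrom_coherence"),
    ],
)
print(out.schema)
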
@@ -859,10 +878,12 @@ def _features_deisotope(
     df = pl.from_pandas(df)
 
     # Initialize new columns
-    df = df.with_columns([
-        pl.lit(0).alias("iso"),
-        pl.col("feature_uid").alias("iso_of"),
-    ])
+    df = df.with_columns(
+        [
+            pl.lit(0).alias("iso"),
+            pl.col("feature_uid").alias("iso_of"),
+        ],
+    )
 
     # Sort by 'mz'
     df = df.sort("mz")
@@ -889,13 +910,13 @@ def _features_deisotope(
     for isotope_offset in [1, 2, 3]:
         offset_mz = isotope_offset * mz_diff
         tolerance_factor = 1.0 if isotope_offset == 1 else 1.5
-
+
         t_lower = base_mz + offset_mz - tolerance_factor * mz_tol
         t_upper = base_mz + offset_mz + tolerance_factor * mz_tol
-
+
         li = np.searchsorted(mz_arr, t_lower, side="left")
         ri = np.searchsorted(mz_arr, t_upper, side="right")
-
+
         if li < ri:
             cand_idx = np.arange(li, ri)
             mask = (
@@ -904,22 +925,23 @@ def _features_deisotope(
                 & (intensity_arr[cand_idx] < 2 * base_int)
             )
             valid_cand = cand_idx[mask]
-
+
             for cand in valid_cand:
                 if cand != i and iso_of_arr[cand] == feature_uid_arr[cand]:
                     iso_arr[cand] = iso_arr[i] + isotope_offset
                     iso_of_arr[cand] = base_feature_uid
 
     # Update the dataframe with isotope assignments
-    df = df.with_columns([
-        pl.Series("iso", iso_arr),
-        pl.Series("iso_of", iso_of_arr),
-    ])
+    df = df.with_columns(
+        [
+            pl.Series("iso", iso_arr),
+            pl.Series("iso_of", iso_of_arr),
+        ],
+    )
 
     return df
 
 
-
 def analyze_dda(self):
     # Preallocate variables
     cycle_records = []
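
The candidate search in the two hunks above depends on the features being sorted by m/z: np.searchsorted can then return the index window [li, ri) around base_mz + offset ± tolerance without scanning the whole array. A sketch with made-up values:

import numpy as np

mz_arr = np.array([100.000, 101.003, 102.007, 150.0])  # sorted feature m/z
base_mz, mz_diff, mz_tol = 100.000, 1.00335, 0.01      # ~1 Da isotope spacing
for isotope_offset in [1, 2, 3]:
    offset_mz = isotope_offset * mz_diff
    tolerance_factor = 1.0 if isotope_offset == 1 else 1.5
    lo = base_mz + offset_mz - tolerance_factor * mz_tol
    hi = base_mz + offset_mz + tolerance_factor * mz_tol
    li = np.searchsorted(mz_arr, lo, side="left")
    ri = np.searchsorted(mz_arr, hi, side="right")
    print(isotope_offset, mz_arr[li:ri])  # isotope candidates in the window
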
@@ -1106,7 +1128,9 @@ def find_ms2(self, **kwargs):
     feature_rt_start = features_subset.select("rt_start").to_numpy().flatten()
     feature_rt_end = features_subset.select("rt_end").to_numpy().flatten()
     feature_uids = features_subset.select("feature_uid").to_numpy().flatten()
-    feature_indices = features_subset.with_row_index().select("index").to_numpy().flatten()
+    feature_indices = (
+        features_subset.with_row_index().select("index").to_numpy().flatten()
+    )
 
     # Pre-compute RT radius for all features
     rt_radius = np.minimum(feature_rt - feature_rt_start, feature_rt_end - feature_rt)
@@ -1159,15 +1183,17 @@ def find_ms2(self, **kwargs):
 
     scan_uids = ms2_index_arr[final_indices].tolist()
     scan_uid_lists.append(scan_uids)
-    spec_lists.append([
-        self.get_spectrum(
-            scan_uids[0],
-            centroid=centroid,
-            deisotope=deisotope,
-            dia_stats=dia_stats,
-            feature_uid=feature_uid,
-        ),
-    ])
+    spec_lists.append(
+        [
+            self.get_spectrum(
+                scan_uids[0],
+                centroid=centroid,
+                deisotope=deisotope,
+                dia_stats=dia_stats,
+                feature_uid=feature_uid,
+            ),
+        ],
+    )
 
     # Collect updates for batch processing
     updated_feature_uids.extend([feature_uid] * len(final_indices))
@@ -1181,11 +1207,13 @@ def find_ms2(self, **kwargs):
     features_df = pl.from_pandas(features_df)
 
     # Update the features_df
-    update_df = pl.DataFrame({
-        "temp_idx": feature_indices,
-        "ms2_scans": pl.Series("ms2_scans", scan_uid_lists, dtype=pl.Object),
-        "ms2_specs": pl.Series("ms2_specs", spec_lists, dtype=pl.Object),
-    })
+    update_df = pl.DataFrame(
+        {
+            "temp_idx": feature_indices,
+            "ms2_scans": pl.Series("ms2_scans", scan_uid_lists, dtype=pl.Object),
+            "ms2_specs": pl.Series("ms2_specs", spec_lists, dtype=pl.Object),
+        },
+    )
 
     # Join and update
     features_df = (
@@ -1196,16 +1224,18 @@ def find_ms2(self, **kwargs):
             how="left",
             suffix="_new",
         )
-        .with_columns([
-            pl.when(pl.col("ms2_scans_new").is_not_null())
-            .then(pl.col("ms2_scans_new"))
-            .otherwise(pl.col("ms2_scans"))
-            .alias("ms2_scans"),
-            pl.when(pl.col("ms2_specs_new").is_not_null())
-            .then(pl.col("ms2_specs_new"))
-            .otherwise(pl.col("ms2_specs"))
-            .alias("ms2_specs"),
-        ])
+        .with_columns(
+            [
+                pl.when(pl.col("ms2_scans_new").is_not_null())
+                .then(pl.col("ms2_scans_new"))
+                .otherwise(pl.col("ms2_scans"))
+                .alias("ms2_scans"),
+                pl.when(pl.col("ms2_specs_new").is_not_null())
+                .then(pl.col("ms2_specs_new"))
+                .otherwise(pl.col("ms2_specs"))
+                .alias("ms2_specs"),
+            ],
+        )
         .drop(["temp_idx", "ms2_scans_new", "ms2_specs_new"])
     )
 
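The chain above is a join-and-coalesce update: the new MS2 assignments are left-joined on a temporary index, each column takes the freshly joined value when present and keeps the old one otherwise, and the helper columns are dropped. A toy sketch of the same pattern (plain integers here; the package stores Object-dtype lists):

import polars as pl

base = pl.DataFrame({"temp_idx": [0, 1, 2], "ms2_scans": [None, 5, None]})
update = pl.DataFrame({"temp_idx": [0], "ms2_scans": [9]})
merged = (
    base.join(update, on="temp_idx", how="left", suffix="_new")
    .with_columns(
        pl.when(pl.col("ms2_scans_new").is_not_null())
        .then(pl.col("ms2_scans_new"))
        .otherwise(pl.col("ms2_scans"))
        .alias("ms2_scans"),
    )
    .drop(["temp_idx", "ms2_scans_new"])
)
print(merged)  # row 0 -> 9, row 1 -> 5, row 2 -> null
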
@@ -1242,4 +1272,4 @@ def find_ms2(self, **kwargs):
     self.store_history(["find_ms2"], params.to_dict())
     self.logger.debug(
         "Parameters stored to find_ms2",
-    )
+    )