masster-0.4.4-py3-none-any.whl → masster-0.4.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
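Potentially problematic release.
This version of masster might be problematic (the original page links to further details). Note: several added import lines in the hunks below reference a top-level package named `master` (e.g. `from master.spectrum import Spectrum`, `from master.study.helpers import get_tic`), whereas every file listed in this wheel lives under `masster/`; this may be related to the warning, but the diff alone does not confirm it.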
- masster/__init__.py +8 -8
- masster/chromatogram.py +1 -1
- masster/data/libs/urine.csv +3 -3
- masster/logger.py +11 -11
- masster/sample/__init__.py +1 -1
- masster/sample/adducts.py +338 -264
- masster/sample/defaults/find_adducts_def.py +21 -8
- masster/sample/h5.py +561 -282
- masster/sample/helpers.py +131 -75
- masster/sample/lib.py +4 -4
- masster/sample/load.py +31 -17
- masster/sample/parameters.py +1 -1
- masster/sample/plot.py +7 -7
- masster/sample/processing.py +117 -87
- masster/sample/sample.py +103 -90
- masster/sample/sample5_schema.json +44 -44
- masster/sample/save.py +35 -12
- masster/spectrum.py +1 -1
- masster/study/__init__.py +1 -1
- masster/study/defaults/align_def.py +5 -1
- masster/study/defaults/identify_def.py +3 -1
- masster/study/defaults/study_def.py +58 -25
- masster/study/export.py +360 -210
- masster/study/h5.py +560 -158
- masster/study/helpers.py +496 -203
- masster/study/helpers_optimized.py +1 -1
- masster/study/id.py +538 -349
- masster/study/load.py +233 -143
- masster/study/plot.py +71 -71
- masster/study/processing.py +456 -254
- masster/study/save.py +15 -5
- masster/study/study.py +213 -131
- masster/study/study5_schema.json +149 -149
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/METADATA +3 -1
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/RECORD +39 -39
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/WHEEL +0 -0
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/entry_points.txt +0 -0
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/top_level.txt +0 -0
masster/sample/plot.py
CHANGED
|
@@ -144,7 +144,7 @@ def _display_plot(plot_object, layout=None):
|
|
|
144
144
|
def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
|
|
145
145
|
"""
|
|
146
146
|
Helper function to handle consistent save/display behavior for sample plots.
|
|
147
|
-
|
|
147
|
+
|
|
148
148
|
Parameters:
|
|
149
149
|
plot_obj: The plot object (bokeh figure, holoviews layout, or panel object)
|
|
150
150
|
filename: Optional filename to save the plot
|
|
@@ -155,10 +155,10 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
|
|
|
155
155
|
import os
|
|
156
156
|
if hasattr(self, 'folder') and self.folder and not os.path.isabs(filename):
|
|
157
157
|
filename = os.path.join(self.folder, filename)
|
|
158
|
-
|
|
158
|
+
|
|
159
159
|
# Convert to absolute path for logging
|
|
160
160
|
abs_filename = os.path.abspath(filename)
|
|
161
|
-
|
|
161
|
+
|
|
162
162
|
if filename.endswith(".html"):
|
|
163
163
|
if plot_type == "panel":
|
|
164
164
|
plot_obj.save(filename, embed=True) # type: ignore[attr-defined]
|
|
@@ -375,7 +375,7 @@ def plot_chrom(
|
|
|
375
375
|
|
|
376
376
|
layout = layout.cols(1)
|
|
377
377
|
layout = panel.Column(layout)
|
|
378
|
-
|
|
378
|
+
|
|
379
379
|
# Use consistent save/display behavior
|
|
380
380
|
self._handle_sample_plot_output(layout, filename, "panel")
|
|
381
381
|
|
|
@@ -927,7 +927,7 @@ def plot_2d(
|
|
|
927
927
|
layout = panel.Column(overlay)
|
|
928
928
|
|
|
929
929
|
if filename is not None:
|
|
930
|
-
# Use consistent save/display behavior
|
|
930
|
+
# Use consistent save/display behavior
|
|
931
931
|
self._handle_sample_plot_output(layout, filename, "panel")
|
|
932
932
|
return None
|
|
933
933
|
else:
|
|
@@ -2073,7 +2073,7 @@ def plot_tic(
|
|
|
2073
2073
|
return
|
|
2074
2074
|
|
|
2075
2075
|
# Import helper locally to avoid circular imports
|
|
2076
|
-
from
|
|
2076
|
+
from master.study.helpers import get_tic
|
|
2077
2077
|
|
|
2078
2078
|
# Delegate TIC computation to study helper which handles ms1_df and scans_df fallbacks
|
|
2079
2079
|
try:
|
|
@@ -2128,7 +2128,7 @@ def plot_bpc(
|
|
|
2128
2128
|
return
|
|
2129
2129
|
|
|
2130
2130
|
# Import helper locally to avoid circular imports
|
|
2131
|
-
from
|
|
2131
|
+
from master.study.helpers import get_bpc
|
|
2132
2132
|
|
|
2133
2133
|
# Delegate BPC computation to study helper
|
|
2134
2134
|
try:
|
masster/sample/processing.py
CHANGED
|
@@ -8,13 +8,14 @@ import pyopenms as oms
|
|
|
8
8
|
|
|
9
9
|
from tqdm import tqdm
|
|
10
10
|
|
|
11
|
-
from
|
|
11
|
+
from master.spectrum import Spectrum
|
|
12
12
|
from .defaults.find_features_def import find_features_defaults
|
|
13
13
|
from .defaults.find_ms2_def import find_ms2_defaults
|
|
14
14
|
from .defaults.get_spectrum_def import get_spectrum_defaults
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
from
|
|
17
|
+
from master.chromatogram import Chromatogram
|
|
18
|
+
|
|
18
19
|
|
|
19
20
|
def get_spectrum(self, scan, **kwargs):
|
|
20
21
|
"""Retrieve a single spectrum and optionally post-process it.
|
|
@@ -252,7 +253,8 @@ def get_spectrum(self, scan, **kwargs):
|
|
|
252
253
|
spec=spect,
|
|
253
254
|
scan_uid=scan_uid,
|
|
254
255
|
feature_uid=scan_info["feature_uid"][0]
|
|
255
|
-
if "feature_uid" in scan_info
|
|
256
|
+
if "feature_uid" in scan_info
|
|
257
|
+
and scan_info["feature_uid"][0] is not None
|
|
256
258
|
else feature_uid,
|
|
257
259
|
q1_step=2,
|
|
258
260
|
deisotope=deisotope,
|
|
@@ -445,7 +447,9 @@ def _spec_to_mat(
|
|
|
445
447
|
closest_index = np.argmin(np.abs(ar2 - val1))
|
|
446
448
|
closest_indices.append((i, closest_index))
|
|
447
449
|
# filter out pairs that are not within the specified tolerance
|
|
448
|
-
closest_indices = [
|
|
450
|
+
closest_indices = [
|
|
451
|
+
(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol
|
|
452
|
+
]
|
|
449
453
|
# remove duplicates from the list of indices
|
|
450
454
|
closest_indices = list(set(closest_indices))
|
|
451
455
|
# sort the list of indices by the first element (i) in ascending order
|
|
@@ -564,9 +568,13 @@ def find_features(self, **kwargs):
|
|
|
564
568
|
import os
|
|
565
569
|
|
|
566
570
|
os.environ["OMP_NUM_THREADS"] = str(params.threads)
|
|
567
|
-
self.logger.debug(
|
|
571
|
+
self.logger.debug(
|
|
572
|
+
f"Set thread count to {params.threads} via OMP_NUM_THREADS",
|
|
573
|
+
)
|
|
568
574
|
except Exception:
|
|
569
|
-
self.logger.warning(
|
|
575
|
+
self.logger.warning(
|
|
576
|
+
f"Could not set thread count to {params.threads} - using default",
|
|
577
|
+
)
|
|
570
578
|
|
|
571
579
|
# Set debug mode if enabled
|
|
572
580
|
if hasattr(params, "debug") and params.debug:
|
|
@@ -607,7 +615,8 @@ def find_features(self, **kwargs):
|
|
|
607
615
|
mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
|
|
608
616
|
mtd_par.setValue(
|
|
609
617
|
"min_trace_length",
|
|
610
|
-
float(params.get("min_trace_length_multiplier"))
|
|
618
|
+
float(params.get("min_trace_length_multiplier"))
|
|
619
|
+
* float(params.get("chrom_fwhm_min")),
|
|
611
620
|
)
|
|
612
621
|
mtd_par.setValue(
|
|
613
622
|
"trace_termination_outliers",
|
|
@@ -618,8 +627,14 @@ def find_features(self, **kwargs):
|
|
|
618
627
|
# Additional MTD parameters
|
|
619
628
|
mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
|
|
620
629
|
mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
|
|
621
|
-
mtd_par.setValue(
|
|
622
|
-
|
|
630
|
+
mtd_par.setValue(
|
|
631
|
+
"trace_termination_criterion",
|
|
632
|
+
params.get("trace_termination_criterion"),
|
|
633
|
+
)
|
|
634
|
+
mtd_par.setValue(
|
|
635
|
+
"reestimate_mt_sd",
|
|
636
|
+
"true" if params.get("reestimate_mt_sd") else "false",
|
|
637
|
+
)
|
|
623
638
|
mtd_par.setValue("quant_method", params.get("quant_method"))
|
|
624
639
|
|
|
625
640
|
mtd.setParameters(mtd_par) # set the new parameters
|
|
@@ -688,7 +703,7 @@ def find_features(self, **kwargs):
|
|
|
688
703
|
df = feature_map.get_df(export_peptide_identifications=False) # type: ignore[attr-defined]
|
|
689
704
|
# Sets the file path to the primary MS run (usually the mzML file)
|
|
690
705
|
feature_map.setPrimaryMSRunPath([self.file_path.encode()])
|
|
691
|
-
|
|
706
|
+
|
|
692
707
|
# Store feature map in both attributes for compatibility
|
|
693
708
|
self.features = feature_map
|
|
694
709
|
self._oms_features_map = feature_map
|
|
@@ -769,13 +784,15 @@ def find_features(self, **kwargs):
|
|
|
769
784
|
height_scaleds.append(None)
|
|
770
785
|
|
|
771
786
|
# Add the computed columns to the dataframe
|
|
772
|
-
df = df.with_columns(
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
787
|
+
df = df.with_columns(
|
|
788
|
+
[
|
|
789
|
+
pl.Series("chrom", chroms, dtype=pl.Object),
|
|
790
|
+
pl.Series("chrom_coherence", coherences, dtype=pl.Float64),
|
|
791
|
+
pl.Series("chrom_prominence", prominences, dtype=pl.Float64),
|
|
792
|
+
pl.Series("chrom_prominence_scaled", prominence_scaleds, dtype=pl.Float64),
|
|
793
|
+
pl.Series("chrom_height_scaled", height_scaleds, dtype=pl.Float64),
|
|
794
|
+
],
|
|
795
|
+
)
|
|
779
796
|
|
|
780
797
|
self.features_df = df
|
|
781
798
|
self._features_sync()
|
|
@@ -796,10 +813,10 @@ def find_features(self, **kwargs):
|
|
|
796
813
|
def _clean_features_df(self, df):
|
|
797
814
|
"""Clean and standardize features DataFrame."""
|
|
798
815
|
# Convert pandas DataFrame to polars if needed
|
|
799
|
-
if hasattr(df,
|
|
816
|
+
if hasattr(df, "index"): # pandas DataFrame
|
|
800
817
|
df = df.copy()
|
|
801
818
|
df["feature_id"] = df.index
|
|
802
|
-
|
|
819
|
+
|
|
803
820
|
if hasattr(df, "columns") and not isinstance(df, pl.DataFrame):
|
|
804
821
|
df_pl = pl.from_pandas(df)
|
|
805
822
|
else:
|
|
@@ -809,35 +826,37 @@ def _clean_features_df(self, df):
|
|
|
809
826
|
df2 = df_pl.filter(pl.col("quality") != 0)
|
|
810
827
|
|
|
811
828
|
# Create new dataframe with required columns and transformations
|
|
812
|
-
df_result = df2.select(
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
829
|
+
df_result = df2.select(
|
|
830
|
+
[
|
|
831
|
+
pl.int_range(pl.len()).alias("feature_uid"),
|
|
832
|
+
pl.col("feature_id").cast(pl.String).alias("feature_id"),
|
|
833
|
+
pl.col("mz").round(5),
|
|
834
|
+
pl.col("RT").round(3).alias("rt"),
|
|
835
|
+
pl.col("RT").round(3).alias("rt_original"),
|
|
836
|
+
pl.col("RTstart").round(3).alias("rt_start"),
|
|
837
|
+
pl.col("RTend").round(3).alias("rt_end"),
|
|
838
|
+
(pl.col("RTend") - pl.col("RTstart")).round(3).alias("rt_delta"),
|
|
839
|
+
pl.col("MZstart").round(5).alias("mz_start"),
|
|
840
|
+
pl.col("MZend").round(5).alias("mz_end"),
|
|
841
|
+
pl.col("intensity").alias("inty"),
|
|
842
|
+
pl.col("quality"),
|
|
843
|
+
pl.col("charge"),
|
|
844
|
+
pl.lit(0).alias("iso"),
|
|
845
|
+
pl.lit(None, dtype=pl.Int64).alias("iso_of"),
|
|
846
|
+
pl.lit(None, dtype=pl.Utf8).alias("adduct"),
|
|
847
|
+
pl.lit(None, dtype=pl.Float64).alias("adduct_charge"),
|
|
848
|
+
pl.lit(None, dtype=pl.Float64).alias("adduct_mass_shift"),
|
|
849
|
+
pl.lit(None, dtype=pl.Float64).alias("adduct_mass_neutral"),
|
|
850
|
+
pl.lit(None, dtype=pl.Int64).alias("adduct_group"),
|
|
851
|
+
pl.lit(None, dtype=pl.Object).alias("chrom"),
|
|
852
|
+
pl.lit(None, dtype=pl.Float64).alias("chrom_coherence"),
|
|
853
|
+
pl.lit(None, dtype=pl.Float64).alias("chrom_prominence"),
|
|
854
|
+
pl.lit(None, dtype=pl.Float64).alias("chrom_prominence_scaled"),
|
|
855
|
+
pl.lit(None, dtype=pl.Float64).alias("chrom_height_scaled"),
|
|
856
|
+
pl.lit(None, dtype=pl.Object).alias("ms2_scans"),
|
|
857
|
+
pl.lit(None, dtype=pl.Object).alias("ms2_specs"),
|
|
858
|
+
],
|
|
859
|
+
)
|
|
841
860
|
|
|
842
861
|
return df_result
|
|
843
862
|
|
|
@@ -859,10 +878,12 @@ def _features_deisotope(
|
|
|
859
878
|
df = pl.from_pandas(df)
|
|
860
879
|
|
|
861
880
|
# Initialize new columns
|
|
862
|
-
df = df.with_columns(
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
881
|
+
df = df.with_columns(
|
|
882
|
+
[
|
|
883
|
+
pl.lit(0).alias("iso"),
|
|
884
|
+
pl.col("feature_uid").alias("iso_of"),
|
|
885
|
+
],
|
|
886
|
+
)
|
|
866
887
|
|
|
867
888
|
# Sort by 'mz'
|
|
868
889
|
df = df.sort("mz")
|
|
@@ -889,13 +910,13 @@ def _features_deisotope(
|
|
|
889
910
|
for isotope_offset in [1, 2, 3]:
|
|
890
911
|
offset_mz = isotope_offset * mz_diff
|
|
891
912
|
tolerance_factor = 1.0 if isotope_offset == 1 else 1.5
|
|
892
|
-
|
|
913
|
+
|
|
893
914
|
t_lower = base_mz + offset_mz - tolerance_factor * mz_tol
|
|
894
915
|
t_upper = base_mz + offset_mz + tolerance_factor * mz_tol
|
|
895
|
-
|
|
916
|
+
|
|
896
917
|
li = np.searchsorted(mz_arr, t_lower, side="left")
|
|
897
918
|
ri = np.searchsorted(mz_arr, t_upper, side="right")
|
|
898
|
-
|
|
919
|
+
|
|
899
920
|
if li < ri:
|
|
900
921
|
cand_idx = np.arange(li, ri)
|
|
901
922
|
mask = (
|
|
@@ -904,22 +925,23 @@ def _features_deisotope(
|
|
|
904
925
|
& (intensity_arr[cand_idx] < 2 * base_int)
|
|
905
926
|
)
|
|
906
927
|
valid_cand = cand_idx[mask]
|
|
907
|
-
|
|
928
|
+
|
|
908
929
|
for cand in valid_cand:
|
|
909
930
|
if cand != i and iso_of_arr[cand] == feature_uid_arr[cand]:
|
|
910
931
|
iso_arr[cand] = iso_arr[i] + isotope_offset
|
|
911
932
|
iso_of_arr[cand] = base_feature_uid
|
|
912
933
|
|
|
913
934
|
# Update the dataframe with isotope assignments
|
|
914
|
-
df = df.with_columns(
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
935
|
+
df = df.with_columns(
|
|
936
|
+
[
|
|
937
|
+
pl.Series("iso", iso_arr),
|
|
938
|
+
pl.Series("iso_of", iso_of_arr),
|
|
939
|
+
],
|
|
940
|
+
)
|
|
918
941
|
|
|
919
942
|
return df
|
|
920
943
|
|
|
921
944
|
|
|
922
|
-
|
|
923
945
|
def analyze_dda(self):
|
|
924
946
|
# Preallocate variables
|
|
925
947
|
cycle_records = []
|
|
@@ -1106,7 +1128,9 @@ def find_ms2(self, **kwargs):
|
|
|
1106
1128
|
feature_rt_start = features_subset.select("rt_start").to_numpy().flatten()
|
|
1107
1129
|
feature_rt_end = features_subset.select("rt_end").to_numpy().flatten()
|
|
1108
1130
|
feature_uids = features_subset.select("feature_uid").to_numpy().flatten()
|
|
1109
|
-
feature_indices =
|
|
1131
|
+
feature_indices = (
|
|
1132
|
+
features_subset.with_row_index().select("index").to_numpy().flatten()
|
|
1133
|
+
)
|
|
1110
1134
|
|
|
1111
1135
|
# Pre-compute RT radius for all features
|
|
1112
1136
|
rt_radius = np.minimum(feature_rt - feature_rt_start, feature_rt_end - feature_rt)
|
|
@@ -1159,15 +1183,17 @@ def find_ms2(self, **kwargs):
|
|
|
1159
1183
|
|
|
1160
1184
|
scan_uids = ms2_index_arr[final_indices].tolist()
|
|
1161
1185
|
scan_uid_lists.append(scan_uids)
|
|
1162
|
-
spec_lists.append(
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1186
|
+
spec_lists.append(
|
|
1187
|
+
[
|
|
1188
|
+
self.get_spectrum(
|
|
1189
|
+
scan_uids[0],
|
|
1190
|
+
centroid=centroid,
|
|
1191
|
+
deisotope=deisotope,
|
|
1192
|
+
dia_stats=dia_stats,
|
|
1193
|
+
feature_uid=feature_uid,
|
|
1194
|
+
),
|
|
1195
|
+
],
|
|
1196
|
+
)
|
|
1171
1197
|
|
|
1172
1198
|
# Collect updates for batch processing
|
|
1173
1199
|
updated_feature_uids.extend([feature_uid] * len(final_indices))
|
|
@@ -1181,11 +1207,13 @@ def find_ms2(self, **kwargs):
|
|
|
1181
1207
|
features_df = pl.from_pandas(features_df)
|
|
1182
1208
|
|
|
1183
1209
|
# Update the features_df
|
|
1184
|
-
update_df = pl.DataFrame(
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1210
|
+
update_df = pl.DataFrame(
|
|
1211
|
+
{
|
|
1212
|
+
"temp_idx": feature_indices,
|
|
1213
|
+
"ms2_scans": pl.Series("ms2_scans", scan_uid_lists, dtype=pl.Object),
|
|
1214
|
+
"ms2_specs": pl.Series("ms2_specs", spec_lists, dtype=pl.Object),
|
|
1215
|
+
},
|
|
1216
|
+
)
|
|
1189
1217
|
|
|
1190
1218
|
# Join and update
|
|
1191
1219
|
features_df = (
|
|
@@ -1196,16 +1224,18 @@ def find_ms2(self, **kwargs):
|
|
|
1196
1224
|
how="left",
|
|
1197
1225
|
suffix="_new",
|
|
1198
1226
|
)
|
|
1199
|
-
.with_columns(
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1227
|
+
.with_columns(
|
|
1228
|
+
[
|
|
1229
|
+
pl.when(pl.col("ms2_scans_new").is_not_null())
|
|
1230
|
+
.then(pl.col("ms2_scans_new"))
|
|
1231
|
+
.otherwise(pl.col("ms2_scans"))
|
|
1232
|
+
.alias("ms2_scans"),
|
|
1233
|
+
pl.when(pl.col("ms2_specs_new").is_not_null())
|
|
1234
|
+
.then(pl.col("ms2_specs_new"))
|
|
1235
|
+
.otherwise(pl.col("ms2_specs"))
|
|
1236
|
+
.alias("ms2_specs"),
|
|
1237
|
+
],
|
|
1238
|
+
)
|
|
1209
1239
|
.drop(["temp_idx", "ms2_scans_new", "ms2_specs_new"])
|
|
1210
1240
|
)
|
|
1211
1241
|
|
|
@@ -1242,4 +1272,4 @@ def find_ms2(self, **kwargs):
|
|
|
1242
1272
|
self.store_history(["find_ms2"], params.to_dict())
|
|
1243
1273
|
self.logger.debug(
|
|
1244
1274
|
"Parameters stored to find_ms2",
|
|
1245
|
-
)
|
|
1275
|
+
)
|