masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/data/libs/aa.csv +22 -0
- masster/lib/lib.py +6 -0
- masster/sample/adducts.py +1 -1
- masster/sample/load.py +10 -9
- masster/sample/plot.py +1 -1
- masster/sample/processing.py +4 -4
- masster/sample/sample.py +29 -32
- masster/study/analysis.py +1762 -0
- masster/study/defaults/fill_def.py +1 -1
- masster/study/export.py +5 -3
- masster/study/h5.py +3 -0
- masster/study/helpers.py +153 -80
- masster/study/id.py +545 -4
- masster/study/load.py +33 -59
- masster/study/merge.py +413 -315
- masster/study/parameters.py +3 -3
- masster/study/plot.py +398 -43
- masster/study/processing.py +6 -14
- masster/study/save.py +8 -4
- masster/study/study.py +179 -139
- masster/study/study5_schema.json +9 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/METADATA +54 -14
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/RECORD +27 -25
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/WHEEL +0 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/entry_points.txt +0 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/data/libs/aa.csv
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name,smiles,inchikey,formula,db_id,db
|
|
2
|
+
L-Glutamic acid,N[C@@H](CCC(O)=O)C(O)=O,WHUUTDBJXJRKMK-VKHMYHEASA-N,C5H9NO4,CID:33032,pubchem
|
|
3
|
+
L-Tyrosine,N[C@@H](CC1=CC=C(O)C=C1)C(O)=O,OUYCCCASQSFEME-QMMMGPOBSA-N,C9H11NO3,CID:6057,pubchem
|
|
4
|
+
L-Phenylalanine,N[C@@H](CC1=CC=CC=C1)C(O)=O,COLNVLDHVKWLRT-QMMMGPOBSA-N,C9H11NO2,CID:6140,pubchem
|
|
5
|
+
L-Alanine,C[C@H](N)C(O)=O,QNAYBMKLOCPYGJ-REOHCLBHSA-N,C3H7NO2,CID:5950,pubchem
|
|
6
|
+
L-Proline,OC(=O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N,C5H9NO2,CID:145742,pubchem
|
|
7
|
+
L-Threonine,C[C@@H](O)[C@H](N)C(O)=O,AYFVYJQAPQTCCC-GBXIJSLDSA-N,C4H9NO3,CID:6288,pubchem
|
|
8
|
+
L-Asparagine,N[C@@H](CC(N)=O)C(O)=O,DCXYFEDJOCDNAF-REOHCLBHSA-N,C4H8N2O3,CID:6267,pubchem
|
|
9
|
+
L-Isoleucine,CC[C@H](C)[C@H](N)C(O)=O,AGPKZVBTJJNPAG-WHFBIAKZSA-N,C6H13NO2,CID:6306,pubchem
|
|
10
|
+
L-Histidine,N[C@@H](CC1=CN=CN1)C(O)=O,HNDVDQJCIGZPNO-YFKPBYRVSA-N,C6H9N3O2,CID:6274,pubchem
|
|
11
|
+
L-Lysine,NCCCC[C@H](N)C(O)=O,KDXKERNSBIXSRK-YFKPBYRVSA-N,C6H14N2O2,CID:5962,pubchem
|
|
12
|
+
L-Serine,N[C@@H](CO)C(O)=O,MTCFGRXMJLQNBG-REOHCLBHSA-N,C3H7NO3,CID:5951,pubchem
|
|
13
|
+
L-Aspartic acid,N[C@@H](CC(O)=O)C(O)=O,CKLJMWTZIZZHCS-REOHCLBHSA-N,C4H7NO4,CID:5960,pubchem
|
|
14
|
+
L-Cystine,N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O,LEVWYRKDKASIDU-IMJSIDKUSA-N,C6H12N2O4S2,CID:67678,pubchem
|
|
15
|
+
L-Arginine,N[C@@H](CCCNC(N)=N)C(O)=O,ODKSFYDXXFIFQN-BYPYZUCNSA-N,C6H14N4O2,CID:6322,pubchem
|
|
16
|
+
L-Cysteine,N[C@@H](CS)C(O)=O,XUJNEKJLAYXESH-REOHCLBHSA-N,C3H7NO2S,CID:5862,pubchem
|
|
17
|
+
L-Glutamine,N[C@@H](CCC(N)=O)C(O)=O,ZDXPYRJPNDTMRX-VKHMYHEASA-N,C5H10N2O3,CID:5961,pubchem
|
|
18
|
+
L-Leucine,CC(C)C[C@H](N)C(O)=O,ROHFNLRQFUQHCH-YFKPBYRVSA-N,C6H13NO2,CID:6106,pubchem
|
|
19
|
+
L-Methionine,CSCC[C@H](N)C(O)=O,FFEARJCKVFRZRR-BYPYZUCNSA-N,C5H11NO2S,CID:6137,pubchem
|
|
20
|
+
L-Valine,CC(C)[C@H](N)C(O)=O,KZSNJWFQEVHDMF-BYPYZUCNSA-N,C5H11NO2,CID:6287,pubchem
|
|
21
|
+
L-Tryptophan,N[C@@H](CC1=CNC2=C1C=CC=C2)C(O)=O,QIVBCDIJIAJPQS-VIFPVBQESA-N,C11H12N2O2,CID:6305,pubchem
|
|
22
|
+
Glycine,NCC(O)=O,DHMQDGOQFOQNFH-UHFFFAOYSA-N,C2H5NO2,CID:750,pubchem
|
masster/lib/lib.py
CHANGED
|
@@ -123,11 +123,13 @@ class Lib:
|
|
|
123
123
|
"inchi": pl.Series([], dtype=pl.Utf8),
|
|
124
124
|
"inchikey": pl.Series([], dtype=pl.Utf8),
|
|
125
125
|
"formula": pl.Series([], dtype=pl.Utf8),
|
|
126
|
+
"iso": pl.Series([], dtype=pl.Int64),
|
|
126
127
|
"adduct": pl.Series([], dtype=pl.Utf8),
|
|
127
128
|
"m": pl.Series([], dtype=pl.Float64),
|
|
128
129
|
"z": pl.Series([], dtype=pl.Int8),
|
|
129
130
|
"mz": pl.Series([], dtype=pl.Float64),
|
|
130
131
|
"rt": pl.Series([], dtype=pl.Float64),
|
|
132
|
+
"quant_group": pl.Series([], dtype=pl.Int64),
|
|
131
133
|
"db_id": pl.Series([], dtype=pl.Utf8),
|
|
132
134
|
"db": pl.Series([], dtype=pl.Utf8),
|
|
133
135
|
})
|
|
@@ -245,11 +247,13 @@ class Lib:
|
|
|
245
247
|
"inchi": compound_data.get("inchi", ""),
|
|
246
248
|
"inchikey": compound_data.get("inchikey", ""),
|
|
247
249
|
"formula": compound_data["formula"],
|
|
250
|
+
"iso": 0, # Default to zero
|
|
248
251
|
"adduct": adduct,
|
|
249
252
|
"m": adducted_mass,
|
|
250
253
|
"z": charge,
|
|
251
254
|
"mz": mz,
|
|
252
255
|
"rt": compound_data.get("rt", None),
|
|
256
|
+
"quant_group": counter, # Use same as lib_uid for default
|
|
253
257
|
"db_id": compound_data.get("db_id", None),
|
|
254
258
|
"db": compound_data.get("db", None),
|
|
255
259
|
}
|
|
@@ -526,12 +530,14 @@ class Lib:
|
|
|
526
530
|
"source_id": match_row.get("source_id"),
|
|
527
531
|
"name": match_row["name"],
|
|
528
532
|
"formula": match_row["formula"],
|
|
533
|
+
"iso": match_row.get("iso", 0),
|
|
529
534
|
"adduct": match_row["adduct"],
|
|
530
535
|
"smiles": match_row["smiles"],
|
|
531
536
|
"inchi": match_row["inchi"],
|
|
532
537
|
"inchikey": match_row["inchikey"],
|
|
533
538
|
"lib_mz": match_row["mz"],
|
|
534
539
|
"lib_rt": match_row["rt"],
|
|
540
|
+
"quant_group": match_row.get("quant_group"),
|
|
535
541
|
"delta_mz": abs(feature_mz - match_row["mz"]),
|
|
536
542
|
"delta_rt": abs(feature_rt - match_row["rt"]) if feature_rt is not None and match_row["rt"] is not None else None,
|
|
537
543
|
}
|
masster/sample/adducts.py
CHANGED
|
@@ -809,5 +809,5 @@ def find_adducts(self, **kwargs):
|
|
|
809
809
|
["name", "charge", "mass_shift", "probability"],
|
|
810
810
|
).to_dicts()
|
|
811
811
|
|
|
812
|
-
self.
|
|
812
|
+
self.update_history(["find_adducts"], history_params)
|
|
813
813
|
self.logger.debug("Parameters stored successfully")
|
masster/sample/load.py
CHANGED
|
@@ -173,8 +173,8 @@ def load_noms1(
|
|
|
173
173
|
self.label = label
|
|
174
174
|
|
|
175
175
|
|
|
176
|
-
#
|
|
177
|
-
def
|
|
176
|
+
# Renamed for clarity and internal use
|
|
177
|
+
def _load_ms1(
|
|
178
178
|
self,
|
|
179
179
|
filename=None,
|
|
180
180
|
ondisk=False,
|
|
@@ -182,7 +182,8 @@ def load_study(
|
|
|
182
182
|
label=None,
|
|
183
183
|
):
|
|
184
184
|
"""
|
|
185
|
-
|
|
185
|
+
Load MS1-only data (renamed from load_study for clarity).
|
|
186
|
+
Optimized version for study loading that excludes MS2 data.
|
|
186
187
|
|
|
187
188
|
The old public name load_study is kept only as a deprecated alias; use load_noms1() instead.
|
|
188
189
|
"""
|
|
@@ -828,11 +829,11 @@ def _load_mzpkl(
|
|
|
828
829
|
continue
|
|
829
830
|
if k == "spectra_df":
|
|
830
831
|
k = "scans_df"
|
|
831
|
-
|
|
832
|
+
setattr(self, k, v)
|
|
832
833
|
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
if self.ondisk:
|
|
834
|
+
# Import and call internal sanitize function
|
|
835
|
+
from masster.study.load import _sanitize
|
|
836
|
+
_sanitize(self)
if self.ondisk:
|
|
836
837
|
self.file_obj = oms.OnDiscMSExperiment()
|
|
837
838
|
else:
|
|
838
839
|
self.file_obj = oms.MSExperiment()
|
|
@@ -955,7 +956,7 @@ def sanitize(self):
|
|
|
955
956
|
self.features_df.at[_i, "ms2_specs"][_j] = new_ms2_specs
|
|
956
957
|
|
|
957
958
|
|
|
958
|
-
def
|
|
959
|
+
def _index_file(self):
|
|
959
960
|
"""
|
|
960
961
|
Reload raw data from a file based on its extension.
|
|
961
962
|
|
|
@@ -1022,7 +1023,7 @@ def index_file(self):
|
|
|
1022
1023
|
raise FileNotFoundError(
|
|
1023
1024
|
f"File {self.file_source} not found. Did the path change? Consider running source().",
|
|
1024
1025
|
)
|
|
1025
|
-
self.
|
|
1026
|
+
self._index_file()
|
|
1026
1027
|
else:
|
|
1027
1028
|
raise FileNotFoundError(
|
|
1028
1029
|
f"File {self.file_source} not found. Did the path change? Consider running source().",
|
masster/sample/plot.py
CHANGED
masster/sample/processing.py
CHANGED
|
@@ -799,7 +799,7 @@ def find_features(self, **kwargs):
|
|
|
799
799
|
self.logger.info(f"Feature detection completed. Total features: {len(df)}")
|
|
800
800
|
|
|
801
801
|
# store params
|
|
802
|
-
self.
|
|
802
|
+
self.update_history(["find_features"], params.to_dict())
|
|
803
803
|
self.logger.debug(
|
|
804
804
|
"Parameters stored to find_features",
|
|
805
805
|
)
|
|
@@ -1114,7 +1114,7 @@ def find_ms2(self, **kwargs):
|
|
|
1114
1114
|
c = 0
|
|
1115
1115
|
|
|
1116
1116
|
if self.file_interface is None:
|
|
1117
|
-
self.
|
|
1117
|
+
self._index_file()
|
|
1118
1118
|
|
|
1119
1119
|
# Vectorize the entire operation for better performance
|
|
1120
1120
|
features_subset = features_df.filter(pl.col("feature_uid").is_in(feature_uid_list))
|
|
@@ -1269,7 +1269,7 @@ def find_ms2(self, **kwargs):
|
|
|
1269
1269
|
self.features_df = features_df
|
|
1270
1270
|
|
|
1271
1271
|
# store params
|
|
1272
|
-
self.
|
|
1272
|
+
self.update_history(["find_ms2"], params.to_dict())
|
|
1273
1273
|
self.logger.debug(
|
|
1274
1274
|
"Parameters stored to find_ms2",
|
|
1275
1275
|
)
|
|
@@ -1430,4 +1430,4 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
|
|
|
1430
1430
|
# Store parameters in history
|
|
1431
1431
|
params_dict = {"rt_tolerance": rt_tolerance}
|
|
1432
1432
|
params_dict.update(kwargs)
|
|
1433
|
-
self.
|
|
1433
|
+
self.update_history(["find_iso"], params_dict)
|
masster/sample/sample.py
CHANGED
|
@@ -47,17 +47,17 @@ from masster.sample.defaults.find_adducts_def import find_adducts_defaults
|
|
|
47
47
|
from masster.sample.defaults.find_ms2_def import find_ms2_defaults
|
|
48
48
|
from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
|
|
49
49
|
|
|
50
|
-
# Sample-specific imports
|
|
51
|
-
from masster.sample.h5 import _load_sample5
|
|
52
|
-
from masster.sample.h5 import _load_sample5_study
|
|
53
|
-
from masster.sample.h5 import _save_sample5
|
|
54
|
-
from masster.sample.helpers import _delete_ms2
|
|
50
|
+
# Sample-specific imports - keeping these private, only for internal use
|
|
51
|
+
# from masster.sample.h5 import _load_sample5
|
|
52
|
+
# from masster.sample.h5 import _load_sample5_study
|
|
53
|
+
# from masster.sample.h5 import _save_sample5
|
|
54
|
+
# from masster.sample.helpers import _delete_ms2
|
|
55
55
|
from masster.sample.helpers import _estimate_memory_usage
|
|
56
56
|
from masster.sample.helpers import _get_scan_uids
|
|
57
57
|
from masster.sample.helpers import _get_feature_uids
|
|
58
|
-
from masster.sample.helpers import _features_sync
|
|
59
|
-
from masster.sample.adducts import _get_adducts
|
|
58
|
+
# from masster.sample.helpers import _features_sync - made internal only
|
|
60
59
|
from masster.sample.adducts import find_adducts
|
|
60
|
+
# Removed _get_adducts - only used in study modules
|
|
61
61
|
from masster.sample.helpers import features_delete
|
|
62
62
|
from masster.sample.helpers import features_filter
|
|
63
63
|
from masster.sample.helpers import select
|
|
@@ -69,22 +69,23 @@ from masster.sample.helpers import get_eic
|
|
|
69
69
|
from masster.sample.helpers import set_source
|
|
70
70
|
from masster.sample.helpers import _recreate_feature_map
|
|
71
71
|
from masster.sample.helpers import _get_feature_map
|
|
72
|
-
|
|
73
|
-
from masster.sample.load import
|
|
74
|
-
from masster.sample.load import
|
|
75
|
-
from masster.sample.load import
|
|
76
|
-
from masster.sample.load import
|
|
72
|
+
# Load functions - keeping only specific ones needed for external API
|
|
73
|
+
# from masster.sample.load import _load_featureXML - made internal only
|
|
74
|
+
# from masster.sample.load import _load_ms2data - made internal only
|
|
75
|
+
# from masster.sample.load import _load_mzML - made internal only
|
|
76
|
+
# from masster.sample.load import _load_raw - made internal only
|
|
77
|
+
# from masster.sample.load import _load_wiff - made internal only
|
|
77
78
|
from masster.sample.load import chrom_extract
|
|
78
|
-
from masster.sample.load import
|
|
79
|
+
from masster.sample.load import _index_file
|
|
79
80
|
from masster.sample.load import load
|
|
80
81
|
from masster.sample.load import load_noms1
|
|
81
|
-
from masster.sample.load import load_study
|
|
82
|
+
from masster.sample.load import _load_ms1 # Renamed from load_study
|
|
82
83
|
from masster.sample.load import sanitize
|
|
83
84
|
from masster.sample.plot import plot_2d
|
|
84
85
|
from masster.sample.plot import plot_2d_oracle
|
|
85
86
|
from masster.sample.plot import plot_dda_stats
|
|
86
87
|
from masster.sample.plot import plot_chrom
|
|
87
|
-
from masster.sample.plot import plot_feature_stats
|
|
88
|
+
from masster.sample.plot import plot_features_stats # Renamed from plot_feature_stats
|
|
88
89
|
from masster.sample.plot import plot_ms2_cycle
|
|
89
90
|
from masster.sample.plot import plot_ms2_eic
|
|
90
91
|
from masster.sample.plot import plot_ms2_q1
|
|
@@ -105,12 +106,12 @@ from masster.sample.parameters import get_parameters
|
|
|
105
106
|
from masster.sample.parameters import update_parameters
|
|
106
107
|
from masster.sample.parameters import get_parameters_property
|
|
107
108
|
from masster.sample.parameters import set_parameters_property
|
|
108
|
-
from masster.sample.save import _save_featureXML
|
|
109
109
|
from masster.sample.save import export_chrom
|
|
110
110
|
from masster.sample.save import export_dda_stats
|
|
111
111
|
from masster.sample.save import export_features
|
|
112
112
|
from masster.sample.save import export_mgf
|
|
113
113
|
from masster.sample.save import save
|
|
114
|
+
# Removed internal-only import: _save_featureXML
|
|
114
115
|
|
|
115
116
|
|
|
116
117
|
class Sample:
|
|
@@ -215,7 +216,8 @@ class Sample:
|
|
|
215
216
|
# Attach module functions as class methods
|
|
216
217
|
load = load
|
|
217
218
|
load_noms1 = load_noms1
|
|
218
|
-
|
|
219
|
+
_load_ms1 = _load_ms1 # Renamed from load_study for clarity
|
|
220
|
+
load_study = _load_ms1 # Backward compatibility alias
|
|
219
221
|
save = save
|
|
220
222
|
find_features = find_features
|
|
221
223
|
find_adducts = find_adducts
|
|
@@ -230,6 +232,7 @@ class Sample:
|
|
|
230
232
|
select_features = select
|
|
231
233
|
analyze_dda = analyze_dda
|
|
232
234
|
store_history = store_history
|
|
235
|
+
update_history = store_history # Preferred name for consistency with Study class
|
|
233
236
|
get_parameters = get_parameters
|
|
234
237
|
update_parameters = update_parameters
|
|
235
238
|
get_parameters_property = get_parameters_property
|
|
@@ -242,7 +245,8 @@ class Sample:
|
|
|
242
245
|
plot_2d_oracle = plot_2d_oracle
|
|
243
246
|
plot_dda_stats = plot_dda_stats
|
|
244
247
|
plot_chrom = plot_chrom
|
|
245
|
-
|
|
248
|
+
plot_features_stats = plot_features_stats # Renamed from plot_feature_stats
|
|
249
|
+
plot_feature_stats = plot_features_stats # Backward compatibility alias
|
|
246
250
|
plot_ms2_cycle = plot_ms2_cycle
|
|
247
251
|
plot_ms2_eic = plot_ms2_eic
|
|
248
252
|
plot_ms2_q1 = plot_ms2_q1
|
|
@@ -259,30 +263,21 @@ class Sample:
|
|
|
259
263
|
_get_feature_map = _get_feature_map
|
|
260
264
|
|
|
261
265
|
# Additional method assignments for all imported functions
|
|
262
|
-
_load_sample5
|
|
263
|
-
_load_sample5_study = _load_sample5_study
|
|
264
|
-
_save_sample5 = _save_sample5
|
|
265
|
-
_delete_ms2 = _delete_ms2
|
|
266
|
+
# Removed internal-only methods: _load_sample5, _load_sample5_study, _save_sample5, _delete_ms2, _features_sync
|
|
266
267
|
_estimate_memory_usage = _estimate_memory_usage
|
|
267
268
|
_get_scan_uids = _get_scan_uids
|
|
268
269
|
_get_feature_uids = _get_feature_uids
|
|
269
|
-
_features_sync = _features_sync
|
|
270
270
|
features_delete = features_delete
|
|
271
271
|
features_filter = features_filter
|
|
272
|
-
_load_featureXML
|
|
273
|
-
_load_ms2data = _load_ms2data
|
|
274
|
-
_load_mzML = _load_mzML
|
|
275
|
-
_load_raw = _load_raw
|
|
276
|
-
_load_wiff = _load_wiff
|
|
272
|
+
# Removed internal-only load methods: _load_featureXML, _load_ms2data, _load_mzML, _load_raw, _load_wiff
|
|
277
273
|
chrom_extract = chrom_extract
|
|
278
|
-
|
|
274
|
+
_index_file = _index_file # Renamed from index_file to be internal-only
|
|
279
275
|
sanitize = sanitize
|
|
280
276
|
_clean_features_df = _clean_features_df
|
|
281
277
|
_features_deisotope = _features_deisotope
|
|
282
278
|
_get_ztscan_stats = _get_ztscan_stats
|
|
283
279
|
_spec_to_mat = _spec_to_mat
|
|
284
|
-
_save_featureXML
|
|
285
|
-
_get_adducts = _get_adducts
|
|
280
|
+
# Removed internal-only methods: _save_featureXML, _get_adducts (used only in study modules)
|
|
286
281
|
|
|
287
282
|
# defaults
|
|
288
283
|
sample_defaults = sample_defaults
|
|
@@ -303,12 +298,14 @@ class Sample:
|
|
|
303
298
|
"""
|
|
304
299
|
# Define backward compatibility aliases to hide
|
|
305
300
|
backward_compatibility_aliases = {
|
|
306
|
-
'load_study', # deprecated alias for
|
|
301
|
+
'load_study', # deprecated alias for _load_ms1
|
|
307
302
|
'filter_features', # alias for filter (deprecated naming)
|
|
308
303
|
'select_features', # alias for select (deprecated naming)
|
|
309
304
|
'features_filter', # confusing duplicate of filter
|
|
310
305
|
'features_select', # confusing duplicate of select
|
|
311
306
|
'merge_defaults', # alias for find_features_defaults (confusing)
|
|
307
|
+
'plot_feature_stats', # backward compatibility for plot_features_stats
|
|
308
|
+
'store_history', # deprecated alias for update_history
|
|
312
309
|
}
|
|
313
310
|
|
|
314
311
|
# Get all attributes from the class
|