masster 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +27 -27
- masster/_version.py +17 -17
- masster/chromatogram.py +497 -503
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster/logger.py +318 -244
- masster/sample/__init__.py +9 -9
- masster/sample/defaults/__init__.py +15 -15
- masster/sample/defaults/find_adducts_def.py +325 -325
- masster/sample/defaults/find_features_def.py +366 -366
- masster/sample/defaults/find_ms2_def.py +285 -285
- masster/sample/defaults/get_spectrum_def.py +314 -318
- masster/sample/defaults/sample_def.py +374 -378
- masster/sample/h5.py +1321 -1297
- masster/sample/helpers.py +833 -364
- masster/sample/lib.py +762 -0
- masster/sample/load.py +1220 -1187
- masster/sample/parameters.py +131 -131
- masster/sample/plot.py +1685 -1622
- masster/sample/processing.py +1402 -1416
- masster/sample/quant.py +209 -0
- masster/sample/sample.py +393 -387
- masster/sample/sample5_schema.json +181 -181
- masster/sample/save.py +737 -736
- masster/sample/sciex.py +1213 -0
- masster/spectrum.py +1287 -1319
- masster/study/__init__.py +9 -9
- masster/study/defaults/__init__.py +21 -19
- masster/study/defaults/align_def.py +267 -267
- masster/study/defaults/export_def.py +41 -40
- masster/study/defaults/fill_chrom_def.py +264 -264
- masster/study/defaults/fill_def.py +260 -0
- masster/study/defaults/find_consensus_def.py +256 -256
- masster/study/defaults/find_ms2_def.py +163 -163
- masster/study/defaults/integrate_chrom_def.py +225 -225
- masster/study/defaults/integrate_def.py +221 -0
- masster/study/defaults/merge_def.py +256 -0
- masster/study/defaults/study_def.py +272 -269
- masster/study/export.py +674 -287
- masster/study/h5.py +1406 -886
- masster/study/helpers.py +1713 -433
- masster/study/helpers_optimized.py +317 -0
- masster/study/load.py +1231 -1078
- masster/study/parameters.py +99 -99
- masster/study/plot.py +632 -645
- masster/study/processing.py +1057 -1046
- masster/study/save.py +161 -134
- masster/study/study.py +612 -522
- masster/study/study5_schema.json +253 -241
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
- masster-0.3.1.dist-info/RECORD +59 -0
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
- masster-0.2.5.dist-info/RECORD +0 -50
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
masster/sample/quant.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# a module for the reproducible quantification of EIC traces.
|
|
2
|
+
# it takes a study and a list of features indicated either as MS1 or MRM features
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
# from .parameters import QuantParameters
|
|
8
|
+
# Parameters removed - using hardcoded defaults
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def chrom_from_csv(
    self,
    filename=None,
):
    """
    Load MRM transition definitions from a CSV file into ``self.chrom_df``.

    Lines starting with ``#`` are treated as comments by the CSV reader.
    Column names are matched case-insensitively (surrounding whitespace is
    ignored). Recognised columns:

    * ``name`` (required)
    * ``q1`` / ``precursor`` / ``precursor_mz`` (required, first one found is used)
    * ``q3`` / ``product`` / ``product_mz`` (optional, first one found is used)
    * ``qid``, ``group``, ``rt``, ``istd``, ``adduct``, ``class``, ``formula``,
      ``inchikey``, ``smiles`` (all optional)

    Every row becomes one trace of type ``"mrm"`` with ``rt_start`` and
    ``rt_end`` set to None. Missing optional columns yield None, except that
    ``chid`` falls back to the 0-based row position when there is no ``qid``
    column.

    :param filename: Path to the CSV file. If it is None/empty or does not end
        with ``.csv`` nothing is read and ``self.chrom_df`` is left untouched.
    :return: None. The traces are stored as a DataFrame in ``self.chrom_df``.
    :raises ValueError: If the file has no ``name`` column or no precursor
        m/z column (``q1`` or an equivalent).
    """
    # Only paths ending in ".csv" are read; anything else is a silent no-op.
    if not filename or not str(filename).endswith(".csv"):
        return

    df = pd.read_csv(filename, comment="#")
    # Normalise headers so "Name", " Q1" etc. are recognised.
    df.columns = [str(col).strip().lower() for col in df.columns]
    available = set(df.columns)

    def pick(*candidates):
        # First candidate header that is present in the file, else None.
        return next((name for name in candidates if name in available), None)

    def value(row, column):
        # Cell value for an optional column; None when the column is absent.
        return row[column] if column in available else None

    if "name" not in available:
        raise ValueError("CSV file must contain a 'name' column.")
    col_q1 = pick("q1", "precursor", "precursor_mz")
    if col_q1 is None:
        raise ValueError("CSV file must contain a 'q1' column or equivalent.")
    col_q3 = pick("q3", "product", "product_mz")
    has_qid = "qid" in available

    traces = []
    for position, (_, row) in enumerate(df.iterrows()):
        traces.append({
            # Without an explicit qid column, number the traces sequentially.
            "chid": row["qid"] if has_qid else position,
            "type": "mrm",
            "name": row["name"],
            "group": value(row, "group"),
            "prec_mz": row[col_q1],
            "prod_mz": value(row, col_q3),
            "rt": value(row, "rt"),
            "rt_start": None,
            "rt_end": None,
            "istd": value(row, "istd"),
            "adduct": value(row, "adduct"),
            "class": value(row, "class"),
            "formula": value(row, "formula"),
            "inchikey": value(row, "inchikey"),
            "smiles": value(row, "smiles"),
        })
    self.chrom_df = pd.DataFrame(traces)
    return
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _parse_oracle_fragments(cell):
    """Parse a space-separated list of ``mz|label`` tokens into ``{label: mz}``."""
    fragments = {}
    # Empty CSV cells are not strings (None/NaN), so there is nothing to parse.
    if not isinstance(cell, str):
        return fragments
    for token in cell.split(" "):
        if not token.strip():
            continue
        parts = token.split("|")
        if len(parts) > 1:
            # parts[0] is the fragment m/z, parts[1] its label (used as key).
            fragments[parts[1]] = float(parts[0])
    return fragments


def chrom_from_oracle(
    self,
    oracle_folder=None,
    classes=None,
    level=None,
):
    """
    Build MRM traces from an annotation table and store them in ``self.chrom_df``.

    Reads ``<oracle_folder>/diag/annotation_full.csv`` and, for every row whose
    ``level`` is selected and equals 2, emits one trace per fragment listed in
    the ``ms2_matched`` and ``ms2_missed`` columns (space-separated
    ``mz|label`` tokens; a label present in both columns takes the m/z from
    ``ms2_missed``). Rows with a selected level other than 2 produce no traces.
    Traces are numbered sequentially from 0 in ``chid``.

    :param oracle_folder: Folder containing ``diag/annotation_full.csv``.
        If None, the function returns without doing anything.
    :param classes: Optional collection of values to keep, matched against the
        ``hg`` column. A single string is treated as a one-element list.
        None disables the filter.
    :param level: Annotation level(s) to keep; a single value or a collection.
        Defaults to ``[2]``.
    :return: None. The traces are stored as a DataFrame in ``self.chrom_df``.
        If the annotation file cannot be read, a message is printed and
        ``self.chrom_df`` is left untouched.
    :raises KeyError: If the annotation table lacks one of the expected columns.
    """
    if level is None:
        level = [2]
    elif not hasattr(level, "__iter__"):
        # Accept a bare scalar such as ``level=2``.
        level = [level]
    if isinstance(classes, str):
        # A bare string would otherwise be matched by substring.
        classes = [classes]
    if oracle_folder is None:
        return

    # Best-effort read: any failure is reported and the call becomes a no-op.
    try:
        oracle_data = pd.read_csv(
            os.path.join(oracle_folder, "diag", "annotation_full.csv"),
        )
    except Exception:
        print(f"Could not read {oracle_folder}/diag/annotation_full.csv")
        return

    cols_to_keep = [
        "mz",
        "rt",
        "level",
        "formula",
        "ion",
        "name",
        "hg",
        "ms2_matched",
        "ms2_missed",
    ]
    # Selecting the columns also validates that all of them are present.
    oracle_data = oracle_data[cols_to_keep]

    traces = []
    qid = 0
    for _, row in oracle_data.iterrows():
        if row["level"] not in level:
            continue
        if classes is not None and row["hg"] not in classes:
            continue
        # Only level-2 annotations carry fragments that can become transitions.
        if row["level"] != 2:
            continue

        frags = _parse_oracle_fragments(row["ms2_matched"])
        frags.update(_parse_oracle_fragments(row["ms2_missed"]))

        label = row["name"] + " " + row["ion"]
        for frag_mz in frags.values():
            traces.append({
                "chid": qid,
                "type": "mrm",
                "name": label,
                "group": label,
                "prec_mz": row["mz"],
                "prod_mz": frag_mz,
                "rt": row["rt"],
                "rt_start": None,
                "rt_end": None,
                "istd": None,
                "adduct": row["ion"],
                "class": row["hg"],
                "formula": row["formula"],
                "inchikey": None,
                "smiles": None,
            })
            qid += 1
    self.chrom_df = pd.DataFrame(traces)
    return
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def chrom_from_features(
    self,
    feature_uid=None,
):
    """
    Create MS1 chromatogram definitions from ``self.features_df``.

    One trace of type ``"ms1"`` is emitted per selected row of
    ``self.features_df``, taking ``mz``, ``rt``, ``rt_start`` and ``rt_end``
    from that row. Traces are numbered sequentially from 0 in ``chid``.

    :param feature_uid: Feature UID(s) to create chromatograms for. May be a
        single value or any iterable of values (list, tuple, set, Series, ...).
        If None, chromatograms are created for all features.
    :return: None. The traces are stored as a DataFrame in ``self.chrom_df``.
    """
    from collections.abc import Iterable

    if feature_uid is None:
        feature_uid = self.features_df["feature_uid"].unique()
    elif isinstance(feature_uid, (str, bytes)) or not isinstance(feature_uid, Iterable):
        # A single UID was given; treat it as a one-element selection.
        feature_uid = [feature_uid]

    # Build the lookup once so the per-row membership test is O(1).
    try:
        wanted = set(feature_uid)
    except TypeError:
        # Unhashable entries: fall back to a linear membership test.
        wanted = list(feature_uid)

    traces = []
    chid = 0
    for _, row in self.features_df.iterrows():
        if row["feature_uid"] not in wanted:
            continue

        traces.append({
            "chid": chid,
            "type": "ms1",
            "name": f"MS1 fid:{row['feature_uid']} ({row['mz']:.4f})",
            "group": f"fid:{row['feature_uid']}",
            "prec_mz": row["mz"],
            "prod_mz": None,
            "rt": row["rt"],
            "rt_start": row["rt_start"],
            "rt_end": row["rt_end"],
            "istd": None,
            "adduct": None,
            "class": None,
            "formula": None,
            "inchikey": None,
            "smiles": None,
        })
        chid += 1

    self.chrom_df = pd.DataFrame(traces)
    return
|