masster 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (55) hide show
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1610 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +391 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -719
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1398 -886
  41. masster/study/helpers.py +1650 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1201 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +149 -134
  48. masster/study/study.py +606 -522
  49. masster/study/study5_schema.json +247 -241
  50. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/METADATA +15 -10
  51. masster-0.3.0.dist-info/RECORD +59 -0
  52. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.4.dist-info/RECORD +0 -50
  54. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/WHEEL +0 -0
  55. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,209 @@
1
+ # a module for the reproducible quantification of EIC traces.
2
+ # it takes a study and a list of features indicated either as MS1 or MRM features
3
+ import os
4
+
5
+ import pandas as pd
6
+
7
+ # from .parameters import QuantParameters
8
+ # Parameters removed - using hardcoded defaults
9
+
10
+
11
def chrom_from_csv(
    self,
    filename=None,
):
    """
    Load MRM transitions from a CSV file into ``self.chrom_df``.

    Column names are matched case-insensitively. ``name`` and a precursor
    column (``q1``, ``precursor`` or ``precursor_mz``, in that order of
    preference) are required. The product column (``q3``, ``product`` or
    ``product_mz``) and ``rt``, ``istd``, ``class``, ``adduct``, ``qid``,
    ``group``, ``formula``, ``inchikey`` and ``smiles`` are optional: a missing
    optional column yields ``None`` in the corresponding field, except ``qid``,
    where the 0-based row position is used as ``chid`` instead.

    The file is read with ``comment="#"``, so text following a ``#`` is ignored.

    :param filename: Path to the CSV file. If it is empty/None or does not end
        in ``.csv`` (case-insensitive), nothing is loaded and ``self.chrom_df``
        is left untouched.
    :return: None. The transitions are stored as a DataFrame in ``self.chrom_df``.
    :raises ValueError: If the file has no ``name`` column or no precursor column.
    """
    # str() lets pathlib.Path objects through; lower() accepts ".CSV" as well.
    if not filename or not str(filename).lower().endswith(".csv"):
        return

    df = pd.read_csv(filename, comment="#")
    df.columns = [str(col).lower() for col in df.columns]
    available = set(df.columns)

    if "name" not in available:
        raise ValueError("CSV file must contain a 'name' column.")

    def first_present(*candidates):
        # First candidate column that exists in the file, or None.
        return next((c for c in candidates if c in available), None)

    col_q1 = first_present("q1", "precursor", "precursor_mz")
    if col_q1 is None:
        raise ValueError("CSV file must contain a 'q1' column or equivalent.")
    col_q3 = first_present("q3", "product", "product_mz")

    def optional(row, column):
        # Optional columns must never be indexed when absent: row[None]
        # raises KeyError instead of yielding a missing value.
        return row[column] if column in available else None

    traces = []
    for position, (_, row) in enumerate(df.iterrows()):
        traces.append({
            # Without an explicit qid column, number the traces sequentially,
            # like the other chrom_from_* loaders do.
            "chid": row["qid"] if "qid" in available else position,
            "type": "mrm",
            "name": row["name"],
            "group": optional(row, "group"),
            "prec_mz": row[col_q1],
            "prod_mz": row[col_q3] if col_q3 else None,
            "rt": optional(row, "rt"),
            "rt_start": None,
            "rt_end": None,
            "istd": optional(row, "istd"),
            "adduct": optional(row, "adduct"),
            "class": optional(row, "class"),
            "formula": optional(row, "formula"),
            "inchikey": optional(row, "inchikey"),
            "smiles": optional(row, "smiles"),
        })
    self.chrom_df = pd.DataFrame(traces)
    return
75
+
76
+
77
def chrom_from_oracle(
    self,
    oracle_folder=None,
    classes=None,
    level=None,
):
    """
    Build MRM traces from an oracle annotation table and store them in
    ``self.chrom_df``.

    Reads ``<oracle_folder>/diag/annotation_full.csv`` and, for every level-2
    annotation that passes the ``level`` and ``classes`` filters, emits one
    trace per fragment listed in its ``ms2_matched`` / ``ms2_missed`` cells.
    Annotations of any other level never produce traces, even when that level
    is included in ``level``.

    :param oracle_folder: Folder containing ``diag/annotation_full.csv``.
        If None, nothing is done.
    :param classes: Optional container of ``hg`` values to keep. None keeps all.
    :param level: Annotation level(s) to keep; a single value or a container.
        Defaults to ``[2]``.
    :return: None. On success the traces are stored as a DataFrame in
        ``self.chrom_df``; if the file cannot be read a message is printed and
        ``self.chrom_df`` is left untouched.
    :raises KeyError: If the annotation table lacks one of the expected columns.
    """
    if level is None:
        level = [2]
    elif not hasattr(level, "__contains__"):
        # Accept a bare scalar such as level=2.
        level = [level]
    if oracle_folder is None:
        return

    try:
        oracle_data = pd.read_csv(
            os.path.join(oracle_folder, "diag", "annotation_full.csv"),
        )
    except Exception:
        # Best effort: a missing or unreadable annotation file is reported, not
        # raised. Unlike a bare ``except``, Ctrl-C / SystemExit still propagate.
        print(f"Could not read {oracle_folder}/diag/annotation_full.csv")
        return

    cols_to_keep = [
        "mz",
        "rt",
        "level",
        "formula",
        "ion",
        "name",
        "hg",
        "ms2_matched",
        "ms2_missed",
    ]

    traces = []
    for _, row in oracle_data[cols_to_keep].iterrows():
        if row["level"] not in level:
            continue
        if classes is not None and row["hg"] not in classes:
            continue
        if row["level"] != 2:
            # Only level-2 annotations carry fragments to turn into traces.
            continue

        # Matched fragments first; a missed fragment with the same label
        # overrides the matched entry.
        frags = {
            **_parse_oracle_fragments(row["ms2_matched"]),
            **_parse_oracle_fragments(row["ms2_missed"]),
        }
        if not frags:
            continue

        label = row["name"] + " " + row["ion"]
        for prod_mz in frags.values():
            traces.append({
                # Running index over all emitted traces.
                "chid": len(traces),
                "type": "mrm",
                "name": label,
                "group": label,
                "prec_mz": row["mz"],
                "prod_mz": prod_mz,
                "rt": row["rt"],
                "rt_start": None,
                "rt_end": None,
                "istd": None,
                "adduct": row["ion"],
                "class": row["hg"],
                "formula": row["formula"],
                "inchikey": None,
                "smiles": None,
            })
    self.chrom_df = pd.DataFrame(traces)
    return


def _parse_oracle_fragments(cell):
    """
    Parse a fragment cell of space-separated ``mz|label`` tokens.

    :param cell: Cell value; anything that is not a string (e.g. NaN for an
        empty cell) yields an empty dict.
    :return: Dict mapping each fragment label to its m/z as float. Tokens
        without a ``|`` separator are ignored.
    """
    frags = {}
    if not isinstance(cell, str):
        return frags
    for token in cell.split(" "):
        if not token.strip():
            continue
        parts = token.split("|")
        if len(parts) > 1:
            # parts[0] is the m/z, parts[1] the fragment label used as key.
            frags[parts[1]] = float(parts[0])
    return frags
164
+
165
+
166
def chrom_from_features(
    self,
    feature_uid=None,
):
    """
    Create MS1 chromatogram traces from ``self.features_df`` and store them in
    ``self.chrom_df``.

    One trace is emitted per selected feature row, numbered sequentially from 0
    in row order, using the row's ``feature_uid``, ``mz``, ``rt``, ``rt_start``
    and ``rt_end`` values.

    NOTE(review): rows are read with ``iterrows()``, so ``self.features_df`` is
    assumed to be a pandas DataFrame - confirm against the caller.

    :param feature_uid: Feature UID, or an iterable of UIDs (list, tuple, set,
        array, ...), to create chromatograms for. If None, chromatograms are
        created for all features.
    :return: None. The traces are stored as a DataFrame in ``self.chrom_df``.
    """
    # Normalise the selection to a set once, so each row costs one O(1) lookup
    # instead of a scan over the whole selection.
    if feature_uid is None:
        wanted = None
    elif isinstance(feature_uid, (str, bytes)) or not hasattr(feature_uid, "__iter__"):
        wanted = {feature_uid}
    else:
        wanted = set(feature_uid)

    traces = []
    for _, row in self.features_df.iterrows():
        uid = row["feature_uid"]
        if wanted is not None and uid not in wanted:
            continue

        traces.append({
            # Running index over the emitted traces.
            "chid": len(traces),
            "type": "ms1",
            "name": f"MS1 fid:{uid} ({row['mz']:.4f})",
            "group": f"fid:{uid}",
            "prec_mz": row["mz"],
            "prod_mz": None,
            "rt": row["rt"],
            "rt_start": row["rt_start"],
            "rt_end": row["rt_end"],
            "istd": None,
            "adduct": None,
            "class": None,
            "formula": None,
            "inchikey": None,
            "smiles": None,
        })

    self.chrom_df = pd.DataFrame(traces)
    return