glycanPRMQuant 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glycanPRMQuant/__init__.py +17 -0
- glycanPRMQuant/calculateAUC.py +260 -0
- glycanPRMQuant/calculate_mass.py +42 -0
- glycanPRMQuant/centroidData.py +54 -0
- glycanPRMQuant/cli.py +149 -0
- glycanPRMQuant/consolidateAUC.py +55 -0
- glycanPRMQuant/constants.py +15 -0
- glycanPRMQuant/database/N_glycan_db.csv +1082 -0
- glycanPRMQuant/fragment_structure.py +70 -0
- glycanPRMQuant/glycanClassification.py +58 -0
- glycanPRMQuant/glycanClassificationUI.py +190 -0
- glycanPRMQuant/glycantypeBarplot.py +184 -0
- glycanPRMQuant/intensityBarplot.py +46 -0
- glycanPRMQuant/logging_utils.py +41 -0
- glycanPRMQuant/matchMS1.py +193 -0
- glycanPRMQuant/matchMS2.py +369 -0
- glycanPRMQuant/msPlotter.py +54 -0
- glycanPRMQuant/msfileReader.py +186 -0
- glycanPRMQuant/parallelProcess.py +234 -0
- glycanPRMQuant/performPCA.py +70 -0
- glycanPRMQuant/pipelineGUI.py +589 -0
- glycanPRMQuant/plotFragmentIntensity.py +356 -0
- glycanPRMQuant/plotMS2spectrum.py +152 -0
- glycanPRMQuant/processmzML.py +446 -0
- glycanPRMQuant/resources.py +34 -0
- glycanPRMQuant/skylineTransition.py +54 -0
- glycanprmquant-0.1.0.dist-info/METADATA +391 -0
- glycanprmquant-0.1.0.dist-info/RECORD +32 -0
- glycanprmquant-0.1.0.dist-info/WHEEL +5 -0
- glycanprmquant-0.1.0.dist-info/entry_points.txt +2 -0
- glycanprmquant-0.1.0.dist-info/licenses/LICENSE +21 -0
- glycanprmquant-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
glycanPRMQuant: A package for glycan PRM (Parallel Reaction Monitoring) quantification.
|
|
3
|
+
|
|
4
|
+
This package provides tools for processing mass spectrometry data of glycans,
|
|
5
|
+
including MS1/MS2 matching, fragmentation analysis, and quantification.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.1.0"
|
|
9
|
+
__author__ = "Vishal Sandilya"
|
|
10
|
+
__email__ = "vishal.sandilya@ttu.edu"
|
|
11
|
+
|
|
12
|
+
__all__ = [
    "matchMS1",
    "matchMS2",
    "process_mzml_pipeline",
    "calculateAUC",
]


def __getattr__(name):
    """Resolve ``process_mzml_pipeline`` lazily on first attribute access.

    ``__all__`` advertises ``process_mzml_pipeline``, but that name is a
    function inside the ``processmzML`` submodule rather than a submodule of
    its own, so without this hook ``from glycanPRMQuant import *`` (and
    ``glycanPRMQuant.process_mzml_pipeline``) fails with ``AttributeError``.
    The import is deferred so that importing the package stays cheap.

    Raises
    ------
    AttributeError
        For every other unknown attribute, which keeps normal attribute and
        submodule lookup behaviour intact.
    """
    if name == "process_mzml_pipeline":
        from .processmzML import process_mzml_pipeline

        return process_mzml_pipeline
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import numpy as np
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
from scipy.signal import find_peaks, peak_widths
|
|
7
|
+
from scipy.ndimage import gaussian_filter1d
|
|
8
|
+
from scipy.signal import savgol_filter
|
|
9
|
+
|
|
10
|
+
plt.rcParams["pdf.fonttype"] = 42
|
|
11
|
+
plt.rcParams["ps.fonttype"] = 42
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
def _smooth_signal(y, method: str, window: int):
    """Smooth a 1-D signal with a Gaussian or Savitzky-Golay filter.

    Parameters
    ----------
    y : array-like
        Signal to smooth.
    method : str
        "gaussian"/"gauss" or one of "savgol", "sav-gol", "savitzky-golay",
        "sg" (case-insensitive). A falsy value means "gaussian".
    window : int
        Gaussian sigma, or Savitzky-Golay window length. A falsy or
        non-positive value disables smoothing.

    Returns
    -------
    The smoothed signal, or ``y`` itself (unchanged) when smoothing is
    disabled, the method is not recognised, or the signal is too short for a
    Savitzky-Golay window.
    """
    if not window or window <= 0:
        return y

    kind = (method or "gaussian").lower()

    if kind in ("gaussian", "gauss"):
        return gaussian_filter1d(y, sigma=window, mode='nearest')

    if kind not in ("savgol", "sav-gol", "savitzky-golay", "sg"):
        return y

    n_samples = len(y)
    if n_samples < 3:
        return y

    # The Savitzky-Golay window must be odd, at least 3 (polyorder is 2) and
    # no longer than the signal.
    length = int(window)
    length += 1 - (length % 2)
    length = max(length, 3)
    if length > n_samples:
        length = n_samples if n_samples % 2 == 1 else n_samples - 1
    if length < 3:
        return y

    return savgol_filter(y, window_length=length, polyorder=2, mode='nearest')
|
|
36
|
+
|
|
37
|
+
def _resample_uniform(rt, y):
    """Resample ``(rt, y)`` onto an evenly spaced retention-time grid.

    Both inputs are converted to float arrays. The grid step is the median of
    the strictly positive gaps between sorted retention times, and intensities
    are linearly interpolated onto the grid.

    Returns
    -------
    tuple of np.ndarray
        ``(grid, y_on_grid)``. With fewer than three points the converted
        inputs are returned as given (not sorted); when no positive gap
        exists the sorted inputs are returned without resampling.
    """
    times = np.asarray(rt, dtype=float)
    values = np.asarray(y, dtype=float)
    if times.size < 3:
        return times, values

    ranking = np.argsort(times)
    times, values = times[ranking], values[ranking]

    gaps = np.diff(times)
    positive_gaps = gaps[gaps > 0]
    if positive_gaps.size == 0:
        return times, values

    spacing = np.median(positive_gaps)
    if spacing <= 0:
        return times, values

    # The half-step margin keeps the last sample inside the half-open arange.
    uniform_rt = np.arange(times.min(), times.max() + spacing * 0.5, spacing)
    return uniform_rt, np.interp(uniform_rt, times, values)
|
|
52
|
+
|
|
53
|
+
def _find_width_at_height(y: np.ndarray, peak_idx: int, height: float):
    """Locate where a peak crosses an absolute intensity level on each side.

    Starting from ``peak_idx`` (clipped to the valid index range), the nearest
    sample at or below ``height`` is found on the left and on the right, and
    the crossing position is linearly interpolated between that sample and
    its neighbour towards the peak.

    Returns
    -------
    tuple of float
        ``(left_ip, right_ip)`` as fractional sample indices. A side with no
        sample at or below ``height`` falls back to the signal edge (``0.0``
        or ``n - 1``). An empty signal yields ``(0.0, 0.0)``.
    """
    signal = np.asarray(y, dtype=float)
    count = signal.size
    if count == 0:
        return 0.0, float(max(count - 1, 0))

    apex = int(np.clip(peak_idx, 0, count - 1))
    level = float(height)

    # Left side: last sample at or below the level, up to and including the apex.
    below_left = np.nonzero(signal[:apex + 1] <= level)[0]
    if below_left.size == 0:
        left_edge = 0.0
    else:
        lo = below_left[-1]
        if lo == apex or signal[lo + 1] == signal[lo]:
            left_edge = float(lo)
        else:
            rise = signal[lo + 1] - signal[lo]
            left_edge = lo + float((level - signal[lo]) / rise)

    # Right side: first sample at or below the level, from the apex onwards.
    below_right = np.nonzero(signal[apex:] <= level)[0]
    if below_right.size == 0:
        right_edge = float(count - 1)
    else:
        hi = apex + below_right[0]
        if hi == apex or signal[hi] == signal[hi - 1]:
            right_edge = float(hi)
        else:
            before = signal[hi - 1]
            right_edge = (hi - 1) + float((level - before) / (signal[hi] - before))

    return left_edge, right_edge
|
|
92
|
+
|
|
93
|
+
def calculateAUC(
    ms2_input,
    glycan_col: str = 'Glycan',
    scan_col: str = 'scan_number',
    rt_col: str = 'rt',
    intensity_col: str = 'fragment_intensity',
    adduct_col: str = 'Adduct',
    rel_height: float = 0.7,
    rel_height_mode: str = "prominence",
    prominence: float = None,
    smoothing_window: int = 30,
    smoothing_method: str = "gaussian",
    plot: bool = False,
    save_path: str = None,
    window = 0
) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """
    Calculate AUC for each glycan/adduct by optionally smoothing the summed fragment-intensity
    chromatogram, detecting the main peak, determining its boundaries at a relative height,
    and integrating the (smoothed or raw) intensity between those boundaries. Also returns a
    glycan-level total that sums AUCs across all adducts for that glycan.

    smoothing_window <= 0 will skip smoothing.

    Parameters
    ----------
    ms2_input : pd.DataFrame or str or os.PathLike
        Matched MS2 DataFrame or path to CSV/Excel.
    glycan_col : str
        Column name for glycan composition.
    scan_col : str
        Column name for scan number.
    rt_col : str
        Column name for retention time.
    intensity_col : str
        Column for fragment intensity.
    adduct_col : str
        Column name for the adduct. If the column is absent, every row is
        assigned the pseudo-adduct 'ALL'.
    rel_height : float
        Relative height (0–1) for width calculation (e.g. 0.5 for half-height).
    rel_height_mode : str
        "prominence" (default) measures the width with scipy's ``peak_widths``;
        "height", "peak" or "absolute" measures it at
        ``peak_intensity * (1 - rel_height)``.
    prominence : float or None
        Minimum peak prominence passed to find_peaks.
    smoothing_window : int
        Smoothing window. For Gaussian, this is sigma. For Sav-Gol, this is window length.
    smoothing_method : str
        "gaussian" (default) or "savgol".
    plot : bool
        If True, plot the (smoothed or raw) chromatogram and integration window.
    save_path : str or None
        If given, each figure is saved to this path and closed; otherwise the
        figure is shown. The same path is reused for every glycan/adduct.
    window : float
        Padding subtracted from / added to the plotted retention-time limits.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        (per_adduct_df, total_df)
        per_adduct_df columns: [glycan_col, adduct_col, 'peak_rt', 'start_rt', 'end_rt', 'AUC']
        total_df columns: [glycan_col, 'AUC'] (AUC summed across adducts per glycan)
        Both frames are empty (but keep these columns) when the input has no rows.

    Raises
    ------
    ValueError
        If the file extension is unsupported or a required column is missing.
    """
    # load data
    if isinstance(ms2_input, (str, os.PathLike)):
        path = os.fspath(ms2_input)
        ext = os.path.splitext(path)[1].lower()
        if ext == '.csv':
            df = pd.read_csv(path)
        elif ext in ('.xlsx', '.xls'):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type.")
    else:
        df = ms2_input.copy()

    # validate
    missing = {glycan_col, scan_col, rt_col, intensity_col} - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    result_columns = [glycan_col, adduct_col, 'peak_rt', 'start_rt', 'end_rt', 'AUC']

    # Nothing to integrate: return correctly shaped empty frames instead of
    # failing later when grouping a column-less result table.
    if df.empty:
        return (
            pd.DataFrame(columns=result_columns),
            pd.DataFrame(columns=[glycan_col, 'AUC']),
        )

    # If adduct column is absent, treat all signal as one pseudo-adduct so grouping works
    if adduct_col not in df.columns:
        df[adduct_col] = 'ALL'

    # sum per scan for each glycan
    summed = (
        df.groupby([glycan_col, adduct_col, scan_col])
          .agg(rt=(rt_col, 'first'),
               summed_intensity=(intensity_col, 'sum'))
          .reset_index()
    )

    # np.trapezoid only exists in NumPy >= 2.0; older releases provide np.trapz.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    smoothing_enabled = bool(smoothing_window and smoothing_window > 0)
    mode = (rel_height_mode or "prominence").lower()

    if plot:
        # Style only when a figure is actually produced, so that a missing
        # 'science' style cannot break pure quantification runs.
        try:
            plt.style.use(['science', 'no-latex'])
        except OSError:
            logger.warning(
                "Matplotlib style 'science' is unavailable; using the current style."
            )
        plt.rcParams['font.family'] = 'Arial'

    results = []
    for (glycan, adduct), sub in summed.groupby([glycan_col, adduct_col]):
        sub = sub.sort_values('rt')
        x = sub['rt'].to_numpy()
        y = sub['summed_intensity'].to_numpy()

        # apply smoothing if requested (on a uniform grid so the filter width
        # means the same thing everywhere along the chromatogram)
        if smoothing_enabled:
            x, y_grid = _resample_uniform(x, y)
            y_smooth = _smooth_signal(y_grid, smoothing_method, smoothing_window)
        else:
            y_smooth = y

        # detect peaks on y_smooth; fall back to the global maximum
        peaks, _ = find_peaks(y_smooth, prominence=prominence)
        if len(peaks) == 0:
            main_idx = np.argmax(y_smooth)
        else:
            main_idx = peaks[np.argmax(y_smooth[peaks])]

        # compute width at rel_height
        if mode in ("height", "peak", "absolute"):
            peak_y = float(y_smooth[main_idx])
            height_level = peak_y * (1.0 - float(rel_height))
            left_ip, right_ip = _find_width_at_height(y_smooth, main_idx, height_level)
        else:
            _, _, left_ips, right_ips = peak_widths(
                y_smooth, [main_idx], rel_height=rel_height
            )
            left_ip, right_ip = left_ips[0], right_ips[0]

        # map fractional sample indices to retention time
        idxs = np.arange(len(x))
        start_rt = np.interp(left_ip, idxs, x)
        end_rt = np.interp(right_ip, idxs, x)
        peak_rt = x[main_idx]

        # Integrate with interpolated boundary points so narrow windows that
        # fall between scans do not collapse to a single apex sample.
        interior_mask = (x > start_rt) & (x < end_rt)
        x_auc = np.concatenate(([start_rt], x[interior_mask], [end_rt]))
        y_auc = np.interp(x_auc, x, y_smooth)
        auc = integrate(y_auc, x_auc)

        logger.info(
            "Glycan %r: peak RT=%.2f, window=[%.2f, %.2f], AUC=%.2f",
            glycan, peak_rt, start_rt, end_rt, auc,
        )
        results.append({
            glycan_col: glycan,
            adduct_col: adduct,
            'peak_rt': peak_rt,
            'start_rt': start_rt,
            'end_rt': end_rt,
            'AUC': auc
        })

        if plot:
            fig, ax = plt.subplots(figsize=(4.8, 4))
            ax.plot(x, y_smooth, label=(
                f'smoothed ({smoothing_method}, w={smoothing_window})'
                if smoothing_enabled else 'raw'
            ))
            ax.axvspan(start_rt, end_rt, color='red', alpha=0.1,
                       label='integration window')
            ax.set_xlabel('RT (min)')
            ax.set_ylabel('Intensity')
            ax.set_xlim(x.min() - window, x.max() + window)
            ax.set_ylim(0, y_smooth.max() * 1.1)
            ax.set_title(f"{glycan} ({adduct}): Integration Window")
            fig.tight_layout()
            if save_path:
                fig.savefig(save_path, dpi=300)
                logger.info("Saved plot to %s", save_path)
                # Release the figure; otherwise one stays open per glycan.
                plt.close(fig)
            else:
                plt.show()

    per_adduct_df = pd.DataFrame(results, columns=result_columns)
    if per_adduct_df.empty:
        total_df = pd.DataFrame(columns=[glycan_col, 'AUC'])
    else:
        total_df = per_adduct_df.groupby(glycan_col, as_index=False)['AUC'].sum()
    return per_adduct_df, total_df
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for calculating the mass of glycans
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from glypy.io import iupac
|
|
7
|
+
from glypy.io.iupac import IUPACError
|
|
8
|
+
from glypy.structure import ReducedEnd
|
|
9
|
+
from glypy.composition.composition_transform import derivatize
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def calculate_mass(glycan_str, derivatization="methyl", reduced_end=True, verbose=True):
    """
    Compute the mass of a glycan given as an IUPAC string.

    The string is parsed with the "simple" IUPAC dialect. When ``reduced_end``
    is truthy a ``ReducedEnd`` is set as the reducing end, and when
    ``derivatization`` is truthy it is passed to ``derivatize`` before the
    mass is read.

    Returns the value of ``glycan.mass()``, or None when parsing raises
    ``IUPACError`` or modification raises ``KeyError``/``ValueError``. Such
    failures are logged at error level only when ``verbose`` is truthy.
    """
    def _report(template, error):
        # Failures are reported only on request; the caller still gets None.
        if verbose:
            logger.error(template, error)

    try:
        structure = iupac.loads(glycan_str, dialect="simple")
    except IUPACError as parse_error:
        _report("Error parsing IUPAC string: %s", parse_error)
        return None

    try:
        if reduced_end:
            structure.set_reducing_end(ReducedEnd())
        if derivatization:
            derivatize(structure, derivatization)
        result = structure.mass()
    except (KeyError, ValueError) as modify_error:
        _report("Error modifying glycan: %s", modify_error)
        return None
    return result


if __name__ == "__main__":
    # Example usage
    glycan_str = "Man(a1-2)Man(a1-3)[Man(a1-3)[Man(a1-6)]Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc"
    mass = calculate_mass(glycan_str, derivatization=None, reduced_end=False)
    if mass is None:
        pass
    else:
        print(f"The mass of {glycan_str} is: {mass:.4f} Da")
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
def gaussian_fit(data, mz_col='mz', intensity_col='intensity', resolution=120000,
                 num_points=20000):
    """
    Reconstruct a mass spectrum by summing one Gaussian per (m/z, intensity) pair.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain columns:
        - scan_number
        - <mz_col>         # name of the m/z column
        - <intensity_col>  # name of the intensity column
    mz_col : str, optional
        Name of the column in `data` holding the m/z values. Default is 'mz'.
    intensity_col : str, optional
        Name of the column in `data` holding the peak intensities. Default is 'intensity'.
    resolution : float, optional
        Instrument resolution used to compute FWHM of peaks
        (FWHM = mz / resolution). Must be positive. Default is 120000.
    num_points : int, optional
        Number of points in the m/z grid. Default is 20000. The grid spans
        [min(mz) - 1, max(mz) + 1]; if the resulting spacing is larger than
        the peak width (mz / resolution), peaks are undersampled, so raise
        this value for wide m/z ranges or high resolution.

    Returns
    -------
    pandas.DataFrame
        Two columns:
        - 'mz': uniformly spaced m/z grid
        - 'intensity': reconstructed spectrum
        The frame is empty when `data` has no finite m/z value.

    Raises
    ------
    ValueError
        If a required column is missing, or `resolution` / `num_points` is
        not positive.
    """
    # Check for required columns
    required = {'scan_number', mz_col, intensity_col}
    missing = required - set(data.columns)
    if missing:
        raise ValueError(f"Input DataFrame is missing columns: {missing}")
    if not resolution > 0:
        raise ValueError("resolution must be a positive number")
    num_points = int(num_points)
    if num_points < 1:
        raise ValueError("num_points must be at least 1")

    centers = data[mz_col].to_numpy(dtype=float)
    heights = data[intensity_col].to_numpy(dtype=float)

    finite_mz = centers[np.isfinite(centers)]
    if finite_mz.size == 0:
        # No usable peaks: an empty spectrum is more useful than a NaN grid.
        return pd.DataFrame({'mz': np.array([], dtype=float),
                             'intensity': np.array([], dtype=float)})

    # Build the m/z grid
    mz_grid = np.linspace(finite_mz.min() - 1, finite_mz.max() + 1, num_points)

    # Initialize spectrum
    reconstructed = np.zeros_like(mz_grid)

    # Rows with a non-finite value would turn the whole spectrum into NaN, and
    # a zero m/z gives a zero-width peak (division by zero), so skip them.
    usable = np.isfinite(centers) & np.isfinite(heights) & (centers != 0)
    fwhm_to_sigma = 2 * np.sqrt(2 * np.log(2))

    # Sum Gaussian peaks
    for center, height in zip(centers[usable], heights[usable]):
        fwhm = center / resolution
        sigma = fwhm / fwhm_to_sigma
        reconstructed += height * np.exp(-0.5 * ((mz_grid - center) / sigma) ** 2)

    return pd.DataFrame({'mz': mz_grid, 'intensity': reconstructed})
|
glycanPRMQuant/cli.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Command-line interface for glycanPRMQuant."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
import multiprocessing
|
|
6
|
+
|
|
7
|
+
from glycanPRMQuant.logging_utils import configure_logging
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _add_common_options(parser: argparse.ArgumentParser) -> None:
    """Register the processing and logging options on ``parser``.

    The options are declared in a table and added in order, so the help
    output lists them in the same sequence as the table.
    """
    store_true = {"action": "store_true"}
    option_table = (
        ("--ppm-ms1-tol", {"type": float, "default": 10}),
        ("--mz-min", {"type": float, "default": 400}),
        ("--mz-max", {"type": float, "default": 2000}),
        ("--mz-offset", {"type": float, "default": 0.0}),
        ("--mass-offset", {"type": float, "default": 0.0}),
        ("--intensity-threshold", {"type": float, "default": 1e2}),
        ("--ppm-ms2-tol", {"type": float, "default": 10}),
        ("--mz-tol", {"type": float, "default": 0.02}),
        ("--fragment-ion-series", {"default": "ABCXYZ"}),
        ("--fragment-max-cleavages", {"type": int, "default": 2}),
        ("--smoothing-window", {"type": int, "default": 11}),
        ("--smoothing-method", {"choices": ["gaussian", "savgol"], "default": "gaussian"}),
        ("--disable-smoothing", store_true),
        ("--rel-height", {"type": float, "default": 0.7}),
        ("--rel-height-mode", {"choices": ["prominence", "height"], "default": "prominence"}),
        ("--precursor-db-path", {}),
        ("--structure-db-path", {}),
        ("--skyline-transition", store_true),
        ("--quiet", store_true),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    # Repeatable verbosity flag: -v, -vv, ...
    parser.add_argument("-v", "--verbose", action="count", default=0)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _log_level(args: argparse.Namespace) -> int:
    """Translate the ``--quiet`` / ``-v`` flags into a ``logging`` level.

    ``--quiet`` takes precedence and yields WARNING. Otherwise two or more
    ``-v`` flags yield DEBUG, and anything less yields INFO.
    """
    if args.quiet:
        return logging.WARNING
    return logging.DEBUG if args.verbose >= 2 else logging.INFO
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _run_one(args: argparse.Namespace) -> int:
    """Handle the ``run`` sub-command: process a single mzML file.

    Configures logging from the verbosity flags, then forwards the parsed
    options to ``process_mzml_pipeline``. Returns 0 once the pipeline call
    returns.
    """
    configure_logging(_log_level(args), force=True)
    # Imported here, after logging is configured, rather than at module import.
    from glycanPRMQuant.processmzML import process_mzml_pipeline

    # Options whose keyword name equals the parsed attribute name.
    passthrough = (
        "mzml_file",
        "output_dir",
        "ppm_ms1_tol",
        "mz_min",
        "mz_max",
        "mz_offset",
        "mass_offset",
        "intensity_threshold",
        "ppm_ms2_tol",
        "mz_tol",
        "smoothing_window",
        "smoothing_method",
        "rel_height",
        "rel_height_mode",
        "skyline_transition",
        "fragment_ion_series",
        "fragment_max_cleavages",
        "precursor_db_path",
        "structure_db_path",
    )
    options = {name: getattr(args, name) for name in passthrough}
    # The CLI exposes the negative switch; the pipeline takes the positive one.
    options["enable_smoothing"] = not args.disable_smoothing

    process_mzml_pipeline(**options)
    return 0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _run_batch(args: argparse.Namespace) -> int:
    """Handle the ``batch`` sub-command: process several mzML files.

    Configures logging, calls ``multiprocessing.freeze_support()``, then
    forwards the parsed options to ``run_parallel_pipeline``. Returns 0 once
    that call returns.
    """
    configure_logging(_log_level(args), force=True)
    multiprocessing.freeze_support()
    # Imported here, after logging is configured, rather than at module import.
    from glycanPRMQuant.parallelProcess import run_parallel_pipeline

    # Options whose keyword name equals the parsed attribute name.
    passthrough = (
        "input_dir",
        "input_files",
        "output_root",
        "ppm_ms1_tol",
        "mz_min",
        "mz_max",
        "mz_offset",
        "mass_offset",
        "intensity_threshold",
        "ppm_ms2_tol",
        "mz_tol",
        "fragment_ion_series",
        "fragment_max_cleavages",
        "smoothing_window",
        "smoothing_method",
        "rel_height",
        "rel_height_mode",
        "skyline_transition",
        "precursor_db_path",
        "structure_db_path",
        "overwrite",
        "dry_run",
    )
    options = {name: getattr(args, name) for name in passthrough}
    # These two are named differently on the command line and in the pipeline.
    options["n_workers"] = args.workers
    options["enable_smoothing"] = not args.disable_smoothing

    run_parallel_pipeline(**options)
    return 0
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _run_gui(args: argparse.Namespace) -> int:
    """Handle the ``gui`` sub-command: start the GUI and block until it exits.

    Configures logging from the verbosity flags, creates a ``PipelineGUI``
    and runs its ``mainloop``. Returns 0 after the main loop returns.
    """
    configure_logging(_log_level(args), force=True)
    # Imported here so the other sub-commands never load the GUI module.
    from glycanPRMQuant.pipelineGUI import PipelineGUI

    PipelineGUI().mainloop()
    return 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``glycan-prmquant`` argument parser.

    Three sub-commands are registered, and one of them is required:

    * ``run``   - positional ``mzml_file`` and ``output_dir`` plus the common
      options; dispatches to ``_run_one``.
    * ``batch`` - exactly one of ``--input-dir`` / ``--input-files``, a
      required ``--output-root``, optional ``--workers``, ``--overwrite`` and
      ``--dry-run``, plus the common options; dispatches to ``_run_batch``.
    * ``gui``   - only ``--quiet`` and ``-v/--verbose``; dispatches to
      ``_run_gui``.

    The chosen handler is stored on the namespace as ``func``.
    """
    root = argparse.ArgumentParser(prog="glycan-prmquant")
    commands = root.add_subparsers(dest="command", required=True)

    # --- run -------------------------------------------------------------
    single = commands.add_parser("run", help="Process one mzML file")
    for positional in ("mzml_file", "output_dir"):
        single.add_argument(positional)
    _add_common_options(single)
    single.set_defaults(func=_run_one)

    # --- batch -----------------------------------------------------------
    batch = commands.add_parser("batch", help="Process multiple mzML files")
    inputs = batch.add_mutually_exclusive_group(required=True)
    inputs.add_argument("--input-dir")
    inputs.add_argument("--input-files", nargs="+")
    batch.add_argument("--output-root", required=True)
    batch.add_argument("--workers", type=int)
    for switch in ("--overwrite", "--dry-run"):
        batch.add_argument(switch, action="store_true")
    _add_common_options(batch)
    batch.set_defaults(func=_run_batch)

    # --- gui -------------------------------------------------------------
    gui = commands.add_parser("gui", help="Launch the Tkinter GUI")
    gui.add_argument("--quiet", action="store_true")
    gui.add_argument("-v", "--verbose", action="count", default=0)
    gui.set_defaults(func=_run_gui)

    return root
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected sub-command.

    ``argv`` is handed to ``parse_args`` unchanged, so ``None`` makes argparse
    read the process arguments. The return value of the sub-command handler
    is returned.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
def consolidate_auc_results(results_root: str, output_csv: str):
    """
    Consolidate all <mzML_basename>_auc_values.csv files under `results_root`
    into one CSV file.

    The output will have one row per glycan, a 'Glycan' column plus one column
    per mzML file (named by the folder/mzML basename) containing its AUC values.
    Glycans missing from a file are left empty in that file's column.

    Sub-folders without an AUC file, without a 'Glycan' column or without any
    column whose name contains 'auc' (case-insensitive) are skipped with a
    printed warning. If a file lists the same glycan more than once, its AUC
    values are summed so the merge still yields one row per glycan.

    Parameters
    ----------
    results_root : str
        Path to the directory containing per-file subfolders (each with *_auc_values.csv).
    output_csv : str
        Path to write the consolidated CSV file (e.g. "all_auc_summary.csv").

    Raises
    ------
    RuntimeError
        If no usable AUC file is found under `results_root`.
    """
    auc_dfs = []
    for sub in sorted(os.listdir(results_root)):
        subdir = os.path.join(results_root, sub)
        if not os.path.isdir(subdir):
            continue
        auc_file = os.path.join(subdir, f"{sub}_auc_values.csv")
        if not os.path.isfile(auc_file):
            print(f"Warning: no AUC file found for {sub} (looking for {auc_file})")
            continue

        df = pd.read_csv(auc_file)
        if 'Glycan' not in df.columns:
            print(f"Warning: no 'Glycan' column found in {auc_file}")
            continue

        # Identify AUC column(s) (case-insensitive contains 'auc')
        auc_cols = [c for c in df.columns if 'auc' in str(c).lower()]
        if not auc_cols:
            print(f"Warning: no AUC column found in {auc_file}")
            continue
        # Prefer exact 'AUC' match if present
        auc_col = 'AUC' if 'AUC' in auc_cols else auc_cols[0]

        # Select first, then rename: renaming first would create duplicate
        # column labels whenever the folder name equals another column.
        sample = df[['Glycan', auc_col]].rename(columns={auc_col: sub})

        # Repeated glycans would multiply rows in the merge below, so collapse
        # them. min_count=1 keeps an all-missing group as missing, not 0.
        if sample['Glycan'].duplicated().any():
            print(f"Warning: repeated glycans in {auc_file}; summing their AUC values")
            sample = (
                sample.groupby('Glycan', as_index=False, sort=False, dropna=False)[sub]
                      .sum(min_count=1)
            )
        auc_dfs.append(sample)

    if not auc_dfs:
        raise RuntimeError(f"No _auc_values.csv files found in {results_root}")

    # Merge all on 'Glycan' using outer join
    merged = auc_dfs[0]
    for sample in auc_dfs[1:]:
        merged = pd.merge(merged, sample, on='Glycan', how='outer')

    merged = merged.sort_values('Glycan').reset_index(drop=True)

    # Write to CSV
    merged.to_csv(output_csv, index=False)
    print(f"Wrote consolidated AUC summary to {output_csv}")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared constants for glycanPRMQuant package.
|
|
3
|
+
|
|
4
|
+
This module contains all physical and chemical constants used throughout
|
|
5
|
+
the package to ensure consistency across modules.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .resources import resource_path
|
|
9
|
+
|
|
10
|
+
# Mass constants (in Daltons)
|
|
11
|
+
PROTON_MASS = 1.007276
|
|
12
|
+
NH4_MASS = 18.033826 # Ammonium adduct mass
|
|
13
|
+
|
|
14
|
+
# Default file paths
|
|
15
|
+
DEFAULT_PRECURSOR_DB = resource_path("database/N_glycan_db.csv")
|