glycanPRMQuant 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ """
2
+ glycanPRMQuant: A package for glycan PRM (Parallel Reaction Monitoring) quantification.
3
+
4
+ This package provides tools for processing mass spectrometry data of glycans,
5
+ including MS1/MS2 matching, fragmentation analysis, and quantification.
6
+ """
7
+
8
# Package metadata.
__version__ = "0.1.0"
__author__ = "Vishal Sandilya"
__email__ = "vishal.sandilya@ttu.edu"

# Names advertised as the package's public API.
# NOTE(review): none of these names is imported in the visible part of this
# module, so `from glycanPRMQuant import *` would fail unless they are bound
# elsewhere — confirm and add the corresponding imports if needed.
__all__ = [
    "matchMS1",
    "matchMS2",
    "process_mzml_pipeline",
    "calculateAUC",
]
@@ -0,0 +1,260 @@
1
+ import os
2
+ import logging
3
+ import pandas as pd
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ from scipy.signal import find_peaks, peak_widths
7
+ from scipy.ndimage import gaussian_filter1d
8
+ from scipy.signal import savgol_filter
9
+
10
+ plt.rcParams["pdf.fonttype"] = 42
11
+ plt.rcParams["ps.fonttype"] = 42
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ def _smooth_signal(y, method: str, window: int):
16
+ if not window or window <= 0:
17
+ return y
18
+ method = (method or "gaussian").lower()
19
+ if method in ("gaussian", "gauss"):
20
+ return gaussian_filter1d(y, sigma=window, mode='nearest')
21
+ if method in ("savgol", "sav-gol", "savitzky-golay", "sg"):
22
+ n = len(y)
23
+ if n < 3:
24
+ return y
25
+ win = int(window)
26
+ if win % 2 == 0:
27
+ win += 1
28
+ if win < 3:
29
+ win = 3
30
+ if win > n:
31
+ win = n if n % 2 == 1 else n - 1
32
+ if win < 3:
33
+ return y
34
+ return savgol_filter(y, window_length=win, polyorder=2, mode='nearest')
35
+ return y
36
+
37
+ def _resample_uniform(rt, y):
38
+ rt = np.asarray(rt, dtype=float)
39
+ y = np.asarray(y, dtype=float)
40
+ if rt.size < 3:
41
+ return rt, y
42
+ order = np.argsort(rt)
43
+ rt = rt[order]
44
+ y = y[order]
45
+ diffs = np.diff(rt)
46
+ step = np.median(diffs[diffs > 0]) if np.any(diffs > 0) else None
47
+ if step is None or step <= 0:
48
+ return rt, y
49
+ grid = np.arange(rt.min(), rt.max() + step * 0.5, step)
50
+ y_interp = np.interp(grid, rt, y)
51
+ return grid, y_interp
52
+
53
+ def _find_width_at_height(y: np.ndarray, peak_idx: int, height: float):
54
+ y = np.asarray(y, dtype=float)
55
+ n = y.size
56
+ if n == 0:
57
+ return 0.0, float(max(n - 1, 0))
58
+ peak_idx = int(np.clip(peak_idx, 0, n - 1))
59
+ height = float(height)
60
+
61
+ left_candidates = np.where(y[:peak_idx + 1] <= height)[0]
62
+ if left_candidates.size == 0:
63
+ left_ip = 0.0
64
+ else:
65
+ li = left_candidates[-1]
66
+ if li == peak_idx:
67
+ left_ip = float(li)
68
+ else:
69
+ y1, y2 = y[li], y[li + 1]
70
+ if y2 == y1:
71
+ left_ip = float(li)
72
+ else:
73
+ frac = (height - y1) / (y2 - y1)
74
+ left_ip = li + float(frac)
75
+
76
+ right_candidates = np.where(y[peak_idx:] <= height)[0]
77
+ if right_candidates.size == 0:
78
+ right_ip = float(n - 1)
79
+ else:
80
+ ri = peak_idx + right_candidates[0]
81
+ if ri == peak_idx:
82
+ right_ip = float(ri)
83
+ else:
84
+ y1, y2 = y[ri - 1], y[ri]
85
+ if y2 == y1:
86
+ right_ip = float(ri)
87
+ else:
88
+ frac = (height - y1) / (y2 - y1)
89
+ right_ip = (ri - 1) + float(frac)
90
+
91
+ return left_ip, right_ip
92
+
93
def calculateAUC(
    ms2_input,
    glycan_col: str = 'Glycan',
    scan_col: str = 'scan_number',
    rt_col: str = 'rt',
    intensity_col: str = 'fragment_intensity',
    adduct_col: str = 'Adduct',
    rel_height: float = 0.7,
    rel_height_mode: str = "prominence",
    prominence: float = None,
    smoothing_window: int = 30,
    smoothing_method: str = "gaussian",
    plot: bool = False,
    save_path: str = None,
    window = 0
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Calculate AUC for each glycan/adduct by optionally smoothing the summed fragment-intensity
    chromatogram, detecting the main peak, determining its boundaries at a relative height,
    and integrating the (smoothed or raw) intensity between those boundaries. Also returns a
    glycan-level total that sums AUCs across all adducts for that glycan.

    smoothing_window <= 0 will skip smoothing.

    Parameters
    ----------
    ms2_input : pd.DataFrame, str or os.PathLike
        Matched MS2 DataFrame or path to CSV/Excel.
    glycan_col : str
        Column name for glycan composition.
    scan_col : str
        Column name for scan number.
    rt_col : str
        Column name for retention time.
    intensity_col : str
        Column for fragment intensity.
    adduct_col : str
        Column name for the adduct. When the column is absent, every row is
        assigned the pseudo-adduct 'ALL'.
    rel_height : float
        Relative height (0–1) for width calculation (e.g. 0.5 for half-height).
    rel_height_mode : str
        "height", "peak" or "absolute" place the boundaries where the signal
        drops to ``apex * (1 - rel_height)``. Any other value (default
        "prominence") delegates to ``scipy.signal.peak_widths``.
    prominence : float or None
        Minimum peak prominence passed to find_peaks.
    smoothing_window : int
        Smoothing window. For Gaussian, this is sigma. For Sav-Gol, this is window length.
    smoothing_method : str
        "gaussian" (default) or "savgol".
    plot : bool
        If True, plot the (smoothed or raw) chromatogram and integration window
        for every glycan/adduct.
    save_path : str or None
        When plotting, save each figure to this path instead of showing it.
        NOTE: the same path is used for every glycan/adduct, so later figures
        overwrite earlier ones.
    window : float
        Padding added on both sides of the plotted retention-time range.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        (per_adduct_df, total_df)
        per_adduct_df columns: [glycan_col, adduct_col, 'peak_rt', 'start_rt', 'end_rt', 'AUC']
        total_df columns: [glycan_col, 'AUC'] (AUC summed across adducts per glycan)
        Both frames are empty (but keep their columns) when the input has no rows.

    Raises
    ------
    ValueError
        If a file path has an unsupported extension or required columns are missing.
    """
    # load data
    if isinstance(ms2_input, (str, os.PathLike)):
        path = os.fspath(ms2_input)
        ext = os.path.splitext(path)[1].lower()
        if ext == '.csv':
            df = pd.read_csv(path)
        elif ext in ('.xlsx', '.xls'):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type.")
    else:
        # Work on a copy so the caller's frame is never modified.
        df = ms2_input.copy()

    # validate
    missing = {glycan_col, scan_col, rt_col, intensity_col} - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    # If adduct column is absent, treat all signal as one pseudo-adduct so grouping works
    if adduct_col not in df.columns:
        df[adduct_col] = 'ALL'

    # sum per scan for each glycan
    summed = (
        df.groupby([glycan_col, adduct_col, scan_col])
          .agg(rt=(rt_col, 'first'),
               summed_intensity=(intensity_col, 'sum'))
          .reset_index()
    )

    # np.trapezoid only exists on NumPy >= 2.0; older releases call it np.trapz.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    smoothing_on = bool(smoothing_window) and smoothing_window > 0
    absolute_height = (rel_height_mode or "prominence").lower() in ("height", "peak", "absolute")

    if plot:
        # Styling is only needed when figures are drawn. The 'science' style
        # comes from an optional third-party package, so fall back to the
        # current style when it is not registered instead of aborting.
        try:
            plt.style.use(['science', 'no-latex'])
        except OSError:
            logger.debug("Matplotlib style 'science' is unavailable; using the current style.")
        plt.rcParams['font.family'] = 'Arial'

    result_columns = [glycan_col, adduct_col, 'peak_rt', 'start_rt', 'end_rt', 'AUC']
    results = []
    for (glycan, adduct), sub in summed.groupby([glycan_col, adduct_col]):
        sub = sub.sort_values('rt')
        x = sub['rt'].to_numpy()
        y = sub['summed_intensity'].to_numpy()

        # apply smoothing if requested (on a uniform grid, so the filter
        # width means the same thing everywhere along the chromatogram)
        if smoothing_on:
            x, y_uniform = _resample_uniform(x, y)
            y_smooth = _smooth_signal(y_uniform, smoothing_method, smoothing_window)
        else:
            y_smooth = y

        # detect peaks on y_smooth; fall back to the global maximum
        peaks, _ = find_peaks(y_smooth, prominence=prominence)
        if len(peaks) == 0:
            main_idx = np.argmax(y_smooth)
        else:
            main_idx = peaks[np.argmax(y_smooth[peaks])]

        # compute width at rel_height
        if absolute_height:
            peak_y = float(y_smooth[main_idx])
            height_level = peak_y * (1.0 - float(rel_height))
            left_ip, right_ip = _find_width_at_height(y_smooth, main_idx, height_level)
        else:
            _, _, left_ips, right_ips = peak_widths(
                y_smooth, [main_idx], rel_height=rel_height
            )
            left_ip, right_ip = left_ips[0], right_ips[0]

        # map fractional sample indices to retention time
        idxs = np.arange(len(x))
        start_rt = np.interp(left_ip, idxs, x)
        end_rt = np.interp(right_ip, idxs, x)
        peak_rt = x[main_idx]

        # Integrate with interpolated boundary points so narrow windows that
        # fall between scans do not collapse to a single apex sample.
        interior_mask = (x > start_rt) & (x < end_rt)
        x_auc = np.concatenate(([start_rt], x[interior_mask], [end_rt]))
        y_auc = np.interp(x_auc, x, y_smooth)
        auc = integrate(y_auc, x_auc)

        logger.info(
            "Glycan %r: peak RT=%.2f, window=[%.2f, %.2f], AUC=%.2f",
            glycan, peak_rt, start_rt, end_rt, auc,
        )
        results.append({
            glycan_col: glycan,
            adduct_col: adduct,
            'peak_rt': peak_rt,
            'start_rt': start_rt,
            'end_rt': end_rt,
            'AUC': auc
        })

        if plot:
            fig, ax = plt.subplots(figsize=(4.8, 4))
            ax.plot(x, y_smooth, label=(
                f'smoothed ({smoothing_method}, w={smoothing_window})'
                if smoothing_on else 'raw'
            ))
            ax.axvspan(start_rt, end_rt, color='red', alpha=0.1,
                       label='integration window')
            ax.set_xlabel('RT (min)')
            ax.set_ylabel('Intensity')
            ax.set_xlim(x.min() - window, x.max() + window)
            ax.set_ylim(0, y_smooth.max() * 1.1)
            ax.set_title(f"{glycan} ({adduct}): Integration Window")
            fig.tight_layout()
            if save_path:
                fig.savefig(save_path, dpi=300)
                logger.info("Saved plot to %s", save_path)
                # Saved figures are never shown, so release them right away.
                plt.close(fig)
            else:
                plt.show()

    # Explicit columns keep both frames well-formed when there were no groups.
    per_adduct_df = pd.DataFrame(results, columns=result_columns)
    if per_adduct_df.empty:
        total_df = pd.DataFrame(columns=[glycan_col, 'AUC'])
    else:
        total_df = per_adduct_df.groupby(glycan_col, as_index=False)['AUC'].sum()
    return per_adduct_df, total_df
@@ -0,0 +1,42 @@
1
+ """
2
+ Module for calculating the mass of glycans
3
+ """
4
+ import logging
5
+
6
+ from glypy.io import iupac
7
+ from glypy.io.iupac import IUPACError
8
+ from glypy.structure import ReducedEnd
9
+ from glypy.composition.composition_transform import derivatize
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def calculate_mass(glycan_str, derivatization="methyl", reduced_end=True, verbose=True):
    """
    Compute the mass of a glycan given as an IUPAC string.

    The string is parsed with the "simple" IUPAC dialect. When ``reduced_end``
    is truthy a ``ReducedEnd`` is set as the reducing end, and when
    ``derivatization`` is truthy it is applied via ``derivatize`` before the
    mass is read.

    Returns the mass, or None when parsing fails with ``IUPACError`` or the
    modification step raises ``KeyError``/``ValueError``. Those failures are
    logged at error level only when ``verbose`` is truthy.
    """
    def _report(message, error):
        if verbose:
            logger.error(message, error)

    try:
        parsed = iupac.loads(glycan_str, dialect="simple")
    except IUPACError as exc:
        _report("Error parsing IUPAC string: %s", exc)
        return None

    try:
        if reduced_end:
            parsed.set_reducing_end(ReducedEnd())
        if derivatization:
            derivatize(parsed, derivatization)
        return parsed.mass()
    except (KeyError, ValueError) as exc:
        _report("Error modifying glycan: %s", exc)
        return None
36
+
37
if __name__ == "__main__":
    # Example usage: mass of an underivatized glycan without a reduced end.
    example_glycan = "Man(a1-2)Man(a1-3)[Man(a1-3)[Man(a1-6)]Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc"
    example_mass = calculate_mass(example_glycan, derivatization=None, reduced_end=False)
    if example_mass is not None:
        print(f"The mass of {example_glycan} is: {example_mass:.4f} Da")
@@ -0,0 +1,54 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
def gaussian_fit(data, mz_col='mz', intensity_col='intensity', resolution=120000,
                 num_points=20000):
    """
    Reconstruct a mass spectrum by summing one Gaussian peak per (m/z, intensity) pair.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain columns:
          - scan_number
          - <mz_col>         # name of the m/z column
          - <intensity_col>  # name of the intensity column
    mz_col : str, optional
        Name of the column in `data` holding the m/z values. Default is 'mz'.
    intensity_col : str, optional
        Name of the column in `data` holding the peak intensities. Default is 'intensity'.
    resolution : float, optional
        Instrument resolution used to compute FWHM of peaks
        (FWHM = mz / resolution). Must be positive. Default is 120000.
    num_points : int, optional
        Number of samples in the m/z grid. The grid spans
        [min(mz) - 1, max(mz) + 1], so wide m/z ranges need more points for the
        grid spacing to resolve peaks of width mz / resolution. Default is 20000.

    Returns
    -------
    pandas.DataFrame
        Two columns:
          - 'mz': uniformly spaced m/z grid
          - 'intensity': reconstructed spectrum
        The frame is empty when `data` has no row with finite m/z and intensity.

    Raises
    ------
    ValueError
        If required columns are missing or `resolution` is not positive.
    """
    # Check for required columns
    required = {'scan_number', mz_col, intensity_col}
    missing = required - set(data.columns)
    if missing:
        raise ValueError(f"Input DataFrame is missing columns: {missing}")
    if not resolution > 0:
        raise ValueError(f"resolution must be positive, got {resolution!r}")

    centers = np.asarray(data[mz_col], dtype=float)
    heights = np.asarray(data[intensity_col], dtype=float)

    # A single NaN/inf row would otherwise turn the whole spectrum into NaN.
    usable = np.isfinite(centers) & np.isfinite(heights)
    centers, heights = centers[usable], heights[usable]
    if centers.size == 0:
        return pd.DataFrame({'mz': np.array([], dtype=float),
                             'intensity': np.array([], dtype=float)})

    # Build the m/z grid
    mz_grid = np.linspace(centers.min() - 1, centers.max() + 1, num_points)

    # Initialize spectrum
    reconstructed = np.zeros_like(mz_grid)

    # Gaussian sigma from FWHM: sigma = FWHM / (2 * sqrt(2 * ln 2))
    sigmas = (centers / resolution) / (2 * np.sqrt(2 * np.log(2)))

    # Sum Gaussian peaks. One peak at a time keeps memory at O(num_points).
    for center, height, sigma in zip(centers, heights, sigmas):
        if sigma == 0:
            # m/z of exactly 0 gives a zero-width peak; it cannot be drawn.
            continue
        reconstructed += height * np.exp(-0.5 * ((mz_grid - center) / sigma) ** 2)

    return pd.DataFrame({'mz': mz_grid, 'intensity': reconstructed})
glycanPRMQuant/cli.py ADDED
@@ -0,0 +1,149 @@
1
+ """Command-line interface for glycanPRMQuant."""
2
+
3
+ import argparse
4
+ import logging
5
+ import multiprocessing
6
+
7
+ from glycanPRMQuant.logging_utils import configure_logging
8
+
9
+
10
+ def _add_common_options(parser: argparse.ArgumentParser) -> None:
11
+ parser.add_argument("--ppm-ms1-tol", type=float, default=10)
12
+ parser.add_argument("--mz-min", type=float, default=400)
13
+ parser.add_argument("--mz-max", type=float, default=2000)
14
+ parser.add_argument("--mz-offset", type=float, default=0.0)
15
+ parser.add_argument("--mass-offset", type=float, default=0.0)
16
+ parser.add_argument("--intensity-threshold", type=float, default=1e2)
17
+ parser.add_argument("--ppm-ms2-tol", type=float, default=10)
18
+ parser.add_argument("--mz-tol", type=float, default=0.02)
19
+ parser.add_argument("--fragment-ion-series", default="ABCXYZ")
20
+ parser.add_argument("--fragment-max-cleavages", type=int, default=2)
21
+ parser.add_argument("--smoothing-window", type=int, default=11)
22
+ parser.add_argument("--smoothing-method", choices=["gaussian", "savgol"], default="gaussian")
23
+ parser.add_argument("--disable-smoothing", action="store_true")
24
+ parser.add_argument("--rel-height", type=float, default=0.7)
25
+ parser.add_argument("--rel-height-mode", choices=["prominence", "height"], default="prominence")
26
+ parser.add_argument("--precursor-db-path")
27
+ parser.add_argument("--structure-db-path")
28
+ parser.add_argument("--skyline-transition", action="store_true")
29
+ parser.add_argument("--quiet", action="store_true")
30
+ parser.add_argument("-v", "--verbose", action="count", default=0)
31
+
32
+
33
+ def _log_level(args: argparse.Namespace) -> int:
34
+ if args.quiet:
35
+ return logging.WARNING
36
+ if args.verbose >= 2:
37
+ return logging.DEBUG
38
+ return logging.INFO
39
+
40
+
41
def _run_one(args: argparse.Namespace) -> int:
    """Handle the ``run`` sub-command: process a single mzML file.

    Logging is configured first, then the pipeline module is imported and
    called with the parsed options. Returns 0 once the pipeline returns.
    """
    configure_logging(_log_level(args), force=True)
    from glycanPRMQuant.processmzML import process_mzml_pipeline

    # Options whose keyword name matches the parsed attribute name.
    passthrough = (
        "mzml_file",
        "output_dir",
        "ppm_ms1_tol",
        "mz_min",
        "mz_max",
        "mz_offset",
        "mass_offset",
        "intensity_threshold",
        "ppm_ms2_tol",
        "mz_tol",
        "smoothing_window",
        "smoothing_method",
        "rel_height",
        "rel_height_mode",
        "skyline_transition",
        "fragment_ion_series",
        "fragment_max_cleavages",
        "precursor_db_path",
        "structure_db_path",
    )
    pipeline_kwargs = {name: getattr(args, name) for name in passthrough}
    # The CLI exposes the negative flag; the pipeline takes the positive one.
    pipeline_kwargs["enable_smoothing"] = not args.disable_smoothing

    process_mzml_pipeline(**pipeline_kwargs)
    return 0
68
+
69
+
70
def _run_batch(args: argparse.Namespace) -> int:
    """Handle the ``batch`` sub-command: process several mzML files.

    Logging is configured, ``multiprocessing.freeze_support()`` is called,
    and the parallel pipeline is then imported and invoked with the parsed
    options. Returns 0 once the pipeline returns.
    """
    configure_logging(_log_level(args), force=True)
    multiprocessing.freeze_support()
    from glycanPRMQuant.parallelProcess import run_parallel_pipeline

    # Options whose keyword name matches the parsed attribute name.
    passthrough = (
        "input_dir",
        "input_files",
        "output_root",
        "ppm_ms1_tol",
        "mz_min",
        "mz_max",
        "mz_offset",
        "mass_offset",
        "intensity_threshold",
        "ppm_ms2_tol",
        "mz_tol",
        "fragment_ion_series",
        "fragment_max_cleavages",
        "smoothing_window",
        "smoothing_method",
        "rel_height",
        "rel_height_mode",
        "skyline_transition",
        "precursor_db_path",
        "structure_db_path",
        "overwrite",
        "dry_run",
    )
    pipeline_kwargs = {name: getattr(args, name) for name in passthrough}
    # These two are named differently on the command line.
    pipeline_kwargs["n_workers"] = args.workers
    pipeline_kwargs["enable_smoothing"] = not args.disable_smoothing

    run_parallel_pipeline(**pipeline_kwargs)
    return 0
102
+
103
+
104
def _run_gui(args: argparse.Namespace) -> int:
    """Handle the ``gui`` sub-command: configure logging, then run the GUI.

    The GUI module is imported only here, after logging is configured.
    Returns 0 when the GUI main loop exits.
    """
    level = _log_level(args)
    configure_logging(level, force=True)
    from glycanPRMQuant.pipelineGUI import PipelineGUI

    PipelineGUI().mainloop()
    return 0
111
+
112
+
113
def build_parser() -> argparse.ArgumentParser:
    """Build the ``glycan-prmquant`` parser with its three sub-commands.

    ``run`` processes one mzML file, ``batch`` processes several, and ``gui``
    launches the Tkinter GUI. Each sub-parser stores its handler under
    ``func`` so the caller can dispatch on the parsed namespace.
    """
    root = argparse.ArgumentParser(prog="glycan-prmquant")
    commands = root.add_subparsers(dest="command", required=True)

    # run: two positionals plus the shared options
    single = commands.add_parser("run", help="Process one mzML file")
    single.set_defaults(func=_run_one)
    for positional in ("mzml_file", "output_dir"):
        single.add_argument(positional)
    _add_common_options(single)

    # batch: exactly one input source is required
    batch = commands.add_parser("batch", help="Process multiple mzML files")
    batch.set_defaults(func=_run_batch)
    inputs = batch.add_mutually_exclusive_group(required=True)
    inputs.add_argument("--input-dir")
    inputs.add_argument("--input-files", nargs="+")
    batch.add_argument("--output-root", required=True)
    batch.add_argument("--workers", type=int)
    for switch in ("--overwrite", "--dry-run"):
        batch.add_argument(switch, action="store_true")
    _add_common_options(batch)

    # gui: only the logging flags apply
    gui = commands.add_parser("gui", help="Launch the Tkinter GUI")
    gui.set_defaults(func=_run_gui)
    gui.add_argument("--quiet", action="store_true")
    gui.add_argument("-v", "--verbose", action="count", default=0)

    return root
140
+
141
+
142
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and dispatch to the chosen sub-command.

    Parameters
    ----------
    argv : list[str] or None
        Arguments to parse; ``None`` lets argparse read ``sys.argv[1:]``.

    Returns
    -------
    int
        The exit status returned by the sub-command handler.
    """
    # freeze_support() must run before argument parsing. In a frozen Windows
    # executable the spawned worker re-enters this entry point with
    # multiprocessing's own command line, which argparse would reject before
    # the call inside the batch handler is ever reached. Per the Python docs
    # the call has no effect in a normal interpreter.
    multiprocessing.freeze_support()
    parser = build_parser()
    args = parser.parse_args(argv)
    return args.func(args)


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,55 @@
1
+ import os
2
+ import pandas as pd
3
+
4
def consolidate_auc_results(results_root: str, output_csv: str):
    """
    Consolidate all <mzML_basename>_auc_values.csv files under `results_root`
    into one CSV file.

    The output will have one row per glycan, a 'Glycan' column plus one column
    per mzML file (named by the folder/mzML basename) containing its AUC values.
    Sub-folders whose AUC file is missing, has no 'Glycan' column, or has no
    AUC column are reported with a printed warning and skipped.

    Parameters
    ----------
    results_root : str
        Path to the directory containing per-file subfolders (each with *_auc_values.csv).
    output_csv : str
        Path to write the consolidated CSV file (e.g. "all_auc_summary.csv").
        Missing parent directories are created.

    Returns
    -------
    pandas.DataFrame
        The consolidated table that was written to `output_csv`.

    Raises
    ------
    RuntimeError
        If no usable *_auc_values.csv file is found under `results_root`.
    """
    auc_dfs = []
    for sub in sorted(os.listdir(results_root)):
        subdir = os.path.join(results_root, sub)
        if not os.path.isdir(subdir):
            continue
        auc_file = os.path.join(subdir, f"{sub}_auc_values.csv")
        if not os.path.isfile(auc_file):
            print(f"Warning: no AUC file found for {sub} (looking for {auc_file})")
            continue

        df = pd.read_csv(auc_file)
        # Without a 'Glycan' key the file cannot be merged; skip it instead of
        # aborting the whole consolidation with a KeyError.
        if 'Glycan' not in df.columns:
            print(f"Warning: no 'Glycan' column found in {auc_file}")
            continue
        # Identify AUC column(s) (case-insensitive contains 'auc')
        auc_cols = [c for c in df.columns if 'auc' in c.lower()]
        if not auc_cols:
            print(f"Warning: no AUC column found in {auc_file}")
            continue
        # Prefer exact 'AUC' match if present
        auc_col = 'AUC' if 'AUC' in auc_cols else auc_cols[0]
        # Select first, then rename: renaming first could produce duplicate
        # column labels when the folder name equals another column in the file.
        auc_dfs.append(df[['Glycan', auc_col]].rename(columns={auc_col: sub}))

    if not auc_dfs:
        raise RuntimeError(f"No _auc_values.csv files found in {results_root}")

    # Merge all on 'Glycan' using outer join
    merged = auc_dfs[0]
    for df in auc_dfs[1:]:
        merged = pd.merge(merged, df, on='Glycan', how='outer')

    merged = merged.sort_values('Glycan').reset_index(drop=True)

    # Write to CSV
    out_dir = os.path.dirname(output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    merged.to_csv(output_csv, index=False)
    print(f"Wrote consolidated AUC summary to {output_csv}")
    return merged
@@ -0,0 +1,15 @@
1
+ """
2
+ Shared constants for glycanPRMQuant package.
3
+
4
+ This module contains all physical and chemical constants used throughout
5
+ the package to ensure consistency across modules.
6
+ """
7
+
8
+ from .resources import resource_path
9
+
10
# Mass constants (in Daltons)
PROTON_MASS = 1.007276
NH4_MASS = 18.033826 # Ammonium adduct mass

# Default file paths
# The precursor database path is produced by the package's resource_path
# helper from a path relative to the package resources.
# NOTE(review): presumably resolved to a location inside the installed
# package — confirm against resources.resource_path.
DEFAULT_PRECURSOR_DB = resource_path("database/N_glycan_db.csv")