glycanPRMQuant 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. glycanprmquant-0.1.0/LICENSE +21 -0
  2. glycanprmquant-0.1.0/MANIFEST.in +3 -0
  3. glycanprmquant-0.1.0/PKG-INFO +391 -0
  4. glycanprmquant-0.1.0/README.md +360 -0
  5. glycanprmquant-0.1.0/glycanPRMQuant/__init__.py +17 -0
  6. glycanprmquant-0.1.0/glycanPRMQuant/calculateAUC.py +260 -0
  7. glycanprmquant-0.1.0/glycanPRMQuant/calculate_mass.py +42 -0
  8. glycanprmquant-0.1.0/glycanPRMQuant/centroidData.py +54 -0
  9. glycanprmquant-0.1.0/glycanPRMQuant/cli.py +149 -0
  10. glycanprmquant-0.1.0/glycanPRMQuant/consolidateAUC.py +55 -0
  11. glycanprmquant-0.1.0/glycanPRMQuant/constants.py +15 -0
  12. glycanprmquant-0.1.0/glycanPRMQuant/database/N_glycan_db.csv +1082 -0
  13. glycanprmquant-0.1.0/glycanPRMQuant/fragment_structure.py +70 -0
  14. glycanprmquant-0.1.0/glycanPRMQuant/glycanClassification.py +58 -0
  15. glycanprmquant-0.1.0/glycanPRMQuant/glycanClassificationUI.py +190 -0
  16. glycanprmquant-0.1.0/glycanPRMQuant/glycantypeBarplot.py +184 -0
  17. glycanprmquant-0.1.0/glycanPRMQuant/intensityBarplot.py +46 -0
  18. glycanprmquant-0.1.0/glycanPRMQuant/logging_utils.py +41 -0
  19. glycanprmquant-0.1.0/glycanPRMQuant/matchMS1.py +193 -0
  20. glycanprmquant-0.1.0/glycanPRMQuant/matchMS2.py +369 -0
  21. glycanprmquant-0.1.0/glycanPRMQuant/msPlotter.py +54 -0
  22. glycanprmquant-0.1.0/glycanPRMQuant/msfileReader.py +186 -0
  23. glycanprmquant-0.1.0/glycanPRMQuant/parallelProcess.py +234 -0
  24. glycanprmquant-0.1.0/glycanPRMQuant/performPCA.py +70 -0
  25. glycanprmquant-0.1.0/glycanPRMQuant/pipelineGUI.py +589 -0
  26. glycanprmquant-0.1.0/glycanPRMQuant/plotFragmentIntensity.py +356 -0
  27. glycanprmquant-0.1.0/glycanPRMQuant/plotMS2spectrum.py +152 -0
  28. glycanprmquant-0.1.0/glycanPRMQuant/processmzML.py +446 -0
  29. glycanprmquant-0.1.0/glycanPRMQuant/resources.py +34 -0
  30. glycanprmquant-0.1.0/glycanPRMQuant/skylineTransition.py +54 -0
  31. glycanprmquant-0.1.0/glycanPRMQuant.egg-info/PKG-INFO +391 -0
  32. glycanprmquant-0.1.0/glycanPRMQuant.egg-info/SOURCES.txt +52 -0
  33. glycanprmquant-0.1.0/glycanPRMQuant.egg-info/dependency_links.txt +1 -0
  34. glycanprmquant-0.1.0/glycanPRMQuant.egg-info/entry_points.txt +2 -0
  35. glycanprmquant-0.1.0/glycanPRMQuant.egg-info/requires.txt +16 -0
  36. glycanprmquant-0.1.0/glycanPRMQuant.egg-info/top_level.txt +1 -0
  37. glycanprmquant-0.1.0/pyproject.toml +51 -0
  38. glycanprmquant-0.1.0/setup.cfg +4 -0
  39. glycanprmquant-0.1.0/setup.py +4 -0
  40. glycanprmquant-0.1.0/tests/__init__.py +0 -0
  41. glycanprmquant-0.1.0/tests/test_MS2spectrum.py +29 -0
  42. glycanprmquant-0.1.0/tests/test_calculateAUC.py +27 -0
  43. glycanprmquant-0.1.0/tests/test_consolidateAUC.py +23 -0
  44. glycanprmquant-0.1.0/tests/test_extractMS1.py +13 -0
  45. glycanprmquant-0.1.0/tests/test_glycanClassification.py +17 -0
  46. glycanprmquant-0.1.0/tests/test_glycantypeBarplot.py +20 -0
  47. glycanprmquant-0.1.0/tests/test_intensityBarplot.py +19 -0
  48. glycanprmquant-0.1.0/tests/test_matchMS1.py +48 -0
  49. glycanprmquant-0.1.0/tests/test_matchMS2.py +70 -0
  50. glycanprmquant-0.1.0/tests/test_parallelProcess.py +18 -0
  51. glycanprmquant-0.1.0/tests/test_performPCA2D.py +22 -0
  52. glycanprmquant-0.1.0/tests/test_plotfragmentintensity.py +27 -0
  53. glycanprmquant-0.1.0/tests/test_processmzML.py +5 -0
  54. glycanprmquant-0.1.0/tests/test_skylineTransition.py +21 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vishal Sandilya
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include glycanPRMQuant/database *.csv
@@ -0,0 +1,391 @@
1
+ Metadata-Version: 2.4
2
+ Name: glycanPRMQuant
3
+ Version: 0.1.0
4
+ Summary: Targeted PRM glycomics analysis from mzML data
5
+ Author-email: Vishal Sandilya <vishal.sandilya@ttu.edu>
6
+ License-Expression: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
12
+ Requires-Python: >=3.12
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: numpy
16
+ Requires-Dist: pandas
17
+ Requires-Dist: scipy
18
+ Requires-Dist: matplotlib
19
+ Requires-Dist: seaborn
20
+ Requires-Dist: statsmodels
21
+ Requires-Dist: scikit-learn
22
+ Requires-Dist: openpyxl
23
+ Requires-Dist: scienceplots
24
+ Requires-Dist: pyteomics
25
+ Requires-Dist: glypy
26
+ Provides-Extra: dev
27
+ Requires-Dist: build; extra == "dev"
28
+ Requires-Dist: pytest; extra == "dev"
29
+ Requires-Dist: twine; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # glycanPRMQuant
33
+
34
+ `glycanPRMQuant` is a Python package for targeted PRM glycomics analysis from
35
+ `.mzML` data. It extracts MS2 spectra, matches precursor ions to N-glycan
36
+ compositions, generates theoretical fragments from IUPAC structures, resolves
37
+ likely structures, plots chromatograms/spectra, and quantifies glycan signal by
38
+ AUC.
39
+
40
+ The package can be run from a Tkinter GUI for batch processing or called
41
+ programmatically from Python.
42
+
43
+ ## What It Does
44
+
45
+ - Reads `.mzML` files (converted from vendor formats) with `pyteomics`.
46
+ - Matches MS1 precursor m/z values against glycan compositions.
47
+ - Calculates precursor neutral masses from the bundled `N_glycan_db.csv` using
48
+ `glypy`, computing each mass once per unique `Composition`.
49
+ - Generates theoretical MS2 fragments from each candidate `Condensed IUPAC`
50
+ structure for a matched numerical composition.
51
+ - Scores candidate IUPAC structures and returns the most likely structure with
52
+ the numerical composition.
53
+ - Supports configurable fragment ion series, maximum cleavage count, m/z
54
+ tolerances, intensity thresholds, smoothing, and AUC boundary logic.
55
+ - Produces per-glycan MS2 CSV files, chromatograms, spectra, AUC tables, and
56
+ optional Skyline transition lists.
57
+ - Runs one file or many files in parallel.
58
+
59
+ ## Repository Layout
60
+
61
+ - `glycanPRMQuant/processmzML.py`
62
+ Single-file end-to-end pipeline: extraction, MS1 matching, MS2 matching,
63
+ plotting, AUC, and optional Skyline export.
64
+ - `glycanPRMQuant/parallelProcess.py`
65
+ Parallel multi-file runner used by the GUI and programmatic batch workflows.
66
+ - `glycanPRMQuant/pipelineGUI.py`
67
+ Tkinter GUI for selecting input files, output folder, matching parameters,
68
+ plotting options, DB overrides, and batch execution.
69
+ - `glycanPRMQuant/matchMS1.py`
70
+ Precursor matching. Uses the N-glycan database by default and calculates
71
+ neutral masses from grouped IUPAC compositions.
72
+ - `glycanPRMQuant/matchMS2.py`
73
+ Fragment matching. Generates fragments from IUPAC candidates, matches
74
+ observed fragments, and selects the best IUPAC structure.
75
+ - `glycanPRMQuant/fragment_structure.py`
76
+ `glypy`-based theoretical glycan fragmentation.
77
+ - `glycanPRMQuant/calculateAUC.py`
78
+ Peak picking, integration windows, smoothing, and AUC summarization.
79
+ - `glycanPRMQuant/plotFragmentIntensity.py` and `plotMS2spectrum.py`
80
+ Chromatogram and spectrum plotting utilities.
81
+ - `glycanPRMQuant/database/N_glycan_db.csv`
82
+ Default structure database with `Condensed IUPAC`, `Composition`, and
83
+ `Numerical Composition` columns.
84
+
85
+ ## Installation
86
+
87
+ Clone the repository and install it in editable mode:
88
+
89
+ ```bash
90
+ git clone https://github.com/Elquimico09/GlycanPRMQuant.git
91
+ cd GlycanPRMQuant
92
+ python -m venv .venv
93
+ ```
94
+
95
+ Activate the environment:
96
+
97
+ ```bash
98
+ # Windows
99
+ .venv\Scripts\activate
100
+
101
+ # macOS/Linux
102
+ source .venv/bin/activate
103
+ ```
104
+
105
+ Install:
106
+
107
+ ```bash
108
+ pip install -e .
109
+ ```
110
+
111
+ The package requires Python `>=3.12`.
112
+
113
+ ## Dependencies
114
+
115
+ Installed from `pyproject.toml`:
116
+
117
+ - `numpy`
118
+ - `pandas`
119
+ - `scipy`
120
+ - `matplotlib`
121
+ - `seaborn`
122
+ - `statsmodels`
123
+ - `scikit-learn`
124
+ - `openpyxl`
125
+ - `scienceplots`
126
+ - `pyteomics`
127
+ - `glypy`
128
+
129
+ External requirement:
130
+
131
+ - Input data must be in `.mzML` format. Convert vendor files with ProteoWizard
132
+ `msconvert` before running the pipeline.
133
+
134
+ ## Development Checks
135
+
136
+ Install the development extra and run the tests:
137
+
138
+ ```bash
139
+ pip install -e ".[dev]"
140
+ python -m pytest
141
+ python -m build
142
+ python -m twine check dist/*
143
+ ```
144
+
145
+ ## Quick Start: GUI
146
+
147
+ Run:
148
+
149
+ ```bash
150
+ glycan-prmquant gui
151
+ ```
152
+
153
+ In the GUI:
154
+
155
+ 1. Select one or more `.mzML` files.
156
+ 2. Select an output folder.
157
+ 3. Optionally provide custom precursor/structure DB files. Leave blank to use
158
+ the bundled `N_glycan_db.csv`.
159
+ 4. Set MS1/MS2 tolerances and intensity thresholds.
160
+ 5. Set fragment options:
161
+    - `Fragment ion series`: any combination of `A`, `B`, `C`, `X`, `Y`, `Z`.
162
+      Default: `ABCXYZ`.
163
+    - `Max cleavages`: maximum number of cleavages used during theoretical
164
+      fragmentation. Default: `2`.
165
+ 6. Choose output options and run.
166
+
167
+ You can also launch the GUI as a module:
168
+
169
+ ```bash
170
+ python -m glycanPRMQuant.pipelineGUI
171
+ ```
172
+
173
+ ## Quick Start: Command Line
174
+
175
+ Process one file:
176
+
177
+ ```bash
178
+ glycan-prmquant run path/to/sample.mzML path/to/output_dir \
179
+ --ppm-ms1-tol 10 \
180
+ --ppm-ms2-tol 10 \
181
+ --mz-tol 0.02 \
182
+ --fragment-ion-series BY \
183
+ --fragment-max-cleavages 2
184
+ ```
185
+
186
+ Process a folder of `.mzML` files:
187
+
188
+ ```bash
189
+ glycan-prmquant batch \
190
+ --input-dir path/to/mzml_folder \
191
+ --output-root path/to/results \
192
+ --workers 4
193
+ ```
194
+
195
+ Process specific files:
196
+
197
+ ```bash
198
+ glycan-prmquant batch \
199
+ --input-files path/to/file1.mzML path/to/file2.mzML \
200
+ --output-root path/to/results \
201
+ --workers 2
202
+ ```
203
+
204
+ Useful CLI flags:
205
+
206
+ - `--precursor-db-path` and `--structure-db-path` override the bundled
207
+ `N_glycan_db.csv`.
208
+ - `--skyline-transition` writes Skyline transition lists.
209
+ - `--disable-smoothing` disables chromatogram/AUC smoothing.
210
+ - `--quiet` shows warnings/errors only.
211
+ - `-v` and `-vv` increase logging verbosity.
212
+
213
+ ## Quick Start: Single File
214
+
215
+ ```python
216
+ from glycanPRMQuant.processmzML import process_mzml_pipeline
217
+
218
+ process_mzml_pipeline(
219
+ mzml_file="path/to/sample.mzML",
220
+ output_dir="path/to/output_dir",
221
+ ppm_ms1_tol=10,
222
+ mz_min=400,
223
+ mz_max=2000,
224
+ intensity_threshold=1e2,
225
+ ppm_ms2_tol=10,
226
+ mz_tol=0.02,
227
+ fragment_ion_series="BY",
228
+ fragment_max_cleavages=2,
229
+ )
230
+ ```
231
+
232
+ ## Quick Start: Multiple Files
233
+
234
+ On Windows, keep the `if __name__ == "__main__"` guard for multiprocessing.
235
+
236
+ ```python
237
+ import multiprocessing
238
+ from glycanPRMQuant.parallelProcess import run_parallel_pipeline
239
+
240
+ if __name__ == "__main__":
241
+     multiprocessing.freeze_support()
242
+     run_parallel_pipeline(
243
+         input_files=[
244
+             r"path\to\file1.mzML",
245
+             r"path\to\file2.mzML",
246
+         ],
247
+         output_root=r"path\to\results",
248
+         n_workers=4,
249
+         ppm_ms1_tol=10,
250
+         ppm_ms2_tol=10,
251
+         mz_tol=0.02,
252
+         fragment_ion_series="ABCXYZ",
253
+         fragment_max_cleavages=2,
254
+     )
255
+ ```
256
+
257
+ ## Custom Databases
258
+
259
+ By default, both MS1 and MS2 use the bundled `N_glycan_db.csv`.
260
+
261
+ You can override the database paths:
262
+
263
+ ```python
264
+ process_mzml_pipeline(
265
+ mzml_file="path/to/sample.mzML",
266
+ output_dir="path/to/output_dir",
267
+ precursor_db_path="path/to/N_glycan_db.csv",
268
+ structure_db_path="path/to/N_glycan_db.csv",
269
+ )
270
+ ```
271
+
272
+ The N-glycan structure database should include:
273
+
274
+ - `Condensed IUPAC`
275
+ - `Composition`
276
+ - `Numerical Composition`
277
+
278
+ `matchMS1` groups by `Composition` and calculates mass once per composition.
279
+ `matchMS2` groups by `Numerical Composition` and fragments each candidate IUPAC
280
+ structure for that composition.
281
+
282
+ ## Matching Details
283
+
284
+ ### MS1
285
+
286
+ `matchMS1` calculates neutral masses from the first parsable IUPAC structure for
287
+ each unique `Composition`, then generates precursor adduct m/z values:
288
+
289
+ - `2H`
290
+ - `3H`
291
+ - `4H`
292
+ - `H+NH4`
293
+ - `2NH4`
294
+
295
+ The output includes:
296
+
297
+ - `precursor_mz`
298
+ - `Glycan` (the numerical composition ID when available)
299
+ - `Adduct`
300
+ - `database_mz`
301
+ - `ppm_error`
302
+
303
+ ### MS2
304
+
305
+ `matchMS2` uses the matched numerical composition to find all candidate IUPAC
306
+ structures, generates theoretical fragments, and matches observed fragments by
307
+ m/z tolerance. It scores candidate structures by:
308
+
309
+ 1. Total matched fragment count
310
+ 2. Unique matched fragment count
311
+ 3. Total matched fragment intensity
312
+ 4. Mean absolute ppm error
313
+
314
+ The returned rows are restricted to the selected best-scoring IUPAC structure and include:
315
+
316
+ - `Glycan`
317
+ - `NumericalComposition`
318
+ - `Composition`
319
+ - `IUPAC`
320
+ - `Fragment`
321
+ - `FragmentType`
322
+ - `fragment_mz`
323
+ - `fragment_intensity`
324
+ - `Charge`
325
+ - `Adduct`
326
+ - `IUPAC_match_count`
327
+ - `IUPAC_unique_fragments`
328
+ - `IUPAC_total_intensity`
329
+
330
+ ## Important Parameters
331
+
332
+ - `ppm_ms1_tol`: precursor matching tolerance in ppm.
333
+ - `mz_min`, `mz_max`: precursor m/z search range.
334
+ - `mz_offset`: offset applied to calculated precursor adduct m/z values.
335
+ - `mass_offset`: offset applied to neutral masses before precursor adduct
336
+ calculation.
337
+ - `intensity_threshold`: minimum MS2 fragment intensity used during extraction
338
+ and matching.
339
+ - `ppm_ms2_tol`: tolerance in ppm used to associate MS2 scans with matched precursors.
340
+ - `mz_tol`: fragment m/z tolerance in Da.
341
+ - `fragment_ion_series`: allowed theoretical fragment ion series. Use any
342
+ combination of `A`, `B`, `C`, `X`, `Y`, `Z`.
343
+ - `fragment_max_cleavages`: maximum number of cleavages during theoretical
344
+ fragmentation.
345
+ - `smoothing_window`: smoothing strength/window for chromatograms and AUC.
346
+ - `smoothing_method`: `gaussian` or `savgol`.
347
+ - `rel_height`: AUC boundary relative height.
348
+ - `rel_height_mode`: `prominence` or `height`.
349
+ - `skyline_transition`: write a Skyline transition list when `True`.
350
+
351
+ ## Outputs
352
+
353
+ Each sample output directory can include:
354
+
355
+ - `ms1_results.csv`
356
+ Matched precursor assignments.
357
+ - `ms2_<glycan>.csv`
358
+ Matched MS2 rows for a numerical glycan composition, including selected IUPAC
359
+ structure information.
360
+ - `<sample>_auc_values.csv`
361
+ Glycan-level total AUC.
362
+ - `<sample>_auc_values_by_adduct.csv`
363
+ Per-adduct AUC values.
364
+ - `<sample>_skyline_transitions.xlsx`
365
+ Optional Skyline transition export.
366
+ - `images/*.pdf`
367
+ Fragment chromatograms, precursor-adduct chromatograms, total chromatograms,
368
+ shaded AUC plots, and averaged MS2 spectra.
369
+
370
+ For multi-file runs:
371
+
372
+ - `combined_auc_values.csv` is written at the output root when more than one
373
+ file is processed.
374
+
375
+ ## Notes For Packaging
376
+
377
+ Default database paths are resolved through `glycanPRMQuant.resources`, which
378
+ supports both source-tree execution and PyInstaller-style bundled resources.
379
+ When building an executable, include `glycanPRMQuant/database/` as bundled data.
380
+
381
+ ## Data Availability
382
+
383
+ Development and benchmarking data are available through MassIVE: `MSV000101208`.
384
+
385
+ The package is archived on Zenodo:
386
+ [![DOI](https://zenodo.org/badge/945763571.svg)](https://doi.org/10.5281/zenodo.19189798)
387
+
388
+ ## License
389
+
390
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
391
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-green.svg)](https://www.python.org)