pycompound 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: pycompound
3
+ Version: 0.0.1
4
+ Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
+ Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/hdlugas/pycompound
8
+ Project-URL: Issues, https://github.com/hdlugas/pycompound/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: matplotlib==3.8.4
15
+ Requires-Dist: numpy==1.26.4
16
+ Requires-Dist: pandas==2.2.2
17
+ Requires-Dist: scipy==1.13.1
18
+ Requires-Dist: pyteomics==4.7.2
19
+ Requires-Dist: netCDF4==1.6.5
20
+ Requires-Dist: lxml>=5.1.0
21
+ Requires-Dist: shiny==1.4.0
22
+ Dynamic: license-file
23
+
24
+ # PyCompound
25
+ PyCompound is a Python-based tool for spectral library matching, available as a Python package with a command-line interface (CLI) and as a GUI application built with Python/Shiny. It performs spectral library matching to identify chemical compounds, offering a range of spectrum preprocessing transformations and similarity measures, including Cosine and three entropy-based similarity measures. PyCompound supports both high-resolution mass spectrometry (HRMS) data (e.g., LC-MS/MS) and nominal-resolution mass spectrometry (NRMS) data (e.g., GC-MS). For documentation and usage instructions for PyCompound, please refer to the GitHub repository [https://github.com/hdlugas/pycompound](https://github.com/hdlugas/pycompound).
26
+
@@ -0,0 +1,14 @@
1
+ pycompound-0.0.1.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
2
+ pycompound_fy7392/app.py,sha256=aSfI6Rb5B5Qk3wetRcrmYncT5ZASjdiSJEtgw_EMnA8,30224
3
+ pycompound_fy7392/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
4
+ pycompound_fy7392/plot_spectra.py,sha256=aeeCfYSd3qx-PGaMm2htpq4EFXOd-HB-PQ2gAa-pSrw,41325
5
+ pycompound_fy7392/plot_spectra_CLI.py,sha256=SfuA-hR3MuQPblp6rQKaYimefVBbXknt_ZBpuBHc6cs,7818
6
+ pycompound_fy7392/processing.py,sha256=7cKMX7PQ4Q-I4c8lRo5qXbOVGr8CeRdgNPURJx8DBV0,11075
7
+ pycompound_fy7392/similarity_measures.py,sha256=ocobDrW8FrWMCdMM5bwQLPCXU44fQEY1luGwFLm9eoE,4278
8
+ pycompound_fy7392/spec_lib_matching.py,sha256=B4dOcLX2aC4Fasgj9O3f_qjiU43wT4ygjAbH0BmJhVM,63264
9
+ pycompound_fy7392/spec_lib_matching_CLI.py,sha256=GN_sW5uTAEggoepwUj_FzW8gxUr55oS97SdvmDtF9z8,9337
10
+ pycompound_fy7392/tuning_CLI.py,sha256=FPD33xkCfsoGLLh3bCfuhYIrAvsPqe3hm-L4wwfU1xs,7982
11
+ pycompound-0.0.1.dist-info/METADATA,sha256=8IMgDlrtlHFAiYrzZY32EeECcmKZI9A2T_WOf9ib-cE,1555
12
+ pycompound-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ pycompound-0.0.1.dist-info/top_level.txt,sha256=jOc4cdcy-JvSi_GRF2whzak631-owRY2936rmmSzBDw,18
14
+ pycompound-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 hdlugas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ pycompound_fy7392
@@ -0,0 +1,301 @@
1
+
2
+ from shiny import App, ui, render, reactive
3
+ from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
4
+ from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
5
+ from pycompound_fy7392.plot_spectra import generate_plots_on_HRMS_data
6
+ from pycompound_fy7392.plot_spectra import generate_plots_on_NRMS_data
7
+ from pycompound_fy7392.spec_lib_matching import tune_params_on_HRMS_data
8
+ from pycompound_fy7392.spec_lib_matching import tune_params_on_NRMS_data
9
+ import subprocess
10
+ import traceback
11
+ from pathlib import Path
12
+ import pandas as pd
13
+
14
+
15
def split_or_wrap(s):
    """Parse a (possibly comma-separated) text entry into a list of typed values.

    The argument is first converted with ``str``. Each comma-separated token
    is stripped of surrounding whitespace and converted to the first type
    that fits: ``bool`` (case-insensitive ``true``/``false``), ``int``,
    ``float``, and otherwise it is kept as the stripped string.

    Args:
        s: Value to parse; anything accepted by ``str``.

    Returns:
        A list of parsed values. An entry without a comma always yields a
        one-element list. For comma-separated entries, empty tokens (e.g.
        from a trailing or doubled comma) are dropped so that they do not
        end up as ``''`` values in the result.
    """
    def parse(token):
        token = token.strip()
        lowered = token.lower()
        if lowered == 'true':
            return True
        if lowered == 'false':
            return False
        # Try the narrowest numeric type first so that "3" stays an int.
        for convert in (int, float):
            try:
                return convert(token)
            except ValueError:
                continue
        return token

    text = str(s)
    if ',' not in text:
        return [parse(text)]
    # Skip blank tokens: "0.5, 1.0," should mean two values, not three.
    return [parse(token) for token in text.split(',') if token.strip()]
35
+
36
+
37
def custom_on_off_to_bool(lst):
    """Translate a list of 'no'/'yes' selections into a list of booleans.

    Args:
        lst: The selected options, expected to be a list containing 'no'
            and/or 'yes', or an empty/falsy value when nothing is selected.

    Returns:
        ``[False]`` for ``['no']`` or a falsy argument, ``[True]`` for
        ``['yes']``, and ``[False, True]`` when both options are present
        (in either order).

    Raises:
        ValueError: If the argument is truthy and matches none of the
            recognised selections.
    """
    recognised = (
        (['no'], [False]),
        (['yes'], [True]),
        (['no', 'yes'], [False, True]),
        (['yes', 'no'], [False, True]),
    )
    for selection, flags in recognised:
        if lst == selection:
            return flags
    if not lst:
        return [False]
    raise ValueError(f"Unhandled input: {lst}")
50
+
51
+
52
+
53
# Static page layout: a task selector and a platform selector, followed by
# two server-rendered outputs ("dynamic_inputs" and "status_output").
app_ui = ui.page_fluid(
    ui.div(
        ui.input_select(
            id="choice",
            label="Choose an option:",
            choices=[
                "Run spectral library matching to identify unknown compounds",
                "Tune parameters with a query library with known compound IDs",
                "Plot two spectra",
            ],
        ),
        ui.input_radio_buttons(
            id="chromatography_platform",
            label="Choose chromatography platform:",
            choices=["HRMS", "NRMS"],
        ),
        style="width: 2000px; max-width: none;",
    ),
    ui.output_ui(id="dynamic_inputs"),
    ui.output_text(id="status_output"),
)
61
+
62
# Options of the top-level "choice" selector; the server dispatches on these.
_CHOICE_RUN = "Run spectral library matching to identify unknown compounds"
_CHOICE_TUNE = "Tune parameters with a query library with known compound IDs"
_CHOICE_PLOT = "Plot two spectra"

_SIMILARITY_MEASURES = ["cosine", "shannon", "renyi", "tsallis"]

_FILE_LABEL_QUERY = "Upload query dataset (mgf, mzML, cdf, msp, or csv):"
_FILE_LABEL_REFERENCE = "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"

# "{true}" is filled with "true" (matching mode) or "True" (tuning/plotting),
# keeping the wording each mode has always shown.
_HQ_LABEL = (
    "Indicate whether the reference library is considered to be of high quality. "
    "If {true}, then the spectrum preprocessing transformations of filtering and "
    "noise removal are performed only on the query spectrum/spectra."
)

_ORDER_LABEL_HRMS = (
    "Input a sequence of characters denoting the order in which spectrum "
    "preprocessing transformations should be applied. Available "
    "characters/transformations are C (centroiding), F (filtering), M (matching), "
    "N (noise removal), L (low-entropy transformation), and W (weight factor "
    "transformation). M must be in sequence, and if C is performed, then C must be "
    "performed before M."
)
_ORDER_LABEL_NRMS = (
    "Input a sequence of characters denoting the order in which spectrum "
    "preprocessing transformations should be applied. Available "
    "characters/transformations are F (filtering), N (noise removal), "
    "L (low-entropy transformation), and W (weight factor transformation)."
)
_ORDER_MULTI_SUFFIX = (
    " If multiple spectrum preprocessing orders are to be tried, separate by comma."
)

# One row per preprocessing parameter, in the order the widgets are shown and
# the tuning grid is built: (input id, label template, default, HRMS-only).
# "{s}" becomes "(s)" in tuning mode, where several values may be entered.
_NUMERIC_PARAMS = (
    ("mz_min", "Enter numeric value{s} for minimum mass/charge ratio for filtering", 0, False),
    ("mz_max", "Enter numeric value{s} for maximum mass/charge ratio for filtering", 99999999, False),
    ("int_min", "Enter numeric value{s} for minimum intensity for filtering", 0, False),
    ("int_max", "Enter numeric value{s} for maximum intensity for filtering", 999999999, False),
    ("window_size_centroiding", "Enter numeric value{s} for the centroiding window-size", 0.5, True),
    ("window_size_matching", "Enter numeric value{s} for the matching window-size", 0.5, True),
    ("noise_threshold", "Enter numeric value{s} for the noise removal threshold", 0.0, False),
    ("wf_mz", "Enter numeric value{s} for the mass/charge weight factor", 0.0, False),
    ("wf_int", "Enter numeric value{s} for the intensity weight factor", 1.0, False),
    ("LET_threshold", "Enter non-negative numeric value{s} for the low-entropy threshold", 0.0, False),
    ("entropy_dimension", "Enter non-negative, non-unity numeric value{s} for the entropy dimension (only applicable to Renyi and Tsallis)", 1.1, False),
)


def _blank_to_none(value):
    """Return None for an empty/missing text entry, otherwise the entry itself."""
    return value if value else None


def _build_inputs(choice, is_hrms):
    """Build the widget list for one task (``choice``) and platform."""
    tuning = choice == _CHOICE_TUNE
    plotting = choice == _CHOICE_PLOT
    cwd = Path.cwd()

    widgets = [
        ui.input_file("query_data", _FILE_LABEL_QUERY),
        ui.input_file("reference_data", _FILE_LABEL_REFERENCE),
    ]

    if plotting:
        widgets.append(ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None))
        widgets.append(ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None))

    if tuning:
        # Tuning explores a grid, so several options may be ticked at once.
        widgets.append(ui.input_checkbox_group("similarity_measure", "Select similarity measure(s):", list(_SIMILARITY_MEASURES)))
        widgets.append(ui.input_checkbox_group("high_quality_reference_library", _HQ_LABEL.format(true="True"), ["no", "yes"]))
    else:
        widgets.append(ui.input_select("similarity_measure", "Select similarity measure:", list(_SIMILARITY_MEASURES)))
        widgets.append(ui.input_select("high_quality_reference_library", _HQ_LABEL.format(true="True" if plotting else "true"), [False, True]))

    order_label = _ORDER_LABEL_HRMS if is_hrms else _ORDER_LABEL_NRMS
    if tuning:
        # Only tuning splits this entry on commas, so only tuning advertises it.
        order_label += _ORDER_MULTI_SUFFIX
    widgets.append(ui.input_text("spectrum_preprocessing_order", order_label, "FCNMWL" if is_hrms else "FNLW"))

    for input_id, template, default, hrms_only in _NUMERIC_PARAMS:
        if hrms_only and not is_hrms:
            continue
        if tuning:
            # Free-text box: the entry is split on commas by the run handler.
            label = template.format(s="(s)") + ". Separate multiple entries with comma."
            widgets.append(ui.input_text(input_id, label, default))
        else:
            widgets.append(ui.input_numeric(input_id, template.format(s="") + ":", default))

    if tuning:
        widgets.append(ui.input_text("output_path", "Path to parameter tuning output:", f'{cwd}/output_parameter_tuning.csv'))
    elif plotting:
        widgets.append(ui.input_select("y_axis_transformation", "Select the transformation to apply to the intensity axis of the generated plots:", ["normalized", "none", "log10", "sqrt"]))
        # The default file name has always differed between the two platforms.
        plot_file = "output_plots.pdf" if is_hrms else "output_plot.pdf"
        widgets.append(ui.input_text("output_path", "Path to plot output:", f'{cwd}/{plot_file}'))
    else:
        widgets.append(ui.input_numeric("n_top_matches_to_save", "Enter positive integer for the number of top matches to save:", 1))
        widgets.append(ui.input_text("output_identification", "Path to identification output:", f'{cwd}/output_identification.csv'))
        widgets.append(ui.input_text("output_similarity_scores", "Path to output file of similarity scores:", f'{cwd}/output_similarity_scores.csv'))

    widgets.append(ui.input_action_button("run_btn", "Run"))
    return ui.TagList(*widgets)


def server(input, output, session):
    """Shiny server function: render task-specific inputs and run the task.

    Registers three reactive pieces:

    * ``dynamic_inputs`` - the widgets for the selected task and platform;
    * an effect triggered by the "Run" button that calls the matching,
      tuning, or plotting function and records the outcome;
    * ``status_output`` - the text of the most recent outcome.

    Args:
        input: Shiny inputs object for the session.
        output: Shiny outputs object used to register renderers.
        session: Shiny session object (unused here).
    """
    run_status = reactive.Value("Waiting for input...")

    @output
    @render.ui
    def dynamic_inputs():
        choice = input.choice()
        if choice not in (_CHOICE_RUN, _CHOICE_TUNE, _CHOICE_PLOT):
            return None
        # Any platform other than "HRMS" gets the NRMS widget set.
        return _build_inputs(choice, input.chromatography_platform() == "HRMS")

    @reactive.effect
    @reactive.event(input.run_btn)
    def _():
        choice = input.choice()
        platform = input.chromatography_platform()
        if choice not in (_CHOICE_RUN, _CHOICE_TUNE, _CHOICE_PLOT) or platform not in ("HRMS", "NRMS"):
            return
        is_hrms = platform == "HRMS"
        # Window-size parameters exist only for HRMS data.
        param_ids = [row[0] for row in _NUMERIC_PARAMS if is_hrms or not row[3]]

        try:
            data = {
                'query_data': input.query_data()[0]['datapath'],
                'reference_data': input.reference_data()[0]['datapath'],
            }

            if choice == _CHOICE_TUNE:
                # Key order is kept as-is in case the tuning routine depends on it.
                grid = {
                    'similarity_measure': list(input.similarity_measure()),
                    'high_quality_reference_library': custom_on_off_to_bool(list(input.high_quality_reference_library())),
                    'spectrum_preprocessing_order': split_or_wrap(input.spectrum_preprocessing_order()),
                }
                for input_id in param_ids:
                    grid[input_id] = split_or_wrap(input[input_id]())
                tune = tune_params_on_HRMS_data if is_hrms else tune_params_on_NRMS_data
                tune(grid=grid, output_path=input.output_path(), **data)
                run_status.set("✅ Parameter tuning has finished.")
                return

            # NOTE(review): the high-quality select is built from [False, True];
            # confirm the value reaching the library is a bool and not the
            # string 'False' (which would be truthy).
            settings = {
                'similarity_measure': input.similarity_measure(),
                'spectrum_preprocessing_order': input.spectrum_preprocessing_order(),
                'high_quality_reference_library': input.high_quality_reference_library(),
            }
            for input_id in param_ids:
                # The library functions name this keyword "wf_intensity".
                keyword = 'wf_intensity' if input_id == 'wf_int' else input_id
                settings[keyword] = input[input_id]()

            if choice == _CHOICE_RUN:
                # Results are first written to the working directory and then
                # copied to the user-selected paths.
                tmp_identification = f'{Path.cwd()}/output_identification.csv'
                tmp_similarity_scores = f'{Path.cwd()}/output_similarity_scores.csv'
                run = run_spec_lib_matching_on_HRMS_data if is_hrms else run_spec_lib_matching_on_NRMS_data
                run(
                    likely_reference_ids=None,
                    n_top_matches_to_save=input.n_top_matches_to_save(),
                    print_id_results=False,
                    output_identification=tmp_identification,
                    output_similarity_scores=tmp_similarity_scores,
                    **data,
                    **settings,
                )
                df_identification_tmp = pd.read_csv(tmp_identification)
                df_similarity_scores_tmp = pd.read_csv(tmp_similarity_scores)
                df_identification_tmp.to_csv(input.output_identification(), index=False)
                df_similarity_scores_tmp.to_csv(input.output_similarity_scores(), index=False)
                run_status.set("✅ Spectral library matching has finished.")
            else:
                plot = generate_plots_on_HRMS_data if is_hrms else generate_plots_on_NRMS_data
                plot(
                    # An empty ID box means "not specified"; a filled one is
                    # passed through unchanged on both platforms.
                    spectrum_ID1=_blank_to_none(input.spectrum_ID1()),
                    spectrum_ID2=_blank_to_none(input.spectrum_ID2()),
                    y_axis_transformation=input.y_axis_transformation(),
                    output_path=input.output_path(),
                    **data,
                    **settings,
                )
                run_status.set("✅ Plotting has finished.")
        except Exception:
            # Top-level UI boundary: show the full traceback instead of crashing.
            run_status.set(f"❌ Error: {traceback.format_exc()}")

    @output
    @render.text
    def status_output():
        return run_status.get()
298
+
299
+
300
# Application object: pairs the static layout with the server function.
app = App(ui=app_ui, server=server)
301
+
@@ -0,0 +1,135 @@
1
+
2
+ # this script has a function to extract the mass spectra from an mgf, mzML, cdf, or msp file and write them in the format required for spectral library matching
3
+
4
+ import netCDF4 as nc
5
+ import numpy as np
6
+ import pandas as pd
7
+ from pathlib import Path
8
+ from pyteomics import mgf
9
+ from pyteomics import mzml
10
+ import sys
11
+
12
def build_library_from_raw_data(input_path=None, output_path=None, is_reference=False):
    '''
    Converts mgf, mzML, cdf, or msp file to the necessary format for spectral library matching.

    The output is a long-format CSV with one row per peak and the columns 'id', 'mz_ratio', and 'intensity'.

    --input_path: Path to input file (must be mgf, mzML, cdf, or msp file; the extension is matched case-insensitively). Mandatory argument.
    --output_path: Path to output CSV file. Default: <current working directory>/<input file name up to its first dot>.csv.
    --is_reference: Boolean flag indicating whether IDs of spectra should be written to output. Only pass true if building a reference library with known compound IDs. Only applicable to mgf and msp files. Options: \'True\', \'False\'. Optional argument. Default: False.

    On invalid arguments an error message is printed and SystemExit is raised with exit status 1.
    '''

    if input_path is None:
        print('Error: please specify input_path (i.e. the path to the input mgf, mzML, cdf, or msp file). Mandatory argument.')
        sys.exit(1)

    # allow pathlib.Path objects in addition to plain strings
    input_path = str(input_path)

    if output_path is None:
        # Path(...).name handles the path separator of the current platform
        basename = Path(input_path).name.split('.')[0]
        output_path = f'{Path.cwd()}/{basename}.csv'
        print(f'Warning: no output_path specified, so library is written to {output_path}')

    if is_reference not in [True,False]:
        print('Error: is_reference must be either \'True\' or \'False\'.')
        sys.exit(1)

    # determine which of the supported file types was passed to --input_path
    lowered_path = input_path.lower()
    if lowered_path.endswith('mgf'):
        input_file_type = 'mgf'
    elif lowered_path.endswith('mzml'):
        input_file_type = 'mzML'
    elif lowered_path.endswith('cdf'):
        input_file_type = 'cdf'
    elif lowered_path.endswith('msp'):
        input_file_type = 'msp'
    else:
        print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
        sys.exit(1)

    # extract the relevant information from each spectrum (i.e. m/z ratios and intensities)
    if input_file_type == 'cdf':
        ids, mzs, ints = _read_cdf_peaks(input_path)
    elif input_file_type == 'msp':
        ids, mzs, ints = _read_msp_peaks(input_path, is_reference)
    else:
        ids, mzs, ints = _read_pyteomics_peaks(input_path, input_file_type, is_reference)

    # write CSV file of spectra for use in spectral library matching
    df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
    df.to_csv(output_path, index=False)


def _read_pyteomics_peaks(input_path, input_file_type, is_reference):
    '''Reads an mgf or mzML file with pyteomics and returns parallel lists (ids, mzs, ints) with one entry per peak.'''
    ids, mzs, ints = [], [], []
    if input_file_type == 'mgf':
        source = mgf.read(input_path, index_by_scans = True)
    else:
        source = mzml.read(input_path)

    with source as reader:
        for i, spec in enumerate(reader):
            mz_array = spec['m/z array']
            n_peaks = len(mz_array)
            if n_peaks == 0:
                # a spectrum without peaks contributes no rows, so its name is never needed
                continue
            # compound names are only taken from mgf files; mzML spectra always get a running ID
            if input_file_type == 'mgf' and is_reference:
                spectrum_id = spec['params']['name']
            else:
                spectrum_id = f'ID_{i+1}'
            ids.extend([spectrum_id] * n_peaks)
            mzs.extend(mz_array)
            ints.extend(spec['intensity array'])
    return ids, mzs, ints


def _read_cdf_peaks(input_path):
    '''Reads a netCDF file and returns parallel lists (ids, mzs, ints) with one entry per peak.'''
    # the context manager closes the dataset even if a variable is missing
    with nc.Dataset(input_path, 'r') as dataset:
        all_mzs = dataset.variables['mass_values'][:]
        all_ints = dataset.variables['intensity_values'][:]
        scan_idxs = dataset.variables['scan_index'][:]

    n_scans = len(scan_idxs)
    ids, mzs, ints = [], [], []
    for i in range(n_scans):
        if i % 1000 == 0:
            print(f'analyzed {i} out of {n_scans} scans')
        s_idx = scan_idxs[i]
        # scan_index is used as the start offset of each scan; the final scan
        # has no successor, so it runs to the end of the peak arrays
        e_idx = scan_idxs[i+1] if i + 1 < n_scans else len(all_mzs)

        mzs_tmp = all_mzs[s_idx:e_idx]
        ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
        mzs.extend(mzs_tmp)
        ints.extend(all_ints[s_idx:e_idx])
    return ids, mzs, ints


def _read_msp_peaks(input_path, is_reference):
    '''Reads an msp file and returns parallel lists (ids, mzs, ints) with one entry per peak.'''
    name_prefix = 'name:'
    ids, mzs, ints = [], [], []
    spectrum_id = None
    n_spectra = 0
    with open(input_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line[:len(name_prefix)].lower() == name_prefix:
                # a 'Name:' record starts a new spectrum; running IDs start at ID_1 like the other formats
                n_spectra += 1
                if is_reference:
                    spectrum_id = line[len(name_prefix):].strip()
                else:
                    spectrum_id = f'ID_{n_spectra}'
            elif line and line[0].isdigit():
                if spectrum_id is None:
                    # numeric line before the first 'Name:' record belongs to no spectrum
                    continue
                try:
                    mz, intensity = map(float, line.split()[:2])
                except ValueError:
                    # not an '<m/z> <intensity>' pair; skip the line
                    continue
                ids.append(spectrum_id)
                mzs.append(mz)
                ints.append(intensity)
    return ids, mzs, ints
134
+
135
+