pycompound 0.0.1__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pycompound-0.0.1 → pycompound-0.0.6}/PKG-INFO +4 -4
- pycompound-0.0.6/README.md +2 -0
- {pycompound-0.0.1 → pycompound-0.0.6}/pyproject.toml +3 -2
- pycompound-0.0.6/src/app.py +364 -0
- {pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/plot_spectra.py +18 -36
- {pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/plot_spectra_CLI.py +5 -4
- pycompound-0.0.1/src/pycompound_fy7392/app.py → pycompound-0.0.6/src/pycompound/pycompound_shiny.py +8 -10
- pycompound-0.0.6/src/pycompound/similarity_measures.py +365 -0
- {pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/spec_lib_matching.py +110 -157
- {pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/spec_lib_matching_CLI.py +5 -3
- {pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/tuning_CLI.py +4 -2
- {pycompound-0.0.1 → pycompound-0.0.6}/src/pycompound.egg-info/PKG-INFO +4 -4
- pycompound-0.0.6/src/pycompound.egg-info/SOURCES.txt +23 -0
- {pycompound-0.0.1 → pycompound-0.0.6}/src/pycompound.egg-info/requires.txt +2 -1
- pycompound-0.0.6/src/pycompound.egg-info/top_level.txt +3 -0
- {pycompound-0.0.1 → pycompound-0.0.6}/tests/test_plot_spectra.py +44 -1
- pycompound-0.0.6/tests/test_similarity_measures.py +59 -0
- {pycompound-0.0.1 → pycompound-0.0.6}/tests/test_spec_lib_matching.py +12 -5
- {pycompound-0.0.1 → pycompound-0.0.6}/tests/test_tuning.py +0 -1
- pycompound-0.0.1/README.md +0 -3
- pycompound-0.0.1/src/pycompound.egg-info/SOURCES.txt +0 -22
- pycompound-0.0.1/src/pycompound.egg-info/top_level.txt +0 -1
- pycompound-0.0.1/src/pycompound_fy7392/similarity_measures.py +0 -100
- pycompound-0.0.1/tests/test_similarity_measures.py +0 -29
- {pycompound-0.0.1 → pycompound-0.0.6}/LICENSE +0 -0
- {pycompound-0.0.1 → pycompound-0.0.6}/setup.cfg +0 -0
- {pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/build_library.py +0 -0
- {pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/processing.py +0 -0
- {pycompound-0.0.1 → pycompound-0.0.6}/src/pycompound.egg-info/dependency_links.txt +0 -0
- {pycompound-0.0.1 → pycompound-0.0.6}/tests/test_build_library.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pycompound
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.6
|
|
4
4
|
Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
|
|
5
5
|
Author-email: Hunter Dlugas <fy7392@wayne.edu>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,9 +18,9 @@ Requires-Dist: scipy==1.13.1
|
|
|
18
18
|
Requires-Dist: pyteomics==4.7.2
|
|
19
19
|
Requires-Dist: netCDF4==1.6.5
|
|
20
20
|
Requires-Dist: lxml>=5.1.0
|
|
21
|
-
Requires-Dist:
|
|
21
|
+
Requires-Dist: orjson==3.11.0
|
|
22
|
+
Requires-Dist: joblib==1.5.2
|
|
22
23
|
Dynamic: license-file
|
|
23
24
|
|
|
24
25
|
# PyCompound
|
|
25
|
-
A Python-based tool for spectral library matching, PyCompound is available as a Python package with a command-line interface (CLI) available and as a GUI application build with Python/Shiny. It performs spectral library matching to identify chemical compounds, offering a range of spectrum preprocessing transformations and similarity measures, including Cosine
|
|
26
|
-
|
|
26
|
+
A Python-based tool for spectral library matching, PyCompound is available as a Python package with a command-line interface (CLI) and as a GUI application built with Python/Shiny. It performs spectral library matching to identify chemical compounds, offering a range of spectrum preprocessing transformations and similarity measures, including Cosine, three entropy-based similarity measures, and a plethora of binary similarity measures. PyCompound also includes functionality to tune parameters commonly used in a compound identification workflow given a query library of spectra with known IDs. PyCompound supports both high-resolution mass spectrometry (HRMS) data (e.g., LC-MS/MS) and nominal-resolution mass spectrometry (NRMS) data (e.g., GC-MS). For the full documentation, see the GitHub repository https://github.com/hdlugas/pycompound.
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
# PyCompound
|
|
2
|
+
A Python-based tool for spectral library matching, PyCompound is available as a Python package with a command-line interface (CLI) and as a GUI application built with Python/Shiny. It performs spectral library matching to identify chemical compounds, offering a range of spectrum preprocessing transformations and similarity measures, including Cosine, three entropy-based similarity measures, and a plethora of binary similarity measures. PyCompound also includes functionality to tune parameters commonly used in a compound identification workflow given a query library of spectra with known IDs. PyCompound supports both high-resolution mass spectrometry (HRMS) data (e.g., LC-MS/MS) and nominal-resolution mass spectrometry (NRMS) data (e.g., GC-MS). For the full documentation, see the GitHub repository https://github.com/hdlugas/pycompound.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pycompound"
|
|
7
|
-
version = "0.0.
|
|
7
|
+
version = "0.0.6"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Hunter Dlugas", email="fy7392@wayne.edu" },
|
|
10
10
|
]
|
|
@@ -26,7 +26,8 @@ dependencies = [
|
|
|
26
26
|
"pyteomics==4.7.2",
|
|
27
27
|
"netCDF4==1.6.5",
|
|
28
28
|
"lxml>=5.1.0",
|
|
29
|
-
"
|
|
29
|
+
"orjson==3.11.0",
|
|
30
|
+
"joblib==1.5.2"
|
|
30
31
|
]
|
|
31
32
|
|
|
32
33
|
[project.urls]
|
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
|
|
2
|
+
from shiny import App, ui, reactive, render
|
|
3
|
+
from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
|
+
from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
|
+
from pycompound_fy7392.spec_lib_matching import tune_params_on_HRMS_data
|
|
6
|
+
from pycompound_fy7392.spec_lib_matching import tune_params_on_NRMS_data
|
|
7
|
+
from pycompound_fy7392.plot_spectra import generate_plots_on_HRMS_data
|
|
8
|
+
from pycompound_fy7392.plot_spectra import generate_plots_on_NRMS_data
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import subprocess
|
|
11
|
+
import traceback
|
|
12
|
+
import asyncio
|
|
13
|
+
import io
|
|
14
|
+
import matplotlib.pyplot as plt
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def plot_spectra_ui(platform: str):
|
|
18
|
+
# Base inputs common to all platforms
|
|
19
|
+
base_inputs = [
|
|
20
|
+
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
21
|
+
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
22
|
+
ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None),
|
|
23
|
+
ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None),
|
|
24
|
+
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
25
|
+
ui.input_select(
|
|
26
|
+
"high_quality_reference_library",
|
|
27
|
+
"Indicate whether the reference library is considered high quality. "
|
|
28
|
+
"If True, filtering and noise removal are only applied to the query spectra.",
|
|
29
|
+
[False, True],
|
|
30
|
+
),
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
# Extra inputs depending on platform
|
|
34
|
+
if platform == "HRMS":
|
|
35
|
+
extra_inputs = [
|
|
36
|
+
ui.input_text(
|
|
37
|
+
"spectrum_preprocessing_order",
|
|
38
|
+
"Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
|
|
39
|
+
"FCNMWL",
|
|
40
|
+
),
|
|
41
|
+
ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
|
|
42
|
+
ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
|
|
43
|
+
]
|
|
44
|
+
else:
|
|
45
|
+
extra_inputs = [
|
|
46
|
+
ui.input_text(
|
|
47
|
+
"spectrum_preprocessing_order",
|
|
48
|
+
"Sequence of characters for preprocessing order (F, N, L, W).",
|
|
49
|
+
"FNLW",
|
|
50
|
+
)
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
# Numeric inputs
|
|
54
|
+
numeric_inputs = [
|
|
55
|
+
ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
|
|
56
|
+
ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
|
|
57
|
+
ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
|
|
58
|
+
ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
|
|
59
|
+
ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
|
|
60
|
+
ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
|
|
61
|
+
ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
|
|
62
|
+
ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
|
|
63
|
+
ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
# Y-axis transformation select input
|
|
67
|
+
select_input = ui.input_select(
|
|
68
|
+
"y_axis_transformation",
|
|
69
|
+
"Transformation to apply to intensity axis:",
|
|
70
|
+
["normalized", "none", "log10", "sqrt"],
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Run and Back buttons
|
|
74
|
+
run_button = ui.input_action_button("run_btn", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
75
|
+
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
76
|
+
|
|
77
|
+
#print(len(extra_inputs))
|
|
78
|
+
# Layout base_inputs and extra_inputs in columns
|
|
79
|
+
if platform == "HRMS":
|
|
80
|
+
inputs_columns = ui.layout_columns(
|
|
81
|
+
ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
82
|
+
ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
83
|
+
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
84
|
+
ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
|
|
85
|
+
col_widths=(3, 3, 3, 3),
|
|
86
|
+
)
|
|
87
|
+
elif platform == "NRMS":
|
|
88
|
+
inputs_columns = ui.layout_columns(
|
|
89
|
+
ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
90
|
+
ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
91
|
+
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
92
|
+
ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
|
|
93
|
+
col_widths=(3, 3, 3, 3),
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Combine everything
|
|
97
|
+
return ui.div(
|
|
98
|
+
ui.TagList(
|
|
99
|
+
ui.h2("Plot Spectra"),
|
|
100
|
+
inputs_columns,
|
|
101
|
+
run_button,
|
|
102
|
+
back_button
|
|
103
|
+
),
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def run_spec_lib_matching_ui(platform: str):
|
|
109
|
+
# Base inputs common to all platforms
|
|
110
|
+
base_inputs = [
|
|
111
|
+
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
112
|
+
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
113
|
+
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
114
|
+
ui.input_select(
|
|
115
|
+
"high_quality_reference_library",
|
|
116
|
+
"Indicate whether the reference library is considered high quality. "
|
|
117
|
+
"If True, filtering and noise removal are only applied to the query spectra.",
|
|
118
|
+
[False, True],
|
|
119
|
+
),
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
# Extra inputs depending on platform
|
|
123
|
+
if platform == "HRMS":
|
|
124
|
+
extra_inputs = [
|
|
125
|
+
ui.input_text(
|
|
126
|
+
"spectrum_preprocessing_order",
|
|
127
|
+
"Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
|
|
128
|
+
"FCNMWL",
|
|
129
|
+
),
|
|
130
|
+
ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
|
|
131
|
+
ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
|
|
132
|
+
]
|
|
133
|
+
else:
|
|
134
|
+
extra_inputs = [
|
|
135
|
+
ui.input_text(
|
|
136
|
+
"spectrum_preprocessing_order",
|
|
137
|
+
"Sequence of characters for preprocessing order (F, N, L, W).",
|
|
138
|
+
"FNLW",
|
|
139
|
+
)
|
|
140
|
+
]
|
|
141
|
+
|
|
142
|
+
# Numeric inputs
|
|
143
|
+
numeric_inputs = [
|
|
144
|
+
ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
|
|
145
|
+
ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
|
|
146
|
+
ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
|
|
147
|
+
ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
|
|
148
|
+
ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
|
|
149
|
+
ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
|
|
150
|
+
ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
|
|
151
|
+
ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
|
|
152
|
+
ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
|
|
153
|
+
ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 1),
|
|
154
|
+
]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Run and Back buttons
|
|
158
|
+
run_button = ui.input_action_button("run_btn", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
159
|
+
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
160
|
+
|
|
161
|
+
#print(len(extra_inputs))
|
|
162
|
+
# Layout base_inputs and extra_inputs in columns
|
|
163
|
+
if platform == "HRMS":
|
|
164
|
+
inputs_columns = ui.layout_columns(
|
|
165
|
+
ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
166
|
+
ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
167
|
+
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
168
|
+
ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
|
|
169
|
+
col_widths=(3, 3, 3, 3),
|
|
170
|
+
)
|
|
171
|
+
elif platform == "NRMS":
|
|
172
|
+
inputs_columns = ui.layout_columns(
|
|
173
|
+
ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
174
|
+
ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
175
|
+
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
176
|
+
ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
|
|
177
|
+
col_widths=(3, 3, 3, 3),
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
# Combine everything
|
|
181
|
+
return ui.div(
|
|
182
|
+
ui.TagList(
|
|
183
|
+
ui.h2("Run Spectral Library Matching"),
|
|
184
|
+
inputs_columns,
|
|
185
|
+
run_button,
|
|
186
|
+
back_button
|
|
187
|
+
),
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
app_ui = ui.page_fluid(
|
|
193
|
+
ui.output_ui("main_ui"),
|
|
194
|
+
ui.output_text("status_output")
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def server(input, output, session):
|
|
199
|
+
|
|
200
|
+
# Track which page to show
|
|
201
|
+
current_page = reactive.Value("main_menu")
|
|
202
|
+
|
|
203
|
+
# Track button clicks
|
|
204
|
+
plot_clicks = reactive.Value(0)
|
|
205
|
+
match_clicks = reactive.Value(0)
|
|
206
|
+
back_clicks = reactive.Value(0)
|
|
207
|
+
|
|
208
|
+
run_status = reactive.Value("Waiting for input...")
|
|
209
|
+
|
|
210
|
+
@reactive.Effect
|
|
211
|
+
def _():
|
|
212
|
+
# Main menu buttons
|
|
213
|
+
if input.plot_spectra() > plot_clicks.get():
|
|
214
|
+
current_page.set("plot_spectra")
|
|
215
|
+
plot_clicks.set(input.plot_spectra())
|
|
216
|
+
elif input.run_spec_lib_matching() > match_clicks.get():
|
|
217
|
+
current_page.set("run_spec_lib_matching")
|
|
218
|
+
match_clicks.set(input.run_spec_lib_matching())
|
|
219
|
+
elif hasattr(input, "back") and input.back() > back_clicks.get():
|
|
220
|
+
current_page.set("main_menu")
|
|
221
|
+
back_clicks.set(input.back())
|
|
222
|
+
|
|
223
|
+
@render.image
|
|
224
|
+
def image():
|
|
225
|
+
from pathlib import Path
|
|
226
|
+
|
|
227
|
+
dir = Path(__file__).resolve().parent
|
|
228
|
+
img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
|
|
229
|
+
return img
|
|
230
|
+
|
|
231
|
+
@output
|
|
232
|
+
@render.ui
|
|
233
|
+
def main_ui():
|
|
234
|
+
if current_page() == "main_menu":
|
|
235
|
+
return ui.page_fluid(
|
|
236
|
+
ui.h2("Main Menu"),
|
|
237
|
+
ui.div(
|
|
238
|
+
ui.output_image("image"),
|
|
239
|
+
style=(
|
|
240
|
+
"position:fixed; top:0; left:50%; transform:translateX(-50%); "
|
|
241
|
+
"z-index:1000; text-align:center; padding:10px; background-color:white;"
|
|
242
|
+
),
|
|
243
|
+
),
|
|
244
|
+
ui.div(
|
|
245
|
+
"Overview:",
|
|
246
|
+
style="text-align:left; font-size:24px; font-weight:bold; margin-top:350px"
|
|
247
|
+
),
|
|
248
|
+
ui.div(
|
|
249
|
+
"PyCompound is a Python-based tool designed for performing spectral library matching on either high-resolution mass spectrometry data (HRMS) or low-resolution mass spectrometry data (NRMS). PyCompound offers a range of spectrum preprocessing transformations and similarity measures. These spectrum preprocessing transformations include filtering on mass/charge and/or intensity values, weight factor transformation, low-entropy transformation, centroiding, noise removal, and matching. The available similarity measures include the canonical Cosine similarity measure, three entropy-based similarity measures, and a variety of binary similarity measures: Jaccard, Dice, 3W-Jaccard, Sokal-Sneath, Binary Cosine, Mountford, McConnaughey, Driver-Kroeber, Simpson, Braun-Banquet, Fager-McGowan, Kulczynski, Intersection, Hamming, and Hellinger.",
|
|
250
|
+
style="margin-top:10px; text-align:left; font-size:16px; font-weight:500"
|
|
251
|
+
),
|
|
252
|
+
ui.div(
|
|
253
|
+
"Select options:",
|
|
254
|
+
style="margin-top:30px; text-align:left; font-size:24px; font-weight:bold"
|
|
255
|
+
),
|
|
256
|
+
ui.div(
|
|
257
|
+
ui.input_radio_buttons("chromatography_platform", "Specify chromatography platform:", ["HRMS","NRMS"]),
|
|
258
|
+
style="font-size:18px; margin-top:10px; max-width:none"
|
|
259
|
+
),
|
|
260
|
+
ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
261
|
+
ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
262
|
+
ui.div(
|
|
263
|
+
"References:",
|
|
264
|
+
style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
|
|
265
|
+
),
|
|
266
|
+
ui.div(
|
|
267
|
+
"If Shannon Entropy similarity measure, low-entropy transformation, or centroiding are used:",
|
|
268
|
+
style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
|
|
269
|
+
),
|
|
270
|
+
ui.div(
|
|
271
|
+
ui.HTML(
|
|
272
|
+
'Li, Y., Kind, T., Folz, J. et al. (2021) Spectral entropy outperforms MS/MS dot product similarity for small-molecule compound identification. Nat Methods, 18 1524–1531. <a href="https://doi.org/10.1038/s41592-021-01331-z" target="_blank">https://doi.org/10.1038/s41592-021-01331-z</a>.'
|
|
273
|
+
),
|
|
274
|
+
style="text-align:left; font-size:14px; font-weight:500"
|
|
275
|
+
),
|
|
276
|
+
ui.div(
|
|
277
|
+
"If Tsallis Entropy similarity measure or series of preprocessing transformations are used:",
|
|
278
|
+
style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
|
|
279
|
+
),
|
|
280
|
+
ui.div(
|
|
281
|
+
ui.HTML(
|
|
282
|
+
'Dlugas, H., Zhang, X., Kim, S. (2025) Comparative analysis of continuous similarity measures for compound identification in mass spectrometry-based metabolomics. Chemometrics and Intelligent Laboratory Systems, 263, 105417. <a href="https://doi.org/10.1016/j.chemolab.2025.105417", target="_blank">https://doi.org/10.1016/j.chemolab.2025.105417</a>.'
|
|
283
|
+
),
|
|
284
|
+
style="text-align:left; font-size:14px; font-weight:500"
|
|
285
|
+
),
|
|
286
|
+
ui.div(
|
|
287
|
+
"If binary similarity measures are used:",
|
|
288
|
+
style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
|
|
289
|
+
),
|
|
290
|
+
ui.div(
|
|
291
|
+
ui.HTML(
|
|
292
|
+
'Kim, S., Kato, I., & Zhang, X. (2022). Comparative Analysis of Binary Similarity Measures for Compound Identification in Mass Spectrometry-Based Metabolomics. Metabolites, 12(8), 694. <a href="https://doi.org/10.3390/metabo12080694" target="_blank">https://doi.org/10.3390/metabo12080694</a>.'
|
|
293
|
+
),
|
|
294
|
+
style="text-align:left; font-size:14px; font-weight:500"
|
|
295
|
+
),
|
|
296
|
+
|
|
297
|
+
ui.div(
|
|
298
|
+
"If weight factor transformation is used:",
|
|
299
|
+
style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
|
|
300
|
+
),
|
|
301
|
+
ui.div(
|
|
302
|
+
ui.HTML(
|
|
303
|
+
'Kim, S., Koo, I., Wei, X., & Zhang, X. (2012). A method of finding optimal weight factors for compound identification in gas chromatography-mass spectrometry. Bioinformatics, 28(8), 1158-1163. <a href="https://doi.org/10.1093/bioinformatics/bts083" target="_blank">https://doi.org/10.1093/bioinformatics/bts083</a>.'
|
|
304
|
+
),
|
|
305
|
+
style="margin-bottom:40px; text-align:left; font-size:14px; font-weight:500"
|
|
306
|
+
),
|
|
307
|
+
)
|
|
308
|
+
elif current_page() == "plot_spectra":
|
|
309
|
+
return plot_spectra_ui(input.chromatography_platform())
|
|
310
|
+
elif current_page() == "run_spec_lib_matching":
|
|
311
|
+
return run_spec_lib_matching_ui(input.chromatography_platform())
|
|
312
|
+
|
|
313
|
+
@reactive.effect
|
|
314
|
+
@reactive.event(input.run_btn)
|
|
315
|
+
def _():
|
|
316
|
+
if current_page() == "plot_spectra":
|
|
317
|
+
if len(input.spectrum_ID1())==0:
|
|
318
|
+
spectrum_ID1 = None
|
|
319
|
+
else:
|
|
320
|
+
spectrum_ID1 = input.spectrum_ID1()
|
|
321
|
+
if len(input.spectrum_ID2())==0:
|
|
322
|
+
spectrum_ID2 = None
|
|
323
|
+
else:
|
|
324
|
+
spectrum_ID2 = input.spectrum_ID2()
|
|
325
|
+
|
|
326
|
+
if input.chromatography_platform() == "HRMS":
|
|
327
|
+
try:
|
|
328
|
+
fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
|
|
329
|
+
plt.show()
|
|
330
|
+
run_status.set(f"✅ Plotting has finished.")
|
|
331
|
+
except Exception as e:
|
|
332
|
+
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
333
|
+
elif input.chromatography_platform() == "NRMS":
|
|
334
|
+
try:
|
|
335
|
+
generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
|
|
336
|
+
plt.show()
|
|
337
|
+
run_status.set(f"✅ Plotting has finished.")
|
|
338
|
+
except Exception as e:
|
|
339
|
+
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
340
|
+
|
|
341
|
+
elif current_page() == 'run_spec_lib_matching':
|
|
342
|
+
if input.chromatography_platform() == 'HRMS':
|
|
343
|
+
try:
|
|
344
|
+
run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/')
|
|
345
|
+
run_status.set(f"✅ Spectral library matching has finished and results were written to {Path.cwd()}/output_similarity_scores.csv.")
|
|
346
|
+
except Exception as e:
|
|
347
|
+
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
348
|
+
elif input.chromatography_platform() == 'NRMS':
|
|
349
|
+
try:
|
|
350
|
+
run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv')
|
|
351
|
+
run_status.set(f"✅ Spectral library matching has finished and results were written to {Path.cwd()}/")
|
|
352
|
+
except Exception as e:
|
|
353
|
+
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@render.text
|
|
357
|
+
def status_output():
|
|
358
|
+
return run_status.get()
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
app = App(app_ui, server)
|
|
363
|
+
|
|
364
|
+
|
|
@@ -9,7 +9,7 @@ import sys
|
|
|
9
9
|
import matplotlib.pyplot as plt
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_ID1=None, spectrum_ID2=None, similarity_measure='cosine', spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, y_axis_transformation='normalized', output_path=None):
|
|
12
|
+
def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_ID1=None, spectrum_ID2=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, y_axis_transformation='normalized', output_path=None, return_plot=False):
|
|
13
13
|
'''
|
|
14
14
|
plots two spectra against each other before and after preprocessing transformations for high-resolution mass spectrometry data
|
|
15
15
|
|
|
@@ -17,7 +17,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
17
17
|
--reference_data: mgf, mzML, or csv file of the reference mass spectra. If csv file, each row should correspond to a mass spectrum, the left-most column should contain an identifier (i.e. the CAS registry number or the compound name), and each remaining column should correspond to a single mass/charge ratio. Mandatory argument.
|
|
18
18
|
--spectrum_ID1: ID of one spectrum to be plotted. Default is first spectrum in the query library. Optional argument.
|
|
19
19
|
--spectrum_ID2: ID of another spectrum to be plotted. Default is first spectrum in the reference library. Optional argument.
|
|
20
|
-
--similarity_measure:
|
|
20
|
+
--similarity_measure: cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, hellinger. Default: cosine.
|
|
21
|
+
--weights: dict of weights to give to each non-binary similarity measure (i.e. cosine, shannon, renyi, and tsallis) when the mixture similarity measure is specified. Default: 0.25 for each of the four non-binary similarity measures.
|
|
21
22
|
--spectrum_preprocessing_order: The spectrum preprocessing transformations and the order in which they are to be applied. Note that these transformations are applied prior to computing similarity scores. Format must be a string with 2-6 characters chosen from C, F, M, N, L, W representing centroiding, filtering based on mass/charge and intensity values, matching, noise removal, low-entropy transformation, and weight-factor-transformation, respectively. For example, if \'WCM\' is passed, then each spectrum will undergo a weight factor transformation, then centroiding, and then matching. Note that if an argument is passed, then \'M\' must be contained in the argument, since matching is a required preprocessing step in spectral library matching of HRMS data. Furthermore, \'C\' must be performed before matching since centroiding can change the number of ion fragments in a given spectrum. Default: FCNMWL
|
|
22
23
|
--high_quality_reference_library: True/False flag indicating whether the reference library is considered to be of high quality. If True, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra. If False, all spectrum preprocessing transformations specified will be applied to both the query and reference spectra. Default: False
|
|
23
24
|
--mz_min: Remove all peaks with mass/charge value less than mz_min in each spectrum. Default: 0
|
|
@@ -95,8 +96,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
95
96
|
print(f'Error: spectrum_preprocessing_order must contain only \'C\', \'F\', \'M\', \'N\', \'L\', \'W\'.')
|
|
96
97
|
sys.exit()
|
|
97
98
|
|
|
98
|
-
if similarity_measure not in ['cosine','shannon','renyi','tsallis']:
|
|
99
|
-
print('\nError: similarity_measure must be either
|
|
99
|
+
if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
|
|
100
|
+
print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
|
|
100
101
|
sys.exit()
|
|
101
102
|
|
|
102
103
|
if isinstance(int_min,int) is True:
|
|
@@ -157,10 +158,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
157
158
|
output_path = f'{Path.cwd()}/spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}.pdf'
|
|
158
159
|
|
|
159
160
|
|
|
160
|
-
#print(spectrum_ID1)
|
|
161
|
-
#print(spectrum_ID2)
|
|
162
|
-
#print(unique_query_ids)
|
|
163
|
-
#print(unique_reference_ids)
|
|
164
161
|
if spectrum_ID1 in unique_query_ids and spectrum_ID2 in unique_query_ids:
|
|
165
162
|
query_idx = unique_query_ids.index(spectrum_ID1)
|
|
166
163
|
reference_idx = unique_query_ids.index(spectrum_ID2)
|
|
@@ -266,17 +263,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
266
263
|
|
|
267
264
|
# if both spectra have a non-zero total intensity and enough points, compute their similarity; otherwise the similarity score is set to 0
|
|
268
265
|
if np.sum(q_ints) != 0 and np.sum(r_ints) != 0 and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
269
|
-
|
|
270
|
-
similarity_score = S_cos(q_ints, r_ints)
|
|
271
|
-
else:
|
|
272
|
-
q_ints = normalize(q_ints, method = normalization_method)
|
|
273
|
-
r_ints = normalize(r_ints, method = normalization_method)
|
|
274
|
-
if similarity_measure == 'shannon':
|
|
275
|
-
similarity_score = S_shannon(q_ints, r_ints)
|
|
276
|
-
elif similarity_measure == 'renyi':
|
|
277
|
-
similarity_score = S_renyi(q_ints, r_ints, q)
|
|
278
|
-
elif similarity_measure == 'tsallis':
|
|
279
|
-
similarity_score = S_tsallis(q_ints, r_ints, q)
|
|
266
|
+
similarity_score = get_similarity(similarity_measure, q_ints, r_ints, weights, entropy_dimension)
|
|
280
267
|
else:
|
|
281
268
|
similarity_score = 0
|
|
282
269
|
|
|
@@ -333,16 +320,20 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
333
320
|
fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
|
|
334
321
|
plt.savefig(output_path, format='pdf')
|
|
335
322
|
|
|
323
|
+
if return_plot == True:
|
|
324
|
+
return plt
|
|
325
|
+
|
|
336
326
|
|
|
337
327
|
|
|
338
328
|
|
|
339
|
-
def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_ID1=None, spectrum_ID2=None, similarity_measure='cosine', spectrum_preprocessing_order='FNLW', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, y_axis_transformation='normalized', output_path=None):
|
|
329
|
+
def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_ID1=None, spectrum_ID2=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FNLW', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, y_axis_transformation='normalized', output_path=None, return_plot=False):
|
|
340
330
|
'''
|
|
341
331
|
plots two spectra against each other before and after preprocessing transformations for nominal-resolution mass spectrometry data
|
|
342
332
|
|
|
343
333
|
--query_data: cdf or csv file of query mass spectrum/spectra to be identified. If csv file, each row should correspond to a mass spectrum, the left-most column should contain an identifier, and each of the other columns should correspond to a single mass/charge ratio. Mandatory argument.
|
|
344
334
|
--reference_data: cdf or csv file of the reference mass spectra. If csv file, each row should correspond to a mass spectrum, the left-most column should contain an identifier (i.e. the CAS registry number or the compound name), and each of the remaining columns should correspond to a single mass/charge ratio. Mandatory argument.
|
|
345
|
-
--similarity_measure:
|
|
335
|
+
--similarity_measure: cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, hellinger. Default: cosine.
|
|
336
|
+
--weights: dict of weights to give to each non-binary similarity measure (i.e. cosine, shannon, renyi, and tsallis) when the mixture similarity measure is specified. Default: 0.25 for each of the four non-binary similarity measures.
|
|
346
337
|
--spectrum_preprocessing_order: The spectrum preprocessing transformations and the order in which they are to be applied. Note that these transformations are applied prior to computing similarity scores. Format must be a string with 2-4 characters chosen from F, N, L, W representing filtering based on mass/charge and intensity values, noise removal, low-entropy transformation, and weight-factor transformation, respectively. For example, if \'WN\' is passed, then each spectrum will undergo a weight factor transformation and then noise removal. Default: FNLW
|
|
347
338
|
--high_quality_reference_library: True/False flag indicating whether the reference library is considered to be of high quality. If True, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra. If False, all spectrum preprocessing transformations specified will be applied to both the query and reference spectra. Default: False')
|
|
348
339
|
--mz_min: Remove all peaks with mass/charge value less than mz_min in each spectrum. Default: 0
|
|
@@ -409,8 +400,8 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
409
400
|
print(f'Error: spectrum_preprocessing_order must contain only \'F\', \'N\', \'W\', \'L\'.')
|
|
410
401
|
sys.exit()
|
|
411
402
|
|
|
412
|
-
if similarity_measure not in ['cosine','shannon','renyi','tsallis']:
|
|
413
|
-
print('\nError: similarity_measure must be either
|
|
403
|
+
if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
|
|
404
|
+
print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
|
|
414
405
|
sys.exit()
|
|
415
406
|
|
|
416
407
|
if isinstance(int_min,int) is True:
|
|
@@ -564,20 +555,9 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
564
555
|
if high_quality_reference_library == False:
|
|
565
556
|
r_spec = filter_spec_gcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
|
|
566
557
|
|
|
567
|
-
# compute similarity score; if the spectra contain one point
|
|
558
|
+
# compute similarity score; if the spectra contain at most one point, their similarity is considered to be 0
|
|
568
559
|
if q_spec.shape[0] > 1:
|
|
569
|
-
|
|
570
|
-
similarity_score = S_cos(q_spec[:,1], r_spec[:,1])
|
|
571
|
-
else:
|
|
572
|
-
q_spec[:,1] = normalize(q_spec[:,1], method = normalization_method)
|
|
573
|
-
r_spec[:,1] = normalize(r_spec[:,1], method = normalization_method)
|
|
574
|
-
|
|
575
|
-
if similarity_measure == 'shannon':
|
|
576
|
-
similarity_score = S_shannon(q_spec[:,1].astype('float'), r_spec[:,1].astype('float'))
|
|
577
|
-
elif similarity_measure == 'renyi':
|
|
578
|
-
similarity_score = S_renyi(q_spec[:,1], r_spec[:,1], q)
|
|
579
|
-
elif similarity_measure == 'tsallis':
|
|
580
|
-
similarity_score = S_tsallis(q_spec[:,1], r_spec[:,1], q)
|
|
560
|
+
similarity_score = get_similarity(similarity_measure, q_spec[:,1], r_spec[:,1], weights, entropy_dimension)
|
|
581
561
|
else:
|
|
582
562
|
similarity_score = 0
|
|
583
563
|
|
|
@@ -633,4 +613,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
633
613
|
fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
|
|
634
614
|
plt.savefig(output_path, format='pdf')
|
|
635
615
|
|
|
616
|
+
if return_plot == True:
|
|
617
|
+
return fig
|
|
636
618
|
|
{pycompound-0.0.1/src/pycompound_fy7392 → pycompound-0.0.6/src/pycompound}/plot_spectra_CLI.py
RENAMED
|
@@ -3,6 +3,7 @@ from pycompound_fy7392.plot_spectra import generate_plots_on_HRMS_data
|
|
|
3
3
|
from pycompound_fy7392.plot_spectra import generate_plots_on_NRMS_data
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import argparse
|
|
6
|
+
import json
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
import sys
|
|
8
9
|
|
|
@@ -13,7 +14,8 @@ parser.add_argument('--query_data', type=str, metavar='\b', help='CSV file of qu
|
|
|
13
14
|
parser.add_argument('--reference_data', type=str, metavar='\b', help='CSV file of the reference mass spectra. Each row should correspond to a mass spectrum, the left-most column should contain in identifier (i.e. the CAS registry number or the compound name), and the remaining column should correspond to a single mass/charge ratio. Mandatory argument.')
|
|
14
15
|
parser.add_argument('--spectrum_ID1', type=str, metavar='\b', help='The identifier of the query spectrum to be plotted. Default: first query spectrum in query_data.')
|
|
15
16
|
parser.add_argument('--spectrum_ID2', type=str, metavar='\b', help='The identifier of the reference spectrum to be plotted. Default: first reference spectrum in reference_data.')
|
|
16
|
-
parser.add_argument('--similarity_measure', type=str, default='cosine', metavar='\b', help='Similarity measure: options are
|
|
17
|
+
parser.add_argument('--similarity_measure', type=str, default='cosine', metavar='\b', help='Similarity measure: options are cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger. Default: cosine.')
|
|
18
|
+
parser.add_argument('--weights', type=json.loads, default={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, metavar='\b', help='dict of weights to give to each non-binary similarity measure (i.e. cosine, shannon, renyi, and tsallis) when the mixture similarity measure is specified. Default: 0.25 for each of the four non-binary similarity measures.')
|
|
17
19
|
parser.add_argument('--chromatography_platform', type=str, metavar='\b', help='Chromatography platform: options are \'HRMS\' and \'NRMS\'. Mandatory argument.')
|
|
18
20
|
parser.add_argument('--spectrum_preprocessing_order', type=str, metavar='\b', help='The LC-MS/MS spectrum preprocessing transformations and the order in which they are to be applied. Note that these transformations are applied prior to computing similarity scores. Format must be a string with 2-6 characters chosen from C, F, M, N, L, W representing centroiding, filtering based on mass/charge and intensity values, matching, noise removal, low-entropy trannsformation, and weight-factor-transformation, respectively. For example, if \'WCM\' is passed, then each spectrum will undergo a weight factor transformation, then centroiding, and then matching. Note that if an argument is passed, then \'M\' must be contained in the argument, since matching is a required preprocessing step in spectral library matching of LC-MS/MS data. Furthermore, \'C\' must be performed before matching since centroiding can change the number of ion fragments in a given spectrum. Default: FCNMWL for HRMS, FNLW for NRMS')
|
|
19
21
|
parser.add_argument('--high_quality_reference_library', type=str, default='False', metavar='\b', help='True/False flag indicating whether the reference library is considered to be of high quality. If True, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra. If False, all spectrum preprocessing transformations specified will be applied to both the query and reference spectra. Default: False')
|
|
@@ -43,9 +45,8 @@ else:
|
|
|
43
45
|
|
|
44
46
|
|
|
45
47
|
if args.chromatography_platform == 'HRMS':
|
|
46
|
-
generate_plots_on_HRMS_data(query_data=args.query_data, reference_data=args.reference_data, spectrum_ID1=args.spectrum_ID1, spectrum_ID2=args.spectrum_ID2, similarity_measure=args.similarity_measure, spectrum_preprocessing_order=spectrum_preprocessing_order, high_quality_reference_library=args.high_quality_reference_library, mz_min=args.mz_min, mz_max=args.mz_max, int_min=args.int_min, int_max=args.int_max, window_size_centroiding=args.window_size_centroiding, window_size_matching=args.window_size_matching, noise_threshold=args.noise_threshold, wf_mz=args.wf_mz, wf_intensity=args.wf_intensity, LET_threshold=args.LET_threshold, entropy_dimension=args.entropy_dimension, y_axis_transformation=args.y_axis_transformation, output_path=args.output_path)
|
|
48
|
+
generate_plots_on_HRMS_data(query_data=args.query_data, reference_data=args.reference_data, spectrum_ID1=args.spectrum_ID1, spectrum_ID2=args.spectrum_ID2, similarity_measure=args.similarity_measure, weights=args.weights, spectrum_preprocessing_order=spectrum_preprocessing_order, high_quality_reference_library=args.high_quality_reference_library, mz_min=args.mz_min, mz_max=args.mz_max, int_min=args.int_min, int_max=args.int_max, window_size_centroiding=args.window_size_centroiding, window_size_matching=args.window_size_matching, noise_threshold=args.noise_threshold, wf_mz=args.wf_mz, wf_intensity=args.wf_intensity, LET_threshold=args.LET_threshold, entropy_dimension=args.entropy_dimension, y_axis_transformation=args.y_axis_transformation, output_path=args.output_path)
|
|
47
49
|
elif args.chromatography_platform == 'NRMS':
|
|
48
|
-
generate_plots_on_NRMS_data(query_data=args.query_data, reference_data=args.reference_data, spectrum_ID1=args.spectrum_ID1, spectrum_ID2=args.spectrum_ID2, similarity_measure=args.similarity_measure, spectrum_preprocessing_order=spectrum_preprocessing_order, high_quality_reference_library=args.high_quality_reference_library, mz_min=args.mz_min, mz_max=args.mz_max, int_min=args.int_min, int_max=args.int_max, noise_threshold=args.noise_threshold, wf_mz=args.wf_mz, wf_intensity=args.wf_intensity, LET_threshold=args.LET_threshold, entropy_dimension=args.entropy_dimension, y_axis_transformation=args.y_axis_transformation, output_path=args.output_path)
|
|
49
|
-
|
|
50
|
+
generate_plots_on_NRMS_data(query_data=args.query_data, reference_data=args.reference_data, spectrum_ID1=args.spectrum_ID1, spectrum_ID2=args.spectrum_ID2, similarity_measure=args.similarity_measure, weights=args.weights, spectrum_preprocessing_order=spectrum_preprocessing_order, high_quality_reference_library=args.high_quality_reference_library, mz_min=args.mz_min, mz_max=args.mz_max, int_min=args.int_min, int_max=args.int_max, noise_threshold=args.noise_threshold, wf_mz=args.wf_mz, wf_intensity=args.wf_intensity, LET_threshold=args.LET_threshold, entropy_dimension=args.entropy_dimension, y_axis_transformation=args.y_axis_transformation, output_path=args.output_path)
|
|
50
51
|
|
|
51
52
|
|