pycompound 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pycompound-0.1.0/src/pycompound.egg-info → pycompound-0.1.1}/PKG-INFO +1 -1
- {pycompound-0.1.0 → pycompound-0.1.1}/pyproject.toml +1 -1
- {pycompound-0.1.0 → pycompound-0.1.1}/src/app.py +428 -63
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/plot_spectra.py +7 -4
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/spec_lib_matching.py +53 -25
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/tuning_CLI.py +1 -2
- {pycompound-0.1.0 → pycompound-0.1.1/src/pycompound.egg-info}/PKG-INFO +1 -1
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound.egg-info/top_level.txt +1 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/tests/test_plot_spectra.py +2 -2
- {pycompound-0.1.0 → pycompound-0.1.1}/tests/test_tuning.py +11 -6
- {pycompound-0.1.0 → pycompound-0.1.1}/LICENSE +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/README.md +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/setup.cfg +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/build_library.py +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/plot_spectra_CLI.py +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/processing.py +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/similarity_measures.py +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound/spec_lib_matching_CLI.py +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound.egg-info/SOURCES.txt +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound.egg-info/dependency_links.txt +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/src/pycompound.egg-info/requires.txt +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/tests/test_build_library.py +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/tests/test_similarity_measures.py +0 -0
- {pycompound-0.1.0 → pycompound-0.1.1}/tests/test_spec_lib_matching.py +0 -0
|
@@ -18,8 +18,25 @@ import matplotlib.pyplot as plt
|
|
|
18
18
|
import pandas as pd
|
|
19
19
|
import numpy as np
|
|
20
20
|
import netCDF4 as nc
|
|
21
|
-
from pyteomics import mgf
|
|
22
|
-
|
|
21
|
+
from pyteomics import mgf, mzml
|
|
22
|
+
import ast
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def strip_text(s):
    """Split a bracketed, comma-separated string into trimmed tokens.

    Whitespace around the whole string is removed before the enclosing
    square brackets are stripped, so ``" [FCNMWL, CWM] "`` parses the same
    as ``"[FCNMWL,CWM]"``. Empty tokens are dropped.

    Args:
        s: Text such as ``"[FCNMWL,CWM]"``; the brackets are optional.

    Returns:
        The non-empty tokens, each with surrounding whitespace removed.
    """
    # Strip outer whitespace first; otherwise a trailing space or newline
    # shields the closing bracket from strip('[]').
    inner = s.strip().strip('[]')
    return [token.strip() for token in inner.split(',') if token.strip()]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def strip_numeric(s):
    """Parse a bracketed, comma-separated string into a list of floats.

    Whitespace around the whole string is removed before the enclosing
    square brackets are stripped, so ``" [0.1, 0.5] "`` parses the same as
    ``"[0.1,0.5]"``. Empty tokens are dropped.

    Args:
        s: Text such as ``"[0.1,0.5]"``; the brackets are optional.

    Returns:
        The parsed values as floats, in input order.

    Raises:
        ValueError: If a non-empty token is not a valid float literal.
    """
    # Strip outer whitespace first; otherwise a trailing space or newline
    # shields the closing bracket and float("0.5]") fails.
    inner = s.strip().strip('[]')
    return [float(token.strip()) for token in inner.split(',') if token.strip()]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def strip_weights(s):
    """Parse weight tuples from text into a list of per-measure dicts.

    The text is evaluated with ``ast.literal_eval`` (literals only, no code
    execution). It may hold several 4-tuples, e.g.
    ``"((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))"``, or one bare
    4-tuple, e.g. ``"(0.25, 0.25, 0.25, 0.25)"``.

    Args:
        s: Python-literal text describing one or more weight tuples.

    Returns:
        One dict per tuple, keyed by ``'Cosine'``, ``'Shannon'``,
        ``'Renyi'`` and ``'Tsallis'`` in that order.

    Raises:
        ValueError: If the text is not a sequence of 4-element sequences
            (``ast.literal_eval`` also raises ``ValueError``/``SyntaxError``
            for malformed literals).
    """
    keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
    parsed = ast.literal_eval(s.strip())
    if not isinstance(parsed, (list, tuple)):
        raise ValueError(f"Expected a tuple or list of weight tuples, got: {s!r}")

    # A single flat tuple of numbers is one weight set, not a collection of
    # sets; wrap it so it is handled like the multi-tuple form.
    if parsed and all(isinstance(w, (int, float)) for w in parsed):
        parsed = [parsed]

    weight_dicts = []
    for weights in parsed:
        # zip() would silently truncate, yielding dicts with missing keys.
        if not isinstance(weights, (list, tuple)) or len(weights) != len(keys):
            raise ValueError(
                f"Each weight set must contain exactly {len(keys)} values "
                f"({', '.join(keys)}), got: {weights!r}"
            )
        weight_dicts.append(dict(zip(keys, weights)))
    return weight_dicts
|
|
23
40
|
|
|
24
41
|
|
|
25
42
|
def build_library(input_path=None, output_path=None):
|
|
@@ -152,30 +169,37 @@ def extract_first_column_ids(file_path: str, max_ids: int = 20000):
|
|
|
152
169
|
return []
|
|
153
170
|
|
|
154
171
|
|
|
172
|
+
def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
    """Send PNG bytes to the browser as a data URL via a custom message.

    The image is base64-encoded into a ``data:image/png;base64,...`` URL and
    sent as the ``"open-plot-window"`` custom message with keys ``"png"``
    and ``"title"``.

    Args:
        session: Object exposing ``send_custom_message(type, message)``.
        png_bytes: Raw PNG image data.
        title: Title forwarded to the browser alongside the image.

    Returns:
        Whatever ``send_custom_message`` returned. If that was a coroutine
        and an event loop is running, the scheduled ``asyncio`` task is
        returned instead; callers may await it or ignore it.
    """
    b64 = base64.b64encode(png_bytes).decode("ascii")
    payload = {"png": f"data:image/png;base64,{b64}", "title": title}
    pending = session.send_custom_message("open-plot-window", payload)

    # send_custom_message is a coroutine function in Shiny for Python;
    # dropping its result means the message is never sent. Schedule it when
    # a loop is running so this helper stays callable from sync code.
    if asyncio.iscoroutine(pending):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: hand the coroutine back for the caller to await.
            return pending
        return asyncio.ensure_future(pending)
    return pending
|
|
177
|
+
|
|
178
|
+
|
|
155
179
|
def plot_spectra_ui(platform: str):
|
|
156
180
|
# Base inputs common to all platforms
|
|
157
181
|
base_inputs = [
|
|
158
182
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
159
183
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
184
|
+
ui.input_selectize(
|
|
185
|
+
"spectrum_ID1",
|
|
186
|
+
"Select spectrum ID 1 (default is the first spectrum in the library):",
|
|
187
|
+
choices=[],
|
|
188
|
+
multiple=False,
|
|
189
|
+
options={"placeholder": "Upload a library..."},
|
|
190
|
+
),
|
|
191
|
+
ui.input_selectize(
|
|
192
|
+
"spectrum_ID2",
|
|
193
|
+
"Select spectrum ID 2 (default is the first spectrum in the library):",
|
|
194
|
+
choices=[],
|
|
195
|
+
multiple=False,
|
|
196
|
+
options={"placeholder": "Upload a library..."},
|
|
197
|
+
),
|
|
174
198
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
199
|
+
ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
175
200
|
ui.input_select(
|
|
176
201
|
"high_quality_reference_library",
|
|
177
|
-
"Indicate whether the reference library is considered high quality. "
|
|
178
|
-
"If True, filtering and noise removal are only applied to the query spectra.",
|
|
202
|
+
"Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
|
|
179
203
|
[False, True],
|
|
180
204
|
),
|
|
181
205
|
]
|
|
@@ -185,7 +209,7 @@ def plot_spectra_ui(platform: str):
|
|
|
185
209
|
extra_inputs = [
|
|
186
210
|
ui.input_text(
|
|
187
211
|
"spectrum_preprocessing_order",
|
|
188
|
-
"Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
|
|
212
|
+
"Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
|
|
189
213
|
"FCNMWL",
|
|
190
214
|
),
|
|
191
215
|
ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
|
|
@@ -195,7 +219,7 @@ def plot_spectra_ui(platform: str):
|
|
|
195
219
|
extra_inputs = [
|
|
196
220
|
ui.input_text(
|
|
197
221
|
"spectrum_preprocessing_order",
|
|
198
|
-
"Sequence of characters for preprocessing order (F, N, L, W).",
|
|
222
|
+
"Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
|
|
199
223
|
"FNLW",
|
|
200
224
|
)
|
|
201
225
|
]
|
|
@@ -227,19 +251,19 @@ def plot_spectra_ui(platform: str):
|
|
|
227
251
|
# Layout base_inputs and extra_inputs in columns
|
|
228
252
|
if platform == "HRMS":
|
|
229
253
|
inputs_columns = ui.layout_columns(
|
|
230
|
-
ui.div(base_inputs[0:
|
|
231
|
-
ui.div([base_inputs[
|
|
254
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
255
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
232
256
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
233
257
|
ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
|
|
234
|
-
col_widths=(3,
|
|
258
|
+
col_widths=(3,3,3,3),
|
|
235
259
|
)
|
|
236
260
|
elif platform == "NRMS":
|
|
237
261
|
inputs_columns = ui.layout_columns(
|
|
238
|
-
ui.div(base_inputs[0:
|
|
239
|
-
ui.div([base_inputs[
|
|
262
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
263
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
240
264
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
241
265
|
ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
|
|
242
|
-
col_widths=(3,
|
|
266
|
+
col_widths=(3,3,3,3),
|
|
243
267
|
)
|
|
244
268
|
|
|
245
269
|
# Combine everything
|
|
@@ -249,7 +273,8 @@ def plot_spectra_ui(platform: str):
|
|
|
249
273
|
inputs_columns,
|
|
250
274
|
run_button_plot_spectra,
|
|
251
275
|
back_button,
|
|
252
|
-
ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
|
|
276
|
+
ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px"),
|
|
277
|
+
ui.div(ui.output_text("plot_reference_status"), style="margin-top:8px; font-size:14px")
|
|
253
278
|
),
|
|
254
279
|
)
|
|
255
280
|
|
|
@@ -261,12 +286,26 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
261
286
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
262
287
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
263
288
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
289
|
+
ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
290
|
+
ui.input_selectize(
|
|
291
|
+
"spectrum_ID1",
|
|
292
|
+
"Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
|
|
293
|
+
choices=[],
|
|
294
|
+
multiple=False,
|
|
295
|
+
options={"placeholder": "Upload a library..."},
|
|
296
|
+
),
|
|
297
|
+
ui.input_selectize(
|
|
298
|
+
"spectrum_ID2",
|
|
299
|
+
"Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
|
|
300
|
+
choices=[],
|
|
301
|
+
multiple=False,
|
|
302
|
+
options={"placeholder": "Upload a library..."},
|
|
303
|
+
),
|
|
264
304
|
ui.input_select(
|
|
265
305
|
"high_quality_reference_library",
|
|
266
|
-
"Indicate whether the reference library is considered high quality. "
|
|
267
|
-
"If True, filtering and noise removal are only applied to the query spectra.",
|
|
306
|
+
"Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
|
|
268
307
|
[False, True],
|
|
269
|
-
)
|
|
308
|
+
)
|
|
270
309
|
]
|
|
271
310
|
|
|
272
311
|
# Extra inputs depending on platform
|
|
@@ -274,7 +313,7 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
274
313
|
extra_inputs = [
|
|
275
314
|
ui.input_text(
|
|
276
315
|
"spectrum_preprocessing_order",
|
|
277
|
-
"Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
|
|
316
|
+
"Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
|
|
278
317
|
"FCNMWL",
|
|
279
318
|
),
|
|
280
319
|
ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
|
|
@@ -284,7 +323,7 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
284
323
|
extra_inputs = [
|
|
285
324
|
ui.input_text(
|
|
286
325
|
"spectrum_preprocessing_order",
|
|
287
|
-
"Sequence of characters for preprocessing order (F, N, L, W).",
|
|
326
|
+
"Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
|
|
288
327
|
"FNLW",
|
|
289
328
|
)
|
|
290
329
|
]
|
|
@@ -300,30 +339,31 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
300
339
|
ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
|
|
301
340
|
ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
|
|
302
341
|
ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
|
|
303
|
-
ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:",
|
|
342
|
+
ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
|
|
304
343
|
]
|
|
305
344
|
|
|
306
345
|
|
|
307
346
|
# Run and Back buttons
|
|
308
|
-
run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
347
|
+
run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
348
|
+
run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
309
349
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
310
350
|
|
|
311
351
|
# Layout base_inputs and extra_inputs in columns
|
|
312
352
|
if platform == "HRMS":
|
|
313
353
|
inputs_columns = ui.layout_columns(
|
|
314
|
-
ui.div(base_inputs[0:
|
|
315
|
-
ui.div([base_inputs[
|
|
354
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
355
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
316
356
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
317
357
|
ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
|
|
318
|
-
col_widths=(3,
|
|
358
|
+
col_widths=(3,3,3,3)
|
|
319
359
|
)
|
|
320
360
|
elif platform == "NRMS":
|
|
321
361
|
inputs_columns = ui.layout_columns(
|
|
322
|
-
ui.div(base_inputs[0:
|
|
323
|
-
ui.div([base_inputs[
|
|
362
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
363
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
324
364
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
325
365
|
ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
|
|
326
|
-
col_widths=(3,
|
|
366
|
+
col_widths=(3,3,3,3)
|
|
327
367
|
)
|
|
328
368
|
|
|
329
369
|
log_panel = ui.card(
|
|
@@ -338,13 +378,100 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
338
378
|
ui.h2("Run Spectral Library Matching"),
|
|
339
379
|
inputs_columns,
|
|
340
380
|
run_button_spec_lib_matching,
|
|
381
|
+
run_button_plot_spectra_within_spec_lib_matching,
|
|
341
382
|
back_button,
|
|
342
|
-
log_panel
|
|
383
|
+
log_panel
|
|
343
384
|
),
|
|
344
385
|
)
|
|
345
386
|
|
|
346
387
|
|
|
347
388
|
|
|
389
|
+
def run_parameter_tuning_ui(platform: str):
    """Build the "Tune parameters" page for the given chromatography platform.

    Apart from the two file uploads and the similarity-measure selector,
    every setting is a free-text input whose default is a bracketed,
    comma-separated list (for example ``[0.1,0.5]``).

    Args:
        platform: ``"HRMS"`` adds the centroiding and matching window-size
            inputs and uses the HRMS preprocessing-order default. Any other
            value gets only the preprocessing-order input.

    Returns:
        A ``ui.div`` wrapping the page heading, a four-column input grid,
        the run and back buttons, and the log card.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    # Base inputs common to all platforms
    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize(
            "similarity_measure",
            "Select similarity measure(s):",
            ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"],
            multiple=True,
            selected='cosine',
        ),
        ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))'),
        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]'),
    ]

    # Extra inputs depending on platform
    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "[FCNMWL,CWM]",
            ),
            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "[FNLW,WNL]",
            )
        ]

    # Numeric inputs (entered as text so several values can be listed)
    numeric_inputs = [
        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]'),
    ]

    # Run and Back buttons
    run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # The grid is the same for every platform; only extra_inputs differs.
    # Building it unconditionally keeps inputs_columns bound even when
    # platform is neither "HRMS" nor "NRMS".
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs, style=column_style),
        ui.div(extra_inputs, style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div(numeric_inputs[5:9], style=column_style),
        col_widths=(3, 3, 3, 3),
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto",
    )

    # Combine everything
    return ui.div(
        ui.TagList(
            ui.h2("Tune parameters"),
            inputs_columns,
            run_button_parameter_tuning,
            back_button,
            log_panel,
        ),
    )
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
|
|
348
475
|
app_ui = ui.page_fluid(
|
|
349
476
|
ui.output_ui("main_ui"),
|
|
350
477
|
ui.output_text("status_output")
|
|
@@ -361,8 +488,12 @@ def server(input, output, session):
|
|
|
361
488
|
|
|
362
489
|
run_status_plot_spectra = reactive.Value("")
|
|
363
490
|
run_status_spec_lib_matching = reactive.Value("")
|
|
491
|
+
run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
|
|
492
|
+
run_status_parameter_tuning = reactive.Value("")
|
|
493
|
+
is_tuning_running = reactive.Value(False)
|
|
364
494
|
match_log_rv = reactive.Value("")
|
|
365
495
|
is_matching_rv = reactive.Value(False)
|
|
496
|
+
is_any_job_running = reactive.Value(False)
|
|
366
497
|
|
|
367
498
|
query_ids_rv = reactive.Value([])
|
|
368
499
|
query_file_path_rv = reactive.Value(None)
|
|
@@ -377,6 +508,29 @@ def server(input, output, session):
|
|
|
377
508
|
converted_reference_path_rv = reactive.Value(None)
|
|
378
509
|
|
|
379
510
|
|
|
511
|
+
def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
    """Remove and return every item currently in ``q`` without waiting.

    Args:
        q: Queue to empty; items are taken with ``get_nowait``.

    Returns:
        The drained items in FIFO order; an empty list if ``q`` was empty.
    """
    drained = []
    while True:
        # Only get_nowait can raise QueueEmpty, so keep the try that narrow.
        try:
            item = q.get_nowait()
        except asyncio.QueueEmpty:
            return drained
        drained.append(item)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@reactive.effect
async def _pump_logs():
    """Append queued log messages to ``match_log_rv`` while a job is running.

    Does nothing unless ``is_any_job_running`` is true. While it is, the
    effect re-schedules itself every 0.1 s, drains ``_LOG_QUEUE`` and
    appends the drained text to the log value.
    """
    if not is_any_job_running.get():
        return

    reactive.invalidate_later(0.1)
    msgs = _drain_queue_nowait(_LOG_QUEUE)
    if msgs:
        # Read the current log under isolate() so this effect does not take
        # a dependency on the value it is about to write; otherwise every
        # append would invalidate and immediately re-run the effect.
        with reactive.isolate():
            current_log = match_log_rv.get()
        match_log_rv.set(current_log + "".join(msgs))
        await reactive.flush()
|
|
532
|
+
|
|
533
|
+
|
|
380
534
|
def process_database(file_path: str):
|
|
381
535
|
suffix = Path(file_path).suffix.lower()
|
|
382
536
|
return {"path": file_path, "suffix": suffix}
|
|
@@ -385,13 +539,14 @@ def server(input, output, session):
|
|
|
385
539
|
def plot_query_status():
|
|
386
540
|
return query_status_rv.get() or ""
|
|
387
541
|
|
|
542
|
+
@render.text
def plot_reference_status():
    """Return the reference-upload status text, or "" when none is set."""
    status = reference_status_rv.get()
    return status if status else ""
|
|
545
|
+
|
|
388
546
|
|
|
389
547
|
@reactive.effect
|
|
390
548
|
@reactive.event(input.query_data)
|
|
391
549
|
async def _on_query_upload():
|
|
392
|
-
if current_page() != "plot_spectra":
|
|
393
|
-
return
|
|
394
|
-
|
|
395
550
|
files = input.query_data()
|
|
396
551
|
req(files and len(files) > 0)
|
|
397
552
|
|
|
@@ -414,9 +569,6 @@ def server(input, output, session):
|
|
|
414
569
|
@reactive.effect
|
|
415
570
|
@reactive.event(input.reference_data)
|
|
416
571
|
async def _on_reference_upload():
|
|
417
|
-
if current_page() != "plot_spectra":
|
|
418
|
-
return
|
|
419
|
-
|
|
420
572
|
files = input.reference_data()
|
|
421
573
|
req(files and len(files) > 0)
|
|
422
574
|
|
|
@@ -467,6 +619,9 @@ def server(input, output, session):
|
|
|
467
619
|
elif input.run_spec_lib_matching() > match_clicks.get():
|
|
468
620
|
current_page.set("run_spec_lib_matching")
|
|
469
621
|
match_clicks.set(input.run_spec_lib_matching())
|
|
622
|
+
elif input.run_parameter_tuning() > match_clicks.get():
|
|
623
|
+
current_page.set("run_parameter_tuning")
|
|
624
|
+
match_clicks.set(input.run_parameter_tuning())
|
|
470
625
|
elif hasattr(input, "back") and input.back() > back_clicks.get():
|
|
471
626
|
current_page.set("main_menu")
|
|
472
627
|
back_clicks.set(input.back())
|
|
@@ -512,6 +667,7 @@ def server(input, output, session):
|
|
|
512
667
|
),
|
|
513
668
|
ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
514
669
|
ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
670
|
+
ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
|
|
515
671
|
ui.div(
|
|
516
672
|
"References:",
|
|
517
673
|
style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
|
|
@@ -562,14 +718,16 @@ def server(input, output, session):
|
|
|
562
718
|
return plot_spectra_ui(input.chromatography_platform())
|
|
563
719
|
elif current_page() == "run_spec_lib_matching":
|
|
564
720
|
return run_spec_lib_matching_ui(input.chromatography_platform())
|
|
721
|
+
elif current_page() == "run_parameter_tuning":
|
|
722
|
+
return run_parameter_tuning_ui(input.chromatography_platform())
|
|
565
723
|
|
|
566
724
|
|
|
567
725
|
|
|
568
726
|
@reactive.effect
|
|
569
727
|
@reactive.event(input.query_data)
|
|
570
728
|
async def _populate_ids_from_query_upload():
|
|
571
|
-
if current_page() != "plot_spectra":
|
|
572
|
-
|
|
729
|
+
#if current_page() != "plot_spectra":
|
|
730
|
+
# return
|
|
573
731
|
|
|
574
732
|
files = input.query_data()
|
|
575
733
|
if not files:
|
|
@@ -628,8 +786,8 @@ def server(input, output, session):
|
|
|
628
786
|
@reactive.effect
|
|
629
787
|
@reactive.event(input.reference_data)
|
|
630
788
|
async def _populate_ids_from_reference_upload():
|
|
631
|
-
if current_page() != "plot_spectra":
|
|
632
|
-
|
|
789
|
+
#if current_page() != "plot_spectra":
|
|
790
|
+
# return
|
|
633
791
|
|
|
634
792
|
files = input.reference_data()
|
|
635
793
|
if not files:
|
|
@@ -685,19 +843,23 @@ def server(input, output, session):
|
|
|
685
843
|
raise
|
|
686
844
|
|
|
687
845
|
|
|
688
|
-
|
|
689
846
|
@render.download(filename="plot.png")
def run_btn_plot_spectra():
    """Generate the spectra plot for the current inputs and yield it as PNG.

    Reads the spectrum IDs, similarity weights and preprocessing settings
    from the Shiny inputs, calls the platform-specific plotting function
    with ``return_plot=True`` and yields the saved figure as PNG bytes.

    Yields:
        The PNG bytes of the figure (150 dpi, tight bounding box).

    Raises:
        ValueError: If fewer than four weights are supplied, a weight is not
            numeric, or the platform is neither "HRMS" nor "NRMS".
    """
    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    weight_names = ('Cosine', 'Shannon', 'Renyi', 'Tsallis')
    weight_values = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
    if len(weight_values) < len(weight_names):
        # Fail with a clear message instead of a bare IndexError.
        raise ValueError(
            f"Expected {len(weight_names)} comma-separated weights "
            f"({', '.join(weight_names)}), got {len(weight_values)}."
        )
    # zip() keeps only the first four values, as the indexing version did.
    weights = dict(zip(weight_names, weight_values))

    # Arguments accepted by both platform-specific plotting functions.
    shared_kwargs = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=input.high_quality_reference_library(),
        mz_min=input.mz_min(),
        mz_max=input.mz_max(),
        int_min=input.int_min(),
        int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(),
        wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(),
        entropy_dimension=input.entropy_dimension(),
        y_axis_transformation=input.y_axis_transformation(),
        return_plot=True,
    )

    platform = input.chromatography_platform()
    if platform == "HRMS":
        # The window-size inputs exist only on the HRMS page, so read them here.
        fig = generate_plots_on_HRMS_data(
            weights=weights,
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **shared_kwargs,
        )
    elif platform == "NRMS":
        # NOTE(review): weights are not forwarded for NRMS, matching the
        # original call; confirm whether generate_plots_on_NRMS_data
        # accepts a weights argument.
        fig = generate_plots_on_NRMS_data(**shared_kwargs)
    else:
        raise ValueError(f"Unsupported chromatography platform: {platform!r}")

    # NOTE(review): plt.show() is kept from the original; with an interactive
    # backend it may block the server until the window is closed. Confirm
    # this is intended.
    plt.show()

    with io.BytesIO() as buf:
        fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
        # Close this figure explicitly rather than whichever is "current".
        plt.close(fig)
        yield buf.getvalue()
|
|
702
864
|
|
|
703
865
|
|
|
@@ -705,6 +867,7 @@ def server(input, output, session):
|
|
|
705
867
|
def status_output():
    """Return the first non-empty run status across the three workflows.

    Checks the plot-spectra, spectral-library-matching and parameter-tuning
    status values in that order. Previously only the plot-spectra status
    could be returned, because the later ``return`` statements were
    unreachable.
    """
    return (
        run_status_plot_spectra.get()
        or run_status_spec_lib_matching.get()
        or run_status_parameter_tuning.get()
    )
|
|
708
871
|
|
|
709
872
|
|
|
710
873
|
class ReactiveWriter(io.TextIOBase):
|
|
@@ -718,7 +881,6 @@ def server(input, output, session):
|
|
|
718
881
|
def _apply():
|
|
719
882
|
self.rv.set(self.rv.get() + s)
|
|
720
883
|
self.loop.create_task(reactive.flush())
|
|
721
|
-
|
|
722
884
|
self.loop.call_soon_threadsafe(_apply)
|
|
723
885
|
return len(s)
|
|
724
886
|
|
|
@@ -728,22 +890,24 @@ def server(input, output, session):
|
|
|
728
890
|
|
|
729
891
|
@render.download(filename="identification_output.csv")
|
|
730
892
|
async def run_btn_spec_lib_matching():
|
|
731
|
-
# 1) quick first paint
|
|
732
893
|
match_log_rv.set("Starting identification...\n")
|
|
733
894
|
await reactive.flush()
|
|
734
895
|
|
|
735
|
-
# 2) normalize inputs (same as before)
|
|
736
896
|
hq = input.high_quality_reference_library()
|
|
737
897
|
if isinstance(hq, str):
|
|
738
898
|
hq = hq.lower() == "true"
|
|
739
899
|
elif isinstance(hq, (int, float)):
|
|
740
900
|
hq = bool(hq)
|
|
741
901
|
|
|
902
|
+
weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
|
|
903
|
+
weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
|
|
904
|
+
|
|
742
905
|
common_kwargs = dict(
|
|
743
906
|
query_data=input.query_data()[0]["datapath"],
|
|
744
907
|
reference_data=input.reference_data()[0]["datapath"],
|
|
745
908
|
likely_reference_ids=None,
|
|
746
909
|
similarity_measure=input.similarity_measure(),
|
|
910
|
+
weights=weights,
|
|
747
911
|
spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
|
|
748
912
|
high_quality_reference_library=hq,
|
|
749
913
|
mz_min=input.mz_min(), mz_max=input.mz_max(),
|
|
@@ -752,7 +916,7 @@ def server(input, output, session):
|
|
|
752
916
|
wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
|
|
753
917
|
LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
|
|
754
918
|
n_top_matches_to_save=input.n_top_matches_to_save(),
|
|
755
|
-
print_id_results=True, # ensure the library actually prints
|
|
919
|
+
print_id_results=True, # ensure the library actually prints
|
|
756
920
|
output_identification=str(Path.cwd() / "identification_output.csv"),
|
|
757
921
|
output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
|
|
758
922
|
return_ID_output=True,
|
|
@@ -761,7 +925,6 @@ def server(input, output, session):
|
|
|
761
925
|
loop = asyncio.get_running_loop()
|
|
762
926
|
rw = ReactiveWriter(match_log_rv, loop)
|
|
763
927
|
|
|
764
|
-
# 3) run the heavy function in a thread so the event loop can repaint
|
|
765
928
|
try:
|
|
766
929
|
with redirect_stdout(rw), redirect_stderr(rw):
|
|
767
930
|
if input.chromatography_platform() == "HRMS":
|
|
@@ -772,9 +935,7 @@ def server(input, output, session):
|
|
|
772
935
|
**common_kwargs
|
|
773
936
|
)
|
|
774
937
|
else:
|
|
775
|
-
df_out = await asyncio.to_thread(
|
|
776
|
-
run_spec_lib_matching_on_NRMS_data, **common_kwargs
|
|
777
|
-
)
|
|
938
|
+
df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
|
|
778
939
|
match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
|
|
779
940
|
await reactive.flush()
|
|
780
941
|
except Exception as e:
|
|
@@ -782,8 +943,212 @@ def server(input, output, session):
|
|
|
782
943
|
await reactive.flush()
|
|
783
944
|
raise
|
|
784
945
|
|
|
785
|
-
|
|
946
|
+
yield df_out.to_csv(index=True)
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
@render.download(filename="plot.png")
def run_btn_plot_spectra_within_spec_lib_matching():
    """Plot the selected query/reference spectra and stream the figure as a PNG.

    Reads the uploaded query and reference files plus the preprocessing and
    similarity settings from the Shiny inputs, calls the HRMS or NRMS plotting
    routine depending on ``input.chromatography_platform()``, and yields the
    rendered PNG bytes for the download.

    Raises:
        ValueError: if fewer than four mixture weights are supplied.
    """
    req(input.query_data(), input.reference_data())

    # Empty text inputs are forwarded as None.
    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    # The UI may hand back the flag as text ("True"/"False") or as a number.
    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    elif isinstance(hq, (int, float)):
        hq = bool(hq)

    # Weights arrive as one comma-separated string; blank entries are ignored.
    weight_values = [float(w.strip()) for w in input.weights().split(",") if w.strip()]
    if len(weight_values) < 4:
        raise ValueError(
            "Expected four comma-separated mixture weights "
            f"(Cosine, Shannon, Renyi, Tsallis); got {len(weight_values)}."
        )
    # zip() stops after the four names, so extra values are ignored as before.
    weights = dict(zip(('Cosine', 'Shannon', 'Renyi', 'Tsallis'), weight_values))

    common = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        weights=weights,
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        y_axis_transformation="normalized",
        # The result is used as a figure below, so the plotting routine is
        # expected to return one when return_plot=True.
        return_plot=True
    )

    if input.chromatography_platform() == "HRMS":
        fig = generate_plots_on_HRMS_data(
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **common
        )
    else:
        fig = generate_plots_on_NRMS_data(**common)

    # No plt.show() here: the figure is only serialized for the download, and
    # show() would block on an interactive backend inside the server process.
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
    finally:
        # Close this specific figure (not merely the "current" one), even if
        # saving failed, so figures do not accumulate across downloads.
        plt.close(fig)
    yield buf.getvalue()
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
'''
|
|
1002
|
+
@render.download(filename="parameter_tuning_output.csv")
|
|
1003
|
+
async def run_btn_parameter_tuning():
|
|
1004
|
+
match_log_rv.set("Running grid search of all parameters specified...\n")
|
|
1005
|
+
|
|
1006
|
+
similarity_measure_tmp = list(input.similarity_measure())
|
|
1007
|
+
high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
|
|
1008
|
+
spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
|
|
1009
|
+
mz_min_tmp = strip_numeric(input.mz_min())
|
|
1010
|
+
mz_max_tmp = strip_numeric(input.mz_max())
|
|
1011
|
+
int_min_tmp = strip_numeric(input.int_min())
|
|
1012
|
+
int_max_tmp = strip_numeric(input.int_max())
|
|
1013
|
+
noise_threshold_tmp = strip_numeric(input.noise_threshold())
|
|
1014
|
+
wf_mz_tmp = strip_numeric(input.wf_mz())
|
|
1015
|
+
wf_int_tmp = strip_numeric(input.wf_int())
|
|
1016
|
+
LET_threshold_tmp = strip_numeric(input.LET_threshold())
|
|
1017
|
+
entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
|
|
1018
|
+
weights_tmp = strip_weights(input.weights())
|
|
1019
|
+
|
|
1020
|
+
common_kwargs = dict(
|
|
1021
|
+
query_data=input.query_data()[0]["datapath"],
|
|
1022
|
+
reference_data=input.reference_data()[0]["datapath"],
|
|
1023
|
+
output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
|
|
1024
|
+
return_output=True
|
|
1025
|
+
)
|
|
1026
|
+
|
|
1027
|
+
loop = asyncio.get_running_loop()
|
|
1028
|
+
rw = ReactiveWriter(match_log_rv, loop)
|
|
1029
|
+
|
|
1030
|
+
try:
|
|
1031
|
+
with redirect_stdout(rw), redirect_stderr(rw):
|
|
1032
|
+
if input.chromatography_platform() == "HRMS":
|
|
1033
|
+
window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
|
|
1034
|
+
window_size_matching_tmp = strip_numeric(input.window_size_matching())
|
|
1035
|
+
grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp, 'window_size_centroiding':window_size_centroiding_tmp, 'window_size_matching':window_size_matching_tmp}
|
|
1036
|
+
df_out = await asyncio.to_thread(tune_params_on_HRMS_data, **common_kwargs, grid=grid)
|
|
1037
|
+
else:
|
|
1038
|
+
grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp}
|
|
1039
|
+
df_out = await asyncio.to_thread(tune_params_on_NRMS_data, **common_kwargs, grid=grid)
|
|
1040
|
+
match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
|
|
1041
|
+
#await reactive.flush()
|
|
1042
|
+
except Exception as e:
|
|
1043
|
+
match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
|
|
1044
|
+
#await reactive.flush()
|
|
1045
|
+
raise
|
|
1046
|
+
|
|
786
1047
|
yield df_out.to_csv(index=False)
|
|
1048
|
+
'''
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
@render.download(filename="parameter_tuning_output.csv")
async def run_btn_parameter_tuning():
    """Run the parameter grid search and stream the results as a CSV download.

    Marks the tuning job as running, parses every tuning input into a list of
    candidate values, runs the HRMS or NRMS tuner in a worker thread with
    stdout/stderr redirected into the log queue, and finally yields the
    resulting table as UTF-8 encoded CSV bytes.

    Any exception is appended to the match log and re-raised. The running
    flags are always reset and the remaining queued log text is flushed.
    """
    is_any_job_running.set(True)
    is_tuning_running.set(True)
    match_log_rv.set("Running grid search of all parameters specified...\n")
    # Discard log text left over from a previous run.
    _drain_queue_nowait(_LOG_QUEUE)

    try:
        # Input parsing lives inside the try block so that a malformed input
        # cannot leave the "running" flags stuck at True.
        similarity_measure_tmp = list(input.similarity_measure())
        high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
        spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
        mz_min_tmp = strip_numeric(input.mz_min())
        mz_max_tmp = strip_numeric(input.mz_max())
        int_min_tmp = strip_numeric(input.int_min())
        int_max_tmp = strip_numeric(input.int_max())
        noise_threshold_tmp = strip_numeric(input.noise_threshold())
        wf_mz_tmp = strip_numeric(input.wf_mz())
        wf_int_tmp = strip_numeric(input.wf_int())
        LET_threshold_tmp = strip_numeric(input.LET_threshold())
        entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
        weights_tmp = strip_weights(input.weights())

        common_kwargs = dict(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
            return_output=True
        )

        # Parameters shared by both platforms; HRMS adds its window sizes below.
        grid = {
            'similarity_measure': similarity_measure_tmp,
            'weight': weights_tmp,
            'spectrum_preprocessing_order': spectrum_preprocessing_order_tmp,
            'mz_min': mz_min_tmp,
            'mz_max': mz_max_tmp,
            'int_min': int_min_tmp,
            'int_max': int_max_tmp,
            'noise_threshold': noise_threshold_tmp,
            'wf_mz': wf_mz_tmp,
            'wf_int': wf_int_tmp,
            'LET_threshold': LET_threshold_tmp,
            'entropy_dimension': entropy_dimension_tmp,
            'high_quality_reference_library': high_quality_reference_library_tmp,
        }

        # One writer serves both streams; it only forwards text to the log queue.
        rw = ReactiveWriter(match_log_rv, asyncio.get_running_loop())
        with redirect_stdout(rw), redirect_stderr(rw):
            if input.chromatography_platform() == "HRMS":
                grid['window_size_centroiding'] = strip_numeric(input.window_size_centroiding())
                grid['window_size_matching'] = strip_numeric(input.window_size_matching())
                tune = tune_params_on_HRMS_data
            else:
                tune = tune_params_on_NRMS_data
            # Run the grid search off the event loop so the UI stays responsive.
            df_out = await asyncio.to_thread(tune, **common_kwargs, grid=grid)

        match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        raise
    finally:
        is_tuning_running.set(False)
        is_any_job_running.set(False)
        # Pick up anything written after the pump effect last ran.
        trailing = _drain_queue_nowait(_LOG_QUEUE)
        if trailing:
            match_log_rv.set(match_log_rv.get() + "".join(trailing))
        await reactive.flush()

    csv_bytes = df_out.to_csv(index=False).encode('utf-8')
    yield csv_bytes
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
@render.text
def status_output():
    """Return the current plot-spectra run status for the text output."""
    # NOTE(review): the original body had two further return statements
    # (run_status_spec_lib_matching.get() and run_status_parameter_tuning.get())
    # after this one. They could never execute and have been removed; if all
    # three statuses should be shown they need to be combined into one value.
    # NOTE(review): a second status_output with the same body is defined later
    # in this file; confirm which definition is meant to remain.
    return run_status_plot_spectra.get()
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
class ReactiveWriter(io.TextIOBase):
    """File-like text sink that forwards written text to the shared log queue.

    Each non-empty write is scheduled onto the given event loop with
    ``call_soon_threadsafe``, where it is put on ``_LOG_QUEUE``.
    """

    def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
        # The loop on which queue insertions are scheduled.
        self._loop = loop
        # Stored for reference; no method of this class reads it.
        self._rv = rv

    def write(self, s: str):
        """Queue ``s`` for the log and return the number of characters accepted."""
        if s:
            self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
            return len(s)
        # Nothing to forward for an empty write.
        return 0

    def flush(self):
        """Do nothing: writes are handed off immediately, so nothing is buffered."""
        return None
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
@reactive.effect
async def _pump_reactive_writer_logs():
    """Move queued log text into ``match_log_rv`` while a tuning job is running.

    Does nothing unless ``is_tuning_running`` is set. Otherwise it asks to be
    re-run after 0.1 and appends whatever text is currently queued to the log.
    """
    if is_tuning_running.get():
        # Re-arm first so polling continues even when the queue is empty.
        reactive.invalidate_later(0.1)
        pending = _drain_queue_nowait(_LOG_QUEUE)
        if not pending:
            return
        appended = "".join(pending)
        match_log_rv.set(match_log_rv.get() + appended)
        await reactive.flush()
|
|
1143
|
+
|
|
1144
|
+
|
|
1145
|
+
|
|
1146
|
+
@render.text
def status_output():
    """Return the current plot-spectra run status for the text output."""
    # NOTE(review): the original body had two further return statements
    # (run_status_spec_lib_matching.get() and run_status_parameter_tuning.get())
    # after this one. They could never execute and have been removed; if all
    # three statuses should be shown they need to be combined into one value.
    # NOTE(review): an earlier status_output with the same body is defined in
    # this file; confirm which definition is meant to remain.
    return run_status_plot_spectra.get()
|
|
1151
|
+
|
|
787
1152
|
|
|
788
1153
|
|
|
789
1154
|
app = App(app_ui, server)
|
|
@@ -177,8 +177,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
177
177
|
spec_tmp = spectrum_ID1
|
|
178
178
|
spectrum_ID1 = spectrum_ID2
|
|
179
179
|
spectrum_ID2 = spec_tmp
|
|
180
|
-
print(unique_query_ids)
|
|
181
|
-
print(spectrum_ID1)
|
|
182
180
|
query_idx = unique_query_ids.index(spectrum_ID1)
|
|
183
181
|
reference_idx = unique_reference_ids.index(spectrum_ID2)
|
|
184
182
|
q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
|
|
@@ -311,7 +309,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
311
309
|
plt.figlegend(loc = 'upper center')
|
|
312
310
|
fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
|
|
313
311
|
fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
|
|
314
|
-
fig.text(0.05, 0.12, f
|
|
312
|
+
fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
|
|
315
313
|
fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
|
|
316
314
|
fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
|
|
317
315
|
fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
|
|
@@ -320,6 +318,9 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
320
318
|
fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
|
|
321
319
|
fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
|
|
322
320
|
fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
|
|
321
|
+
if similarity_measure == 'mixture':
|
|
322
|
+
fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
|
|
323
|
+
|
|
323
324
|
plt.savefig(output_path, format='pdf')
|
|
324
325
|
|
|
325
326
|
if return_plot == True:
|
|
@@ -606,13 +607,15 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
606
607
|
plt.figlegend(loc = 'upper center')
|
|
607
608
|
fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
|
|
608
609
|
fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
|
|
609
|
-
fig.text(0.05, 0.09, f
|
|
610
|
+
fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
|
|
610
611
|
fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
|
|
611
612
|
fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
|
|
612
613
|
fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
|
|
613
614
|
fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
|
|
614
615
|
fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
|
|
615
616
|
fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
|
|
617
|
+
if similarity_measure=='mixture':
|
|
618
|
+
fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
|
|
616
619
|
plt.savefig(output_path, format='pdf')
|
|
617
620
|
|
|
618
621
|
if return_plot == True:
|
|
@@ -9,6 +9,12 @@ from pathlib import Path
|
|
|
9
9
|
import json
|
|
10
10
|
from itertools import product
|
|
11
11
|
from joblib import Parallel, delayed
|
|
12
|
+
import csv
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
|
|
16
|
+
default_NRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
|
|
17
|
+
|
|
12
18
|
|
|
13
19
|
def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
14
20
|
similarity_measure_tmp, weight,
|
|
@@ -71,7 +77,8 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
|
|
|
71
77
|
)
|
|
72
78
|
|
|
73
79
|
|
|
74
|
-
|
|
80
|
+
|
|
81
|
+
def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
75
82
|
"""
|
|
76
83
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
|
|
77
84
|
|
|
@@ -81,6 +88,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
|
|
|
81
88
|
--output_path: accuracy from each choice of parameter set is saved to a CSV file here.
|
|
82
89
|
"""
|
|
83
90
|
|
|
91
|
+
grid = {**default_HRMS_grid, **(grid or {})}
|
|
84
92
|
for key, value in grid.items():
|
|
85
93
|
globals()[key] = value
|
|
86
94
|
|
|
@@ -118,24 +126,35 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
|
|
|
118
126
|
print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
|
|
119
127
|
|
|
120
128
|
if output_path is None:
|
|
121
|
-
output_path = f'{Path.cwd()}/tuning_param_output.
|
|
129
|
+
output_path = f'{Path.cwd()}/tuning_param_output.txt'
|
|
122
130
|
print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
|
|
123
131
|
|
|
124
|
-
# build parameter grid out of the lists you already set
|
|
125
132
|
param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold,
|
|
126
133
|
window_size_centroiding, window_size_matching, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
|
|
127
|
-
# run in parallel on all CPUs
|
|
128
134
|
results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_HRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
|
|
129
135
|
|
|
130
136
|
df_out = pd.DataFrame(results, columns=[
|
|
131
137
|
'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX','NOISE.THRESHOLD',
|
|
132
138
|
'WINDOW.SIZE.CENTROIDING','WINDOW.SIZE.MATCHING', 'WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
|
|
133
139
|
])
|
|
134
|
-
df_out = df_out
|
|
135
|
-
df_out.
|
|
140
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
|
|
141
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
|
|
142
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
|
|
143
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
|
|
144
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
|
|
145
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
|
|
146
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
|
|
147
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
|
|
148
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
|
|
149
|
+
df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
|
|
150
|
+
|
|
151
|
+
if return_output is False:
|
|
152
|
+
df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
|
|
153
|
+
else:
|
|
154
|
+
return df_out
|
|
136
155
|
|
|
137
156
|
|
|
138
|
-
def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=
|
|
157
|
+
def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
139
158
|
"""
|
|
140
159
|
runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
|
|
141
160
|
|
|
@@ -145,10 +164,10 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
|
|
|
145
164
|
--output_path: accuracy from each choice of parameter set is saved to a CSV file here
|
|
146
165
|
"""
|
|
147
166
|
|
|
167
|
+
grid = {**default_NRMS_grid, **(grid or {})}
|
|
148
168
|
for key, value in grid.items():
|
|
149
169
|
globals()[key] = value
|
|
150
170
|
|
|
151
|
-
# load query and reference libraries
|
|
152
171
|
if query_data is None:
|
|
153
172
|
print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
|
|
154
173
|
sys.exit()
|
|
@@ -182,21 +201,30 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
|
|
|
182
201
|
print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
|
|
183
202
|
|
|
184
203
|
if output_path is None:
|
|
185
|
-
output_path = f'{Path.cwd()}/tuning_param_output.
|
|
204
|
+
output_path = f'{Path.cwd()}/tuning_param_output.txt'
|
|
186
205
|
print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
|
|
187
206
|
|
|
188
|
-
# build parameter grid out of the lists you already set
|
|
189
207
|
param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max,
|
|
190
208
|
noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
|
|
191
|
-
# run in parallel on all CPUs
|
|
192
209
|
results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_NRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
|
|
193
210
|
|
|
194
211
|
df_out = pd.DataFrame(results, columns=[
|
|
195
212
|
'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX',
|
|
196
213
|
'NOISE.THRESHOLD','WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
|
|
197
214
|
])
|
|
198
|
-
df_out = df_out
|
|
199
|
-
df_out.
|
|
215
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
|
|
216
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
|
|
217
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
|
|
218
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
|
|
219
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
|
|
220
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
|
|
221
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
|
|
222
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
|
|
223
|
+
df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
|
|
224
|
+
if return_output is False:
|
|
225
|
+
df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
|
|
226
|
+
else:
|
|
227
|
+
return df_out
|
|
200
228
|
|
|
201
229
|
|
|
202
230
|
|
|
@@ -413,8 +441,8 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
|
|
|
413
441
|
--entropy_dimension: Entropy dimension parameter. Must have positive value other than 1. When the entropy dimension is 1, then Renyi and Tsallis entropy are equivalent to Shannon entropy. Therefore, this parameter only applies to the renyi and tsallis similarity measures. This parameter will be ignored if similarity measure cosine or shannon is chosen. Default: 1.1
|
|
414
442
|
--n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
|
|
415
443
|
--print_id_results: Flag that prints identification results if True. Default: False
|
|
416
|
-
--output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.
|
|
417
|
-
--output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.
|
|
444
|
+
--output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
|
|
445
|
+
--output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining columns contain the similarity scores with respect to all reference library spectra. If no argument is passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores.txt\'.
|
|
418
446
|
'''
|
|
419
447
|
|
|
420
448
|
# load query and reference libraries
|
|
@@ -528,11 +556,11 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
|
|
|
528
556
|
sys.exit()
|
|
529
557
|
|
|
530
558
|
if output_identification is None:
|
|
531
|
-
output_identification = f'{Path.cwd()}/output_identification.
|
|
559
|
+
output_identification = f'{Path.cwd()}/output_identification.txt'
|
|
532
560
|
print(f'Warning: writing identification output to {output_identification}')
|
|
533
561
|
|
|
534
562
|
if output_similarity_scores is None:
|
|
535
|
-
output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.
|
|
563
|
+
output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
|
|
536
564
|
print(f'Warning: writing similarity scores to {output_similarity_scores}')
|
|
537
565
|
|
|
538
566
|
|
|
@@ -644,10 +672,10 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
|
|
|
644
672
|
|
|
645
673
|
if return_ID_output is False:
|
|
646
674
|
# write spectral library matching results to disk
|
|
647
|
-
df_top_ref_specs.to_csv(output_identification)
|
|
675
|
+
df_top_ref_specs.to_csv(output_identification, sep='\t')
|
|
648
676
|
|
|
649
677
|
# write all similarity scores to disk
|
|
650
|
-
df_scores.to_csv(output_similarity_scores)
|
|
678
|
+
df_scores.to_csv(output_similarity_scores, sep='\t')
|
|
651
679
|
else:
|
|
652
680
|
return df_top_ref_specs
|
|
653
681
|
|
|
@@ -678,8 +706,8 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
678
706
|
--normalization_method: Method used to normalize the intensities of each spectrum so that the intensities sum to 1. Since entropy quantifies the uncertainty of a probability distribution, the intensities of a given spectrum must sum to 1 prior to computing the entropy of the given spectrum intensities. Options: \'standard\' and \'softmax\'. Default: standard.
|
|
679
707
|
--n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
|
|
680
708
|
--print_id_results: Flag that prints identification results if True. Default: False
|
|
681
|
-
--output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.
|
|
682
|
-
--output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.
|
|
709
|
+
--output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
|
|
710
|
+
--output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining columns contain the similarity scores with respect to all reference library spectra. If no argument is passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores.txt\'.
|
|
683
711
|
'''
|
|
684
712
|
|
|
685
713
|
# load query and reference libraries
|
|
@@ -778,11 +806,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
778
806
|
sys.exit()
|
|
779
807
|
|
|
780
808
|
if output_identification is None:
|
|
781
|
-
output_identification = f'{Path.cwd()}/output_identification.
|
|
809
|
+
output_identification = f'{Path.cwd()}/output_identification.txt'
|
|
782
810
|
print(f'Warning: writing identification output to {output_identification}')
|
|
783
811
|
|
|
784
812
|
if output_similarity_scores is None:
|
|
785
|
-
output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.
|
|
813
|
+
output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
|
|
786
814
|
print(f'Warning: writing similarity scores to {output_similarity_scores}')
|
|
787
815
|
|
|
788
816
|
|
|
@@ -894,11 +922,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
894
922
|
|
|
895
923
|
if return_ID_output is False:
|
|
896
924
|
# write spectral library matching results to disk
|
|
897
|
-
df_top_ref_specs.to_csv(output_identification)
|
|
925
|
+
df_top_ref_specs.to_csv(output_identification, sep='\t')
|
|
898
926
|
|
|
899
927
|
# write all similarity scores to disk
|
|
900
928
|
df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
|
|
901
|
-
df_scores.to_csv(output_similarity_scores)
|
|
929
|
+
df_scores.to_csv(output_similarity_scores, sep='\t')
|
|
902
930
|
else:
|
|
903
931
|
return df_top_ref_specs
|
|
904
932
|
|
|
@@ -40,8 +40,7 @@ else:
|
|
|
40
40
|
sys.exit()
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':args.weights, 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
|
|
44
|
-
|
|
43
|
+
grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':[args.weights], 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
|
|
45
44
|
|
|
46
45
|
if args.chromatography_platform == 'HRMS':
|
|
47
46
|
grid['mz_min'] = [float(x) for x in grid['mz_min']]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
|
-
from
|
|
3
|
-
from
|
|
2
|
+
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
3
|
+
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
import os
|
|
6
6
|
|
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
|
|
2
|
-
from
|
|
3
|
-
from
|
|
2
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
3
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
import os
|
|
6
6
|
|
|
7
7
|
print('\n\ntest #1:')
|
|
8
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test1.
|
|
8
|
+
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
|
|
9
9
|
|
|
10
|
+
'''
|
|
10
11
|
print('\n\ntest #2:')
|
|
11
|
-
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test2.
|
|
12
|
+
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
|
|
12
13
|
|
|
13
14
|
print('\n\ntest #3:')
|
|
14
|
-
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test3.
|
|
15
|
+
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
|
|
15
16
|
|
|
16
17
|
print('\n\ntest #4:')
|
|
17
|
-
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test4.
|
|
18
|
+
tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
|
|
19
|
+
|
|
20
|
+
print('\n\ntest #5:')
|
|
21
|
+
tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]}, output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')
|
|
22
|
+
'''
|
|
18
23
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|