pycompound 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycompound/build_library.py +77 -20
- pycompound/plot_spectra.py +73 -111
- pycompound/processing.py +5 -5
- pycompound/spec_lib_matching.py +262 -491
- pycompound/spec_lib_matching_CLI.py +48 -2
- pycompound/tuning_CLI_DE.py +22 -22
- pycompound/tuning_CLI_grid.py +22 -6
- pycompound-0.1.8.dist-info/METADATA +824 -0
- pycompound-0.1.8.dist-info/RECORD +14 -0
- {pycompound-0.1.6.dist-info → pycompound-0.1.8.dist-info}/top_level.txt +0 -1
- app.py +0 -1519
- pycompound-0.1.6.dist-info/METADATA +0 -27
- pycompound-0.1.6.dist-info/RECORD +0 -15
- {pycompound-0.1.6.dist-info → pycompound-0.1.8.dist-info}/WHEEL +0 -0
- {pycompound-0.1.6.dist-info → pycompound-0.1.8.dist-info}/licenses/LICENSE +0 -0
app.py
DELETED
|
@@ -1,1519 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
from shiny import App, ui, reactive, render, req
|
|
3
|
-
from shiny.types import SilentException
|
|
4
|
-
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
5
|
-
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
6
|
-
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
7
|
-
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
8
|
-
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid_shiny
|
|
9
|
-
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid_shiny
|
|
10
|
-
from pycompound.spec_lib_matching import tune_params_DE
|
|
11
|
-
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
12
|
-
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
from contextlib import redirect_stdout, redirect_stderr
|
|
15
|
-
import contextlib
|
|
16
|
-
import subprocess
|
|
17
|
-
import traceback
|
|
18
|
-
import asyncio
|
|
19
|
-
import io
|
|
20
|
-
import os
|
|
21
|
-
import sys
|
|
22
|
-
import matplotlib.pyplot as plt
|
|
23
|
-
import pandas as pd
|
|
24
|
-
import numpy as np
|
|
25
|
-
import netCDF4 as nc
|
|
26
|
-
from pyteomics import mgf, mzml
|
|
27
|
-
import ast
|
|
28
|
-
from numbers import Real
|
|
29
|
-
import logging
|
|
30
|
-
from scipy.optimize import differential_evolution
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
# Module-level asyncio queue of text chunks, created at import time.
# NOTE(review): presumably drained by the server to feed a live log view —
# confirm against the consumer, which is outside this section.
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
34
|
-
|
|
35
|
-
class _UIWriter:
|
|
36
|
-
def __init__(self, loop, q: asyncio.Queue[str]):
|
|
37
|
-
self._loop = loop
|
|
38
|
-
self._q = q
|
|
39
|
-
def write(self, s: str):
|
|
40
|
-
if s:
|
|
41
|
-
self._loop.call_soon_threadsafe(self._q.put_nowait, s)
|
|
42
|
-
return len(s)
|
|
43
|
-
def flush(self):
|
|
44
|
-
pass
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def attach_logging_to_writer(writer):
    """Route root-logger output (INFO and above) to *writer*.

    A ``logging.StreamHandler`` wrapping *writer* is added to the root logger
    and both the handler and the root logger are set to ``INFO``.

    Returns:
        ``(handler, root_logger)``.
    """
    root_logger = logging.getLogger()
    stream_handler = logging.StreamHandler(writer)
    stream_handler.setLevel(logging.INFO)
    root_logger.addHandler(stream_handler)
    root_logger.setLevel(logging.INFO)
    return stream_handler, root_logger
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def _run_with_redirects(fn, writer, *args, **kwargs):
|
|
58
|
-
with redirect_stdout(writer), redirect_stderr(writer):
|
|
59
|
-
return fn(*args, **kwargs)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def strip_text(s):
    """Split a bracketed, comma-separated string into trimmed, non-empty items.

    Leading/trailing ``[`` and ``]`` characters are removed first; items that
    are empty after trimming whitespace are dropped.
    """
    trimmed_items = (piece.strip() for piece in s.strip('[]').split(','))
    return [piece for piece in trimmed_items if piece]
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def strip_numeric(s):
    """Parse a bracketed, comma-separated string into a list of floats.

    Leading/trailing ``[`` and ``]`` characters are removed first; blank
    items are skipped. ``float`` raises ``ValueError`` for non-numeric items.
    """
    values = []
    for token in s.strip('[]').split(','):
        token = token.strip()
        if token:
            values.append(float(token))
    return values
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def strip_weights(s):
    """Convert mixture weights into a list of per-measure dictionaries.

    *s* may be a string/bytes literal (parsed with ``ast.literal_eval``) or an
    already-parsed list/tuple. A single sequence of four real numbers is
    treated as one weight set; any other list/tuple is treated as a sequence
    of weight sets.

    Returns:
        A list of dicts keyed ``'Cosine'``, ``'Shannon'``, ``'Renyi'``,
        ``'Tsallis'``, one per weight set.

    Raises:
        ValueError: if the parsed value is not a list/tuple, or if any weight
            set is not a list/tuple of length four.
    """
    parsed = s
    if isinstance(s, (str, bytes)):
        parsed = ast.literal_eval(s)

    if not isinstance(parsed, (list, tuple)):
        raise ValueError(f"Expected a 4-tuple or a sequence of 4-tuples, got {type(parsed).__name__}")

    # Four plain numbers mean "one weight set", not "four weight sets".
    is_single_set = len(parsed) == 4 and all(isinstance(v, Real) for v in parsed)
    candidates = [parsed] if is_single_set else list(parsed)

    measure_names = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
    weight_dicts = []
    for item in candidates:
        if not isinstance(item, (list, tuple)) or len(item) != 4:
            raise ValueError(f"Each item must be a 4-tuple, got: {item!r}")
        weight_dicts.append(dict(zip(measure_names, item)))
    return weight_dicts
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def build_library(input_path=None, output_path=None):
    """Load a spectral library file into a DataFrame.

    The file type is chosen from the (case-insensitive) ending of
    *input_path*:

    * ``csv``  - returned as-is via ``pandas.read_csv``.
    * ``mgf``  - read with ``pyteomics.mgf``; each spectrum's id is its
      ``name`` param, falling back to ``title`` and then ``ID_<n>``.
    * ``mzML`` - read with ``pyteomics.mzml``; ids are ``ID_<n>``.
    * ``cdf``  - read with ``netCDF4`` from the ``mass_values``,
      ``intensity_values`` and ``scan_index`` variables; ids are ``ID_<n>``.
    * ``msp``  - parsed as text; ``Name:`` lines (any case) start a spectrum
      and lines beginning with a digit are read as ``m/z intensity`` pairs.

    Args:
        input_path: Path of the file to read. Required.
        output_path: Unused; kept so existing callers keep working.

    Returns:
        For csv input, the DataFrame read from the file. Otherwise a
        DataFrame with one row per peak and columns ``id``, ``mz_ratio`` and
        ``intensity``.

    Raises:
        ValueError: if *input_path* is ``None``.
        SystemExit: if the file ending is not one of the supported types
            (an error message is printed first).
    """
    if input_path is None:
        raise ValueError('input_path must be provided')

    # Compare endings case-insensitively so e.g. '.Csv' or '.MzML' also work.
    lowered = str(input_path).lower()
    if lowered.endswith('csv'):
        return pd.read_csv(input_path)

    if lowered.endswith('mgf'):
        input_file_type = 'mgf'
    elif lowered.endswith('mzml'):
        input_file_type = 'mzML'
    elif lowered.endswith('cdf'):
        input_file_type = 'cdf'
    elif lowered.endswith('msp'):
        input_file_type = 'msp'
    else:
        print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
        sys.exit()

    ids = []
    mzs = []
    ints = []

    if input_file_type == 'mgf':
        with mgf.read(input_path, index_by_scans = True) as reader:
            for i, spec in enumerate(reader):
                params = spec['params']
                # NOTE(review): pyteomics is expected to lower-case param
                # keys, so TITLE= should appear as 'title' — confirm. The
                # fallback keeps ids in line with extract_first_column_ids,
                # which reads TITLE= lines.
                spectrum_id = params.get('name')
                if spectrum_id is None:
                    spectrum_id = params.get('title', f'ID_{i+1}')
                mz_array = spec['m/z array']
                ids.extend([spectrum_id] * len(mz_array))
                mzs.extend(mz_array)
                ints.extend(spec['intensity array'])

    elif input_file_type == 'mzML':
        with mzml.read(input_path) as reader:
            for i, spec in enumerate(reader):
                mz_array = spec['m/z array']
                ids.extend([f'ID_{i+1}'] * len(mz_array))
                mzs.extend(mz_array)
                ints.extend(spec['intensity array'])

    elif input_file_type == 'cdf':
        dataset = nc.Dataset(input_path, 'r')
        try:
            all_mzs = dataset.variables['mass_values'][:]
            all_ints = dataset.variables['intensity_values'][:]
            scan_idxs = dataset.variables['scan_index'][:]
        finally:
            # Release the file handle even if a variable is missing.
            dataset.close()

        n_scans = len(scan_idxs)
        n_points = len(all_mzs)
        for i in range(n_scans):
            if i % 1000 == 0:
                print(f'analyzed {i} out of {n_scans} scans')
            s_idx = scan_idxs[i]
            # The last scan has no successor entry; it runs to the end of the
            # point arrays. (Previously this scan was silently dropped.)
            e_idx = scan_idxs[i + 1] if i + 1 < n_scans else n_points

            mzs_tmp = all_mzs[s_idx:e_idx]
            ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
            mzs.extend(mzs_tmp)
            ints.extend(all_ints[s_idx:e_idx])

    elif input_file_type == 'msp':
        spectrum_id = None
        with open(input_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line.lower().startswith('name:'):
                    spectrum_id = line.split(':', 1)[1].strip()
                elif line and line[0].isdigit():
                    if spectrum_id is None:
                        # Peak-like line before any Name: header; there is no
                        # spectrum to attach it to.
                        continue
                    try:
                        mz, intensity = map(float, line.split()[:2])
                    except ValueError:
                        # Not a plain "m/z intensity" pair; skip the line.
                        continue
                    ids.append(spectrum_id)
                    mzs.append(mz)
                    ints.append(intensity)

    df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
    return df
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
def extract_first_column_ids(file_path: str, max_ids: int = 20000):
    """Return up to *max_ids* distinct spectrum IDs found in a library file.

    For ``.csv`` files the IDs are the non-missing, non-blank values of the
    first column. For any other file the text is scanned line by line and the
    value after ``TITLE=`` or after a ``name:`` prefix (any case) is taken as
    an ID. In both cases the order of first appearance is preserved.

    Args:
        file_path: Path of the file to scan.
        max_ids: Maximum number of distinct IDs to return.

    Returns:
        A list of ID strings; empty if none are found or if a non-CSV file
        cannot be opened.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix == ".csv":
        df = pd.read_csv(file_path, usecols=[0])
        # Drop missing cells *before* casting: astype(str) would otherwise
        # turn NaN into the literal ID "nan".
        column = df.iloc[:, 0].dropna().astype(str)
        unique = dict.fromkeys(x for x in column if x.strip() != "")
        return list(unique)[:max_ids]

    # dict keys give order-preserving de-duplication.
    unique = {}
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                ls = line.strip()
                if ls.startswith("TITLE="):
                    unique.setdefault(ls.split("=", 1)[1].strip())
                elif ls.lower().startswith("name:"):
                    unique.setdefault(ls.split(":", 1)[1].strip())
                # Count distinct IDs so duplicates do not eat into the limit.
                if len(unique) >= max_ids:
                    break
    except (OSError, ValueError):
        # Best effort: an unreadable file yields whatever was collected.
        pass

    return list(unique)[:max_ids]
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
|
|
221
|
-
"""Send PNG bytes to browser and open in a new window as a data URL."""
|
|
222
|
-
b64 = base64.b64encode(png_bytes).decode("ascii")
|
|
223
|
-
data_url = f"data:image/png;base64,{b64}"
|
|
224
|
-
session.send_custom_message("open-plot-window", {"png": data_url, "title": title})
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
def plot_spectra_ui(platform: str):
    """Build the "Plot Spectra" page.

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching window-size inputs
            and the HRMS preprocessing-order default; any other value gets
            the NRMS preprocessing-order input only.

    Returns:
        A ``ui.div`` holding the page title, a four-column grid of inputs,
        the run (download) button, the back button and two status outputs.

    The input grid used to be built in two identical ``HRMS``/``NRMS``
    branches, which left ``inputs_columns`` unbound for any other *platform*
    value; it is now built once, so such values fall through to the NRMS
    inputs just as ``extra_inputs`` already did.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize(
            "spectrum_ID1",
            "Select spectrum ID 1 (default is the first spectrum in the library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
        ui.input_selectize(
            "spectrum_ID2",
            "Select spectrum ID 2 (default is the first spectrum in the library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
        ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
    ]
    high_quality_input = ui.input_select(
        "high_quality_reference_library",
        "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
        [False, True],
    )

    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "FCNMWL",
            ),
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "FNLW",
            )
        ]

    numeric_inputs = [
        ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
        ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
        ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
        ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
        ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
        ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
        ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
        ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
        ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
    ]

    select_input = ui.input_select(
        "y_axis_transformation",
        "Transformation to apply to intensity axis:",
        ["normalized", "none", "log10", "sqrt"],
    )

    run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # Same four columns for every platform; only extra_inputs differs.
    inputs_columns = ui.layout_columns(
        ui.div(*base_inputs, style=column_style),
        ui.div(high_quality_input, *extra_inputs, style=column_style),
        ui.div(*numeric_inputs[:5], style=column_style),
        ui.div(*numeric_inputs[5:], select_input, style=column_style),
        col_widths=(3,3,3,3),
    )

    return ui.div(
        ui.TagList(
            ui.h2("Plot Spectra"),
            inputs_columns,
            run_button_plot_spectra,
            back_button,
            ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px"),
            ui.div(ui.output_text("plot_reference_status"), style="margin-top:8px; font-size:14px")
        ),
    )
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
def run_spec_lib_matching_ui(platform: str):
    """Build the "Run Spectral Library Matching" page.

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching window-size inputs
            and the HRMS preprocessing-order default; any other value gets
            the NRMS preprocessing-order input only.

    Returns:
        A ``ui.div`` holding the page title, a four-column grid of inputs,
        the two download buttons (matching and plotting), the back button
        and a card containing the ``match_log`` text output.

    The input grid used to be built in two identical ``HRMS``/``NRMS``
    branches, which left ``inputs_columns`` unbound for any other *platform*
    value; it is now built once, so such values fall through to the NRMS
    inputs just as ``extra_inputs`` already did.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
        ui.input_selectize(
            "spectrum_ID1",
            "Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
        ui.input_selectize(
            "spectrum_ID2",
            "Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
    ]
    high_quality_input = ui.input_select(
        "high_quality_reference_library",
        "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
        [False, True],
    )

    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "FCNMWL",
            ),
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "FNLW",
            )
        ]

    numeric_inputs = [
        ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
        ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
        ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
        ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
        ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
        ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
        ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
        ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
        ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
        ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
    ]

    run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style=button_style)
    run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # Same four columns for every platform; only extra_inputs differs.
    inputs_columns = ui.layout_columns(
        ui.div(*base_inputs, style=column_style),
        ui.div(high_quality_input, *extra_inputs, style=column_style),
        ui.div(*numeric_inputs[:5], style=column_style),
        ui.div(*numeric_inputs[5:], style=column_style),
        col_widths=(3,3,3,3)
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto"
    )

    return ui.div(
        ui.TagList(
            ui.h2("Run Spectral Library Matching"),
            inputs_columns,
            run_button_spec_lib_matching,
            run_button_plot_spectra_within_spec_lib_matching,
            back_button,
            log_panel
        ),
    )
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
def run_parameter_tuning_grid_ui(platform: str):
    """Build the grid-search "Tune parameters" page.

    Every tunable setting is a free-text input whose default is a bracketed
    list (for example ``'[0.1,0.5]'``), and the similarity measure is a
    multi-select.

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching window-size inputs
            and the HRMS preprocessing-order defaults; any other value gets
            the NRMS preprocessing-order input only.

    Returns:
        A ``ui.div`` holding the page title, a four-column grid of inputs,
        the run (download) button, the back button and a card containing the
        ``match_log`` text output.

    The input grid used to be built in two identical ``HRMS``/``NRMS``
    branches (with slices copied from a longer ``base_inputs`` list), which
    left ``inputs_columns`` unbound for any other *platform* value; it is now
    built once with the same column contents.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25))'),
        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]')
    ]

    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "[FCNMWL,CWM]",
            ),
            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "[FNLW,WNL]",
            )
        ]

    numeric_inputs = [
        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]')
    ]

    run_button_parameter_tuning_grid = ui.download_button("run_btn_parameter_tuning_grid", "Tune parameters (grid search)", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # All five base inputs sit in the first column (as the old 0:6 slice
    # produced); the second column holds only the platform-specific inputs.
    inputs_columns = ui.layout_columns(
        ui.div(*base_inputs, style=column_style),
        ui.div(*extra_inputs, style=column_style),
        ui.div(*numeric_inputs[:5], style=column_style),
        ui.div(*numeric_inputs[5:], style=column_style),
        col_widths=(3, 3, 3, 3),
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto"
    )

    return ui.div(
        ui.TagList(
            ui.h2("Tune parameters"),
            inputs_columns,
            run_button_parameter_tuning_grid,
            back_button,
            log_panel
        ),
    )
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
# (lower, upper) value pairs for the continuous parameters offered on the
# differential-evolution tuning page, keyed by parameter name.
PARAMS_NRMS = {
    "noise_threshold": (0.0, 0.25),
    "wf_mz": (0.0, 5.0),
    "wf_int": (0.0, 5.0),
    "LET_threshold": (0.0, 5.0),
    "entropy_dimension": (1.0, 3.0),
}

# HRMS offers the two window sizes first, followed by every NRMS parameter
# in the same order.
PARAMS_HRMS = {
    "window_size_centroiding": (0.0, 0.5),
    "window_size_matching": (0.0, 0.5),
    **PARAMS_NRMS,
}
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
def run_parameter_tuning_DE_ui(platform: str):
    """Build the differential-evolution "Tune parameters" page.

    Args:
        platform: ``"HRMS"`` selects ``PARAMS_HRMS`` for the parameter
            checklist and adds the window-size inputs; any other value uses
            ``PARAMS_NRMS`` and the NRMS preprocessing-order input only.

    Returns:
        A fillable page with a sidebar (parameter checklist plus the
        ``bounds_inputs`` output) and a main area holding the inputs grid,
        the run and back buttons, and a card with the ``run_log`` output.
    """
    is_hrms = platform == "HRMS"
    param_bounds = PARAMS_HRMS if is_hrms else PARAMS_NRMS

    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:300px; height:100px"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_select(
            "similarity_measure",
            "Select similarity measure:",
            [
                "cosine","shannon","renyi","tsallis","mixture","jaccard","dice",
                "3w_jaccard","sokal_sneath","binary_cosine","mountford",
                "mcconnaughey","driver_kroeber","simpson","braun_banquet",
                "fager_mcgowan","kulczynski","intersection","hamming","hellinger",
            ],
        ),
        ui.input_text(
            "weights",
            "Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):",
            "0.25, 0.25, 0.25, 0.25",
        ),
        ui.input_select(
            "high_quality_reference_library",
            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
            [False, True],
        ),
    ]

    if is_hrms:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "FCNMWL",
            ),
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "FNLW",
            )
        ]

    numeric_inputs = [
        ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
        ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99_999_999),
        ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
        ui.input_numeric("int_max", "Maximum intensity for filtering:", 999_999_999),
        ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
        ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
        ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
        ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
        ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
        ui.input_numeric("max_iterations", "Maximum number of iterations:", 5),
    ]

    run_button_parameter_tuning_DE = ui.input_action_button(
        "run_btn_parameter_tuning_DE",
        "Tune parameters (differential evolution optimization)",
        style=button_style,
    )
    back_button = ui.input_action_button(
        "back",
        "Back to main menu",
        style=button_style,
    )

    # Four equal-width columns; the grouping is the same for both platforms.
    column_groups = (base_inputs, extra_inputs, numeric_inputs[:5], numeric_inputs[5:])
    inputs_columns = ui.layout_columns(
        *(ui.div(*group, style=column_style) for group in column_groups),
        col_widths=(3, 3, 3, 3),
    )

    sidebar = ui.sidebar(
        ui.h3("Select continuous parameters to optimize"),
        ui.input_checkbox_group(
            "params",
            None,
            choices=list(param_bounds.keys()),
            selected=["noise_threshold", "LET_threshold"],
        ),
        ui.hr(),
        ui.h4("Bounds for selected parameters"),
        ui.output_ui("bounds_inputs"),
        width=360,
    )

    body = ui.div(
        ui.h2("Tune parameters (differential evolution optimization)"),
        inputs_columns,
        run_button_parameter_tuning_DE,
        back_button,
        ui.br(),
        ui.card(
            ui.card_header("Live log"),
            # The server must define an output named "run_log".
            ui.output_text_verbatim("run_log"),
        ),
        style="display:flex; flex-direction:column; gap:16px;",
    )

    return ui.page_fillable(ui.layout_sidebar(sidebar, body))
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
# Root page: the visible content comes from the "main_ui" output, with a
# "status_output" text output below it.
app_ui = ui.page_fluid(
    # Icon link placed in the document head.
    ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
    ui.output_ui("main_ui"),
    ui.output_text("status_output"),
)
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
def server(input, output, session):
|
|
658
|
-
|
|
659
|
-
# --- Navigation -----------------------------------------------------------
# Name of the page that main_ui renders.
current_page = reactive.Value("main_menu")

# Last-seen click counts of the menu / back buttons, compared against the
# live button values by the navigation effect.
plot_clicks = reactive.Value(0)
match_clicks = reactive.Value(0)
back_clicks = reactive.Value(0)

# --- Job status / logging -------------------------------------------------
# NOTE(review): the run_status_* values are not read in the visible part of
# the file; presumably per-task status strings -- confirm before relying on them.
run_status_plot_spectra = reactive.Value("")
run_status_spec_lib_matching = reactive.Value("")
run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
run_status_parameter_tuning_grid = reactive.Value("")
run_status_parameter_tuning_DE = reactive.Value("")
# Running flags; _pump_logs only polls the log queue while one of them is set.
is_tuning_grid_running = reactive.Value(False)
is_tuning_DE_running = reactive.Value(False)
# Accumulated log text returned by the match_log output.
match_log_rv = reactive.Value("")
is_matching_rv = reactive.Value(False)
is_any_job_running = reactive.Value(False)
# NOTE(review): the next three values are not used in the visible part of the file.
latest_csv_path_rv = reactive.Value("")
latest_df_rv = reactive.Value(None)
is_running_rv = reactive.Value(False)

# --- Uploaded query / reference libraries ---------------------------------
query_ids_rv = reactive.Value([])
query_file_path_rv = reactive.Value(None)
query_result_rv = reactive.Value(None)
query_status_rv = reactive.Value("")
reference_ids_rv = reactive.Value([])
reference_file_path_rv = reactive.Value(None)
reference_result_rv = reactive.Value(None)
reference_status_rv = reactive.Value("")

# Paths of the CSV versions of the uploads (the upload itself when it is
# already a .csv file).
converted_query_path_rv = reactive.Value(None)
converted_reference_path_rv = reactive.Value(None)
|
|
690
|
-
|
|
691
|
-
@output
@render.ui
def bounds_inputs():
    """Render one card with lower/upper numeric inputs per selected parameter.

    Shows a short hint instead when no parameter is selected.
    """
    chosen = input.params()
    if not chosen:
        return ui.div(ui.em("Select one or more parameters above."))

    # Default bounds come from the table matching the selected platform.
    defaults = PARAMS_HRMS if input.chromatography_platform() == 'HRMS' else PARAMS_NRMS

    def _bounds_card(name):
        # Parameters missing from the table fall back to the unit interval.
        lower, upper = defaults.get(name, (0.0, 1.0))
        return ui.card(
            ui.card_header(name),
            ui.layout_columns(
                ui.input_numeric(f"min_{name}", "Lower", lower, step=0.001),
                ui.input_numeric(f"max_{name}", "Upper", upper, step=0.001),
            ),
        )

    return ui.div(*[_bounds_card(name) for name in chosen])
|
|
715
|
-
|
|
716
|
-
def _read_bounds_dict():
    """Return ``{parameter: (lower, upper)}`` for every selected parameter.

    Bounds are read from the ``min_<name>`` / ``max_<name>`` inputs when they
    exist; otherwise the defaults of the selected platform are used, the same
    way ``bounds_inputs`` and ``_read_bounds`` pick them.
    """
    # Use the platform-specific table rather than a free ``PARAMS`` name so the
    # fallbacks agree with the defaults shown in the bounds cards.
    if input.chromatography_platform() == 'HRMS':
        defaults = PARAMS_HRMS
    else:
        defaults = PARAMS_NRMS

    out = {}
    for name in input.params():
        lo_default, hi_default = defaults.get(name, (0.0, 1.0))
        lo_id = f"min_{name}"
        hi_id = f"max_{name}"

        lo_val = input[lo_id]() if lo_id in input else lo_default
        hi_val = input[hi_id]() if hi_id in input else hi_default

        # A cleared numeric input reports None; float(None) would raise, so
        # fall back to the default bound instead.
        if lo_val is None:
            lo_val = lo_default
        if hi_val is None:
            hi_val = hi_default

        out[name] = (float(lo_val), float(hi_val))
    return out
|
|
729
|
-
|
|
730
|
-
def _read_bounds():
    """Collect the optimisation bounds for the selected parameters.

    Returns:
        A tuple ``(opt_params, bounds_dict, bounds_list)`` where ``opt_params``
        is the selection from ``input.params()``, ``bounds_dict`` maps each
        parameter to ``(lower, upper)`` and ``bounds_list`` holds the same
        pairs in selection order.
    """
    opt_params = input.params()
    if input.chromatography_platform() == 'HRMS':
        defaults = PARAMS_HRMS
    else:
        defaults = PARAMS_NRMS

    bounds_dict = {}
    for p in opt_params:
        lo_id, hi_id = f"min_{p}", f"max_{p}"
        lo_default, hi_default = defaults.get(p, (0.0, 1.0))
        lo = input[lo_id]() if lo_id in input else lo_default
        hi = input[hi_id]() if hi_id in input else hi_default

        # A cleared numeric input reports None, which cannot be compared or
        # converted; use the default bound in that case.
        if lo is None:
            lo = lo_default
        if hi is None:
            hi = hi_default

        # Tolerate bounds entered in the wrong order.
        if lo > hi:
            lo, hi = hi, lo
        bounds_dict[p] = (float(lo), float(hi))

    bounds_list = [bounds_dict[p] for p in opt_params]
    return opt_params, bounds_dict, bounds_list
|
|
749
|
-
|
|
750
|
-
def _reset_plot_spectra_state():
    """Clear every upload-related reactive value and empty both ID selectors."""
    cleared_values = (
        (query_status_rv, ""),
        (reference_status_rv, ""),
        (query_ids_rv, []),
        (reference_ids_rv, []),
        (query_file_path_rv, None),
        (reference_file_path_rv, None),
        (query_result_rv, None),
        (reference_result_rv, None),
        (converted_query_path_rv, None),
        (converted_reference_path_rv, None),
    )
    for rv, empty in cleared_values:
        rv.set(empty)

    # Best effort: a failure while updating the selectors is ignored.
    try:
        for selectize_id in ("spectrum_ID1", "spectrum_ID2"):
            ui.update_selectize(selectize_id, choices=[], selected=None)
    except Exception:
        pass
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
def _reset_spec_lib_matching_state():
    """Clear the log, drop the matching/job flags and empty both ID selectors."""
    match_log_rv.set("")
    for flag in (is_matching_rv, is_any_job_running):
        flag.set(False)

    # Best effort: a failure while updating the selectors is ignored.
    try:
        for selectize_id in ("spectrum_ID1", "spectrum_ID2"):
            ui.update_selectize(selectize_id, choices=[], selected=None)
    except Exception:
        pass
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
def _reset_parameter_tuning_state():
    """Clear the log text and switch off all tuning/job running flags."""
    match_log_rv.set("")
    for flag in (is_tuning_grid_running, is_tuning_DE_running, is_any_job_running):
        flag.set(False)
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
@reactive.effect
@reactive.event(input.back)
def _clear_on_back_from_pages():
    """Reset the state of the current page when the back button fires."""
    page = current_page()
    if page in ("run_parameter_tuning_grid", "run_parameter_tuning_DE"):
        # Both tuning pages share one reset routine.
        _reset_parameter_tuning_state()
    elif page == "run_spec_lib_matching":
        _reset_spec_lib_matching_state()
    elif page == "plot_spectra":
        _reset_plot_spectra_state()
|
|
798
|
-
|
|
799
|
-
@reactive.effect
def _clear_on_enter_pages():
    """Reset the state belonging to a page whenever ``current_page`` changes to it."""
    resetters = {
        "plot_spectra": _reset_plot_spectra_state,
        "run_spec_lib_matching": _reset_spec_lib_matching_state,
        "run_parameter_tuning_grid": _reset_parameter_tuning_state,
        "run_parameter_tuning_DE": _reset_parameter_tuning_state,
    }
    reset = resetters.get(current_page())
    # Pages without an entry (e.g. the main menu) need no reset.
    if reset is not None:
        reset()
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
    """Remove and return everything currently queued in *q* without waiting.

    Items are returned in the order they are taken from the queue; an empty
    queue yields an empty list.
    """
    drained = []
    while True:
        try:
            item = q.get_nowait()
        except asyncio.QueueEmpty:
            # Nothing left to take: hand back what was collected so far.
            return drained
        drained.append(item)
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
class ReactiveWriter(io.TextIOBase):
    """Text stream that forwards written text to ``_LOG_QUEUE``.

    Each non-empty write is scheduled on the given event loop with
    ``call_soon_threadsafe``, which enqueues the text on ``_LOG_QUEUE``.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop):
        # Event loop on which the queue insertions are scheduled.
        self._loop = loop

    def write(self, s: str):
        """Queue *s* for the log and return the number of characters accepted."""
        if s:
            self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
            return len(s)
        # Empty writes are ignored.
        return 0

    def flush(self):
        """No-op: this writer keeps no buffer of its own."""
        pass
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
@reactive.effect
async def _pump_logs():
    """While a job flag is set, move queued log text into ``match_log_rv``.

    The effect re-schedules itself every 0.05 s for as long as at least one
    running flag is true.
    """
    running_flags = (
        is_any_job_running,
        is_tuning_grid_running,
        is_tuning_DE_running,
        is_matching_rv,
    )
    # any() over a generator stops at the first flag that is set.
    if not any(flag.get() for flag in running_flags):
        return

    reactive.invalidate_later(0.05)
    pending = _drain_queue_nowait(_LOG_QUEUE)
    if pending:
        match_log_rv.set(match_log_rv.get() + "".join(pending))
        await reactive.flush()
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
def process_database(file_path: str):
    """Return the given path together with its lower-cased file suffix.

    Returns:
        ``{"path": file_path, "suffix": <lower-cased suffix>}``.
    """
    return dict(path=file_path, suffix=Path(file_path).suffix.lower())
|
|
848
|
-
|
|
849
|
-
@render.text
def plot_query_status():
    """Return the query status text, or an empty string when it is unset."""
    status = query_status_rv.get()
    return status if status else ""
|
|
852
|
-
|
|
853
|
-
@render.text
def plot_reference_status():
    """Return the reference status text, or an empty string when it is unset."""
    status = reference_status_rv.get()
    return status if status else ""
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
@reactive.effect
@reactive.event(input.query_data)
async def _on_query_upload():
    """Record a newly uploaded query file and report processing progress."""
    uploads = input.query_data()
    req(uploads and len(uploads) > 0)

    # Only the first uploaded file is used.
    uploaded_path = uploads[0]["datapath"]
    query_file_path_rv.set(uploaded_path)

    display_name = Path(uploaded_path).name
    query_status_rv.set(f"Processing query database: {display_name} …")
    await reactive.flush()

    try:
        # Run the processing step off the event loop.
        info = await asyncio.to_thread(process_database, uploaded_path)
        query_result_rv.set(info)
        query_status_rv.set("✅ Query database processed.")
        await reactive.flush()
    except Exception as exc:
        # Failures are reported through the status text only.
        query_status_rv.set(f"❌ Failed to process query database: {exc}")
        await reactive.flush()
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
@reactive.effect
@reactive.event(input.reference_data)
async def _on_reference_upload():
    """Record a newly uploaded reference file and report processing progress."""
    uploads = input.reference_data()
    req(uploads and len(uploads) > 0)

    # Only the first uploaded file is used.
    uploaded_path = uploads[0]["datapath"]
    reference_file_path_rv.set(uploaded_path)

    display_name = Path(uploaded_path).name
    reference_status_rv.set(f"Processing reference database: {display_name} …")
    await reactive.flush()

    try:
        # Run the processing step off the event loop.
        info = await asyncio.to_thread(process_database, uploaded_path)
        reference_result_rv.set(info)
        reference_status_rv.set("✅ Reference database processed.")
        await reactive.flush()
    except Exception as exc:
        # Failures are reported through the status text only.
        reference_status_rv.set(f"❌ Failed to process reference database: {exc}")
        await reactive.flush()
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
@render.text
def match_log():
    """Return the accumulated log text held in ``match_log_rv``."""
    log_text = match_log_rv.get()
    return log_text
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
# Dedicated last-seen click counters for the two tuning buttons. Sharing
# ``match_clicks`` between three different buttons meant that a click on one
# of them could be ignored once another had been clicked more often.
tuning_grid_clicks = reactive.Value(0)
tuning_DE_clicks = reactive.Value(0)

@reactive.Effect
def _():
    """Switch ``current_page`` when a menu button or the back button is clicked.

    A button counts as clicked when its current value exceeds the last value
    recorded for that same button; the recorded value is then updated.
    """
    if input.plot_spectra() > plot_clicks.get():
        current_page.set("plot_spectra")
        plot_clicks.set(input.plot_spectra())
    elif input.run_spec_lib_matching() > match_clicks.get():
        current_page.set("run_spec_lib_matching")
        match_clicks.set(input.run_spec_lib_matching())
    elif input.run_parameter_tuning_grid() > tuning_grid_clicks.get():
        current_page.set("run_parameter_tuning_grid")
        tuning_grid_clicks.set(input.run_parameter_tuning_grid())
    elif input.run_parameter_tuning_DE() > tuning_DE_clicks.get():
        current_page.set("run_parameter_tuning_DE")
        tuning_DE_clicks.set(input.run_parameter_tuning_DE())
    elif hasattr(input, "back") and input.back() > back_clicks.get():
        current_page.set("main_menu")
        back_clicks.set(input.back())
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
@render.image
def image():
    """Return the image descriptor for the emblem shown on the main menu.

    The file is looked up as ``www/emblem.png`` next to this module.
    """
    app_dir = Path(__file__).resolve().parent
    emblem_path = str(app_dir / "www/emblem.png")
    img: ImgData = {"src": emblem_path, "width": "320px", "height": "250px"}
    return img
|
|
931
|
-
|
|
932
|
-
@output
@render.ui
def main_ui():
    """Render the page selected by ``current_page``.

    The main menu is built inline; every other page is delegated to its
    ``*_ui`` builder, which receives the selected chromatography platform.
    Returns ``None`` for an unknown page name.
    """
    page = current_page()
    if page == "main_menu":
        return ui.page_fluid(
            ui.h2("Main Menu"),
            # Emblem pinned to the top centre of the window.
            ui.div(
                ui.output_image("image"),
                style=(
                    "position:fixed; top:0; left:50%; transform:translateX(-50%); "
                    "z-index:1000; text-align:center; padding:10px; background-color:white;"
                ),
            ),
            ui.div(
                "Overview:",
                style="text-align:left; font-size:24px; font-weight:bold; margin-top:350px"
            ),
            ui.div(
                "PyCompound is a Python-based tool designed for performing spectral library matching on either high-resolution mass spectrometry data (HRMS) or low-resolution mass spectrometry data (NRMS). PyCompound offers a range of spectrum preprocessing transformations and similarity measures. These spectrum preprocessing transformations include filtering on mass/charge and/or intensity values, weight factor transformation, low-entropy transformation, centroiding, noise removal, and matching. The available similarity measures include the canonical Cosine similarity measure, three entropy-based similarity measures, and a variety of binary similarity measures: Jaccard, Dice, 3W-Jaccard, Sokal-Sneath, Binary Cosine, Mountford, McConnaughey, Driver-Kroeber, Simpson, Braun-Banquet, Fager-McGowan, Kulczynski, Intersection, Hamming, and Hellinger.",
                style="margin-top:10px; text-align:left; font-size:16px; font-weight:500"
            ),
            ui.div(
                "Select options:",
                style="margin-top:30px; text-align:left; font-size:24px; font-weight:bold"
            ),
            ui.div(
                ui.input_radio_buttons("chromatography_platform", "Specify chromatography platform:", ["HRMS","NRMS"]),
                style="font-size:18px; margin-top:10px; max-width:none"
            ),
            ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
            ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
            ui.input_action_button("run_parameter_tuning_grid", "Grid search: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
            ui.input_action_button("run_parameter_tuning_DE", "Differential evolution optimization: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:500px; height:150px; margin-top:10px; margin-right:50px"),
            ui.div(
                "References:",
                style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
            ),
            ui.div(
                "If Shannon Entropy similarity measure, low-entropy transformation, or centroiding are used:",
                style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
            ),
            ui.div(
                ui.HTML(
                    'Li, Y., Kind, T., Folz, J. et al. (2021) Spectral entropy outperforms MS/MS dot product similarity for small-molecule compound identification. Nat Methods, 18 1524–1531. <a href="https://doi.org/10.1038/s41592-021-01331-z" target="_blank">https://doi.org/10.1038/s41592-021-01331-z</a>.'
                ),
                style="text-align:left; font-size:14px; font-weight:500"
            ),
            ui.div(
                "If Tsallis Entropy similarity measure or series of preprocessing transformations are used:",
                style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
            ),
            ui.div(
                ui.HTML(
                    # The anchor tag previously had a stray comma between its
                    # href and target attributes.
                    'Dlugas, H., Zhang, X., Kim, S. (2025) Comparative analysis of continuous similarity measures for compound identification in mass spectrometry-based metabolomics. Chemometrics and Intelligent Laboratory Systems, 263, 105417. <a href="https://doi.org/10.1016/j.chemolab.2025.105417" target="_blank">https://doi.org/10.1016/j.chemolab.2025.105417</a>.'
                ),
                style="text-align:left; font-size:14px; font-weight:500"
            ),
            ui.div(
                "If binary similarity measures are used:",
                style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
            ),
            ui.div(
                ui.HTML(
                    'Kim, S., Kato, I., & Zhang, X. (2022). Comparative Analysis of Binary Similarity Measures for Compound Identification in Mass Spectrometry-Based Metabolomics. Metabolites, 12(8), 694. <a href="https://doi.org/10.3390/metabo12080694" target="_blank">https://doi.org/10.3390/metabo12080694</a>.'
                ),
                style="text-align:left; font-size:14px; font-weight:500"
            ),

            ui.div(
                "If weight factor transformation is used:",
                style="margin-top:10px; text-align:left; font-size:14px; font-weight:500"
            ),
            ui.div(
                ui.HTML(
                    'Kim, S., Koo, I., Wei, X., & Zhang, X. (2012). A method of finding optimal weight factors for compound identification in gas chromatography-mass spectrometry. Bioinformatics, 28(8), 1158-1163. <a href="https://doi.org/10.1093/bioinformatics/bts083" target="_blank">https://doi.org/10.1093/bioinformatics/bts083</a>.'
                ),
                style="margin-bottom:40px; text-align:left; font-size:14px; font-weight:500"
            ),
        )
    elif page == "plot_spectra":
        return plot_spectra_ui(input.chromatography_platform())
    elif page == "run_spec_lib_matching":
        return run_spec_lib_matching_ui(input.chromatography_platform())
    elif page == "run_parameter_tuning_grid":
        return run_parameter_tuning_grid_ui(input.chromatography_platform())
    elif page == "run_parameter_tuning_DE":
        return run_parameter_tuning_DE_ui(input.chromatography_platform())
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
@reactive.effect
@reactive.event(input.query_data)
async def _populate_ids_from_query_upload():
    """Fill the ``spectrum_ID1`` selector with the IDs of the uploaded query file.

    Non-CSV uploads are first converted with ``build_library``. Progress and
    failures are written to ``query_status_rv``; exceptions are re-raised
    after being reported.
    """
    uploads = input.query_data()
    if not uploads:
        return

    src_path = Path(uploads[0]["datapath"])
    suffix = src_path.suffix.lower()

    try:
        if suffix == ".csv":
            # Already a CSV: use the upload as is.
            csv_path = src_path
        else:
            query_status_rv.set(f"Converting {src_path.name} → CSV …")
            await reactive.flush()

            converted_path = src_path.with_suffix(".converted.csv")
            built = await asyncio.to_thread(build_library, str(src_path), str(converted_path))

            # build_library may hand back a path or a DataFrame.
            if isinstance(built, (str, os.PathLike, Path)):
                csv_path = Path(built)
            elif isinstance(built, pd.DataFrame):
                built.to_csv(converted_path, index=False, sep='\t')
                csv_path = converted_path
            else:
                raise TypeError(f"build_library returned unsupported type: {type(built)}")

        converted_query_path_rv.set(str(csv_path))

        query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        query_ids_rv.set(ids)

        first_id = ids[0] if ids else None
        ui.update_selectize("spectrum_ID1", choices=ids, selected=first_id)

        if ids:
            summary = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            summary = f"⚠️ No IDs found in {csv_path.name}"
        query_status_rv.set(summary)
        await reactive.flush()

    except Exception as exc:
        query_status_rv.set(f"❌ Failed: {exc}")
        await reactive.flush()
        raise
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
@reactive.effect
@reactive.event(input.reference_data)
async def _populate_ids_from_reference_upload():
    """Fill the ``spectrum_ID2`` selector with the IDs of the uploaded reference file.

    Non-CSV uploads are first converted with ``build_library``. Progress and
    failures are written to ``reference_status_rv``; exceptions are re-raised
    after being reported.
    """
    uploads = input.reference_data()
    if not uploads:
        return

    src_path = Path(uploads[0]["datapath"])
    suffix = src_path.suffix.lower()

    try:
        if suffix == ".csv":
            # Already a CSV: use the upload as is.
            csv_path = src_path
        else:
            reference_status_rv.set(f"Converting {src_path.name} → CSV …")
            await reactive.flush()

            converted_path = src_path.with_suffix(".converted.csv")
            built = await asyncio.to_thread(build_library, str(src_path), str(converted_path))

            # build_library may hand back a path or a DataFrame.
            if isinstance(built, (str, os.PathLike, Path)):
                csv_path = Path(built)
            elif isinstance(built, pd.DataFrame):
                built.to_csv(converted_path, index=False, sep='\t')
                csv_path = converted_path
            else:
                raise TypeError(f"build_library returned unsupported type: {type(built)}")

        converted_reference_path_rv.set(str(csv_path))

        reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        reference_ids_rv.set(ids)

        first_id = ids[0] if ids else None
        ui.update_selectize("spectrum_ID2", choices=ids, selected=first_id)

        if ids:
            summary = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            summary = f"⚠️ No IDs found in {csv_path.name}"
        reference_status_rv.set(summary)
        await reactive.flush()

    except Exception as exc:
        reference_status_rv.set(f"❌ Failed: {exc}")
        await reactive.flush()
        raise
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
@render.download(filename="plot.png")
def run_btn_plot_spectra():
    """Generate the spectra plot for the current inputs and stream it as a PNG.

    Yields:
        The PNG image bytes of the generated figure.

    Raises:
        ValueError: if the selected platform is neither "HRMS" nor "NRMS".
        IndexError: if fewer than four weights are supplied.
    """
    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
    weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}

    # Any value other than the literal string 'False' enables the flag.
    high_quality_reference_library = input.high_quality_reference_library() != 'False'

    # Arguments shared by the HRMS and NRMS plotting functions.
    common = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=high_quality_reference_library,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        y_axis_transformation=input.y_axis_transformation(),
        return_plot=True,
    )

    platform = input.chromatography_platform()
    if platform == "HRMS":
        fig = generate_plots_on_HRMS_data(
            weights=weights,
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **common
        )
    elif platform == "NRMS":
        # NOTE(review): as before, the weights are not forwarded on this path.
        fig = generate_plots_on_NRMS_data(**common)
    else:
        raise ValueError(f"Unsupported chromatography platform: {platform!r}")

    # The figure is only written to the download; no plt.show() in a server
    # handler, and the specific figure is always closed afterwards.
    with io.BytesIO() as buf:
        try:
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
        finally:
            plt.close(fig)
        yield buf.getvalue()
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
@render.download(filename="identification_output.txt")
async def run_btn_spec_lib_matching():
    """Run spectral library matching and stream the identification table.

    stdout/stderr of the matching run are redirected into a
    ``ReactiveWriter``. The running flags are raised for the duration of the
    job so that ``_pump_logs`` moves the captured output into the log, and
    they are always lowered again afterwards.

    Yields:
        The identification output as tab-separated text.
    """
    # _pump_logs only polls the log queue while a running flag is set.
    is_any_job_running.set(True)
    is_matching_rv.set(True)
    match_log_rv.set("Running identification...\n")
    await reactive.flush()

    try:
        hq = input.high_quality_reference_library()
        if isinstance(hq, str):
            hq = hq.lower() == "true"
        elif isinstance(hq, (int, float)):
            hq = bool(hq)

        weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
        weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}

        common_kwargs = dict(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            likely_reference_ids=None,
            similarity_measure=input.similarity_measure(),
            weights=weights,
            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
            high_quality_reference_library=hq,
            mz_min=input.mz_min(), mz_max=input.mz_max(),
            int_min=input.int_min(), int_max=input.int_max(),
            noise_threshold=input.noise_threshold(),
            wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
            LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
            n_top_matches_to_save=input.n_top_matches_to_save(),
            print_id_results=True,
            output_identification=str(Path.cwd() / "identification_output.txt"),
            output_similarity_scores=str(Path.cwd() / "similarity_scores.txt"),
            return_ID_output=True,
        )

        loop = asyncio.get_running_loop()
        rw = ReactiveWriter(loop)

        with redirect_stdout(rw), redirect_stderr(rw):
            if input.chromatography_platform() == "HRMS":
                df_out = await asyncio.to_thread(
                    run_spec_lib_matching_on_HRMS_data,
                    window_size_centroiding=input.window_size_centroiding(),
                    window_size_matching=input.window_size_matching(),
                    **common_kwargs
                )
            else:
                df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
        match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        raise
    finally:
        is_matching_rv.set(False)
        is_any_job_running.set(False)
        await reactive.flush()

    yield df_out.to_csv(index=True, sep='\t')
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
@render.download(filename="plot.png")
def run_btn_plot_spectra_within_spec_lib_matching():
    """Plot the selected query/reference spectra and stream the figure as a PNG.

    Requires both a query and a reference upload. The y axis is always drawn
    with the "normalized" transformation.

    Yields:
        The PNG image bytes of the generated figure.
    """
    req(input.query_data(), input.reference_data())

    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    elif isinstance(hq, (int, float)):
        hq = bool(hq)

    weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
    weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}

    # Arguments shared by the HRMS and NRMS plotting functions.
    common = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        weights=weights,
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        y_axis_transformation="normalized",
        return_plot=True
    )

    if input.chromatography_platform() == "HRMS":
        fig = generate_plots_on_HRMS_data(
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **common
        )
    else:
        fig = generate_plots_on_NRMS_data(**common)

    # The figure is only written to the download; no plt.show() in a server
    # handler, and the specific figure is always closed afterwards.
    with io.BytesIO() as buf:
        try:
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
        finally:
            plt.close(fig)
        yield buf.getvalue()
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
@render.download(filename="parameter_tuning_grid_output.txt")
async def run_btn_parameter_tuning_grid():
    """Run the grid-search parameter tuning and stream the result table.

    The running flags are raised while the job runs and always lowered again,
    including when parsing the inputs fails.

    Yields:
        The tuning output as UTF-8 encoded, tab-separated text.
    """
    is_any_job_running.set(True)
    is_tuning_grid_running.set(True)
    match_log_rv.set("Running grid search of all parameters specified...\n")
    await reactive.flush()

    try:
        # Input parsing lives inside the try block so that a malformed value
        # is reported in the log and cannot leave the running flags stuck on.
        grid = {
            'similarity_measure': list(input.similarity_measure()),
            'weight': strip_weights(input.weights()),
            'spectrum_preprocessing_order': strip_text(input.spectrum_preprocessing_order()),
            'mz_min': strip_numeric(input.mz_min()),
            'mz_max': strip_numeric(input.mz_max()),
            'int_min': strip_numeric(input.int_min()),
            'int_max': strip_numeric(input.int_max()),
            'noise_threshold': strip_numeric(input.noise_threshold()),
            'wf_mz': strip_numeric(input.wf_mz()),
            'wf_int': strip_numeric(input.wf_int()),
            'LET_threshold': strip_numeric(input.LET_threshold()),
            'entropy_dimension': strip_numeric(input.entropy_dimension()),
            # Text such as "[True, False]" becomes a list of booleans.
            'high_quality_reference_library': [
                x.strip().lower() == "true"
                for x in input.high_quality_reference_library().strip().strip("[]").split(",")
                if x.strip()
            ],
        }

        common_kwargs = dict(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            output_path=str(Path.cwd() / "parameter_tuning_grid_output.txt"),
            return_output=True,
        )

        loop = asyncio.get_running_loop()
        rw = ReactiveWriter(loop)

        if input.chromatography_platform() == "HRMS":
            # The window sizes are only part of the HRMS grid.
            grid['window_size_centroiding'] = strip_numeric(input.window_size_centroiding())
            grid['window_size_matching'] = strip_numeric(input.window_size_matching())
            tune = tune_params_on_HRMS_data_grid_shiny
        else:
            tune = tune_params_on_NRMS_data_grid_shiny

        df_out = await asyncio.to_thread(_run_with_redirects, tune, rw, **common_kwargs, grid=grid)

        match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        raise
    finally:
        is_tuning_grid_running.set(False)
        is_any_job_running.set(False)
        await reactive.flush()

    # `sep` belongs to DataFrame.to_csv; str.encode() does not accept it.
    yield df_out.to_csv(index=False, sep='\t').encode("utf-8")
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
@reactive.effect
@reactive.event(input.run_btn_parameter_tuning_DE)
async def run_btn_parameter_tuning_DE():
    """Run differential-evolution parameter tuning when its button is pressed.

    The handler marks the job as running, snapshots every UI input it needs
    on the event-loop side, runs ``tune_params_DE`` in a worker thread with
    stdout/stderr redirected into the UI log, and always clears the
    "running" flags and stops the log drain task on exit.

    Errors are reported in the UI log (``match_log_rv``) rather than raised.
    """
    import traceback
    from contextlib import redirect_stderr, redirect_stdout

    match_log_rv.set("Tuning specified continuous parameters using differential evolution...\n")
    is_any_job_running.set(True)
    is_tuning_DE_running.set(True)
    await reactive.flush()

    # --- helpers ---
    def _safe_float(v, default):
        """Return ``float(v)``, or ``default`` when ``v`` is None or not numeric."""
        if v is None:
            return default
        try:
            return float(v)
        except (TypeError, ValueError, OverflowError):
            return default

    def _iget(input_id, default=None):
        """Read a Shiny input by id; return ``default`` if absent or silent."""
        if input_id not in input:
            return default
        try:
            return input[input_id]()
        except SilentException:
            return default

    def _format_tb(exc):
        """Return the full formatted traceback of ``exc`` as one string."""
        return "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))

    def _snapshot_inputs():
        """Read all inputs once and return the kwargs for ``tune_params_DE``.

        Everything the worker thread needs is captured here so that the
        thread itself never touches the ``input`` object.
        """
        qfile = _iget("query_data")[0]["datapath"]
        rfile = _iget("reference_data")[0]["datapath"]

        platform = _iget("chromatography_platform", "HRMS")
        sim = _iget("similarity_measure", "cosine")
        spro = _iget("spectrum_preprocessing_order", "FCNMWL")
        # Passed through as-is (may be None if the input is unset).
        max_iters = _iget("max_iterations")

        hq_raw = _iget("high_quality_reference_library", False)
        if isinstance(hq_raw, str):
            hq = hq_raw.lower() == "true"
        else:
            hq = bool(hq_raw)

        mz_min = _safe_float(_iget("mz_min", 0.0), 0.0)
        mz_max = _safe_float(_iget("mz_max", 99_999_999.0), 99_999_999.0)
        int_min = _safe_float(_iget("int_min", 0.0), 0.0)
        int_max = _safe_float(_iget("int_max", 999_999_999.0), 999_999_999.0)

        # weights "a,b,c,d": missing trailing entries are padded with 0.0,
        # extra entries are dropped.
        w_text = _iget("weights", "") or ""
        w_list = [float(w.strip()) for w in w_text.split(",") if w.strip()]
        w_list = (w_list + [0.0, 0.0, 0.0, 0.0])[:4]
        weights = {"Cosine": w_list[0], "Shannon": w_list[1], "Renyi": w_list[2], "Tsallis": w_list[3]}

        # Selected params + bounds: use the min_/max_ inputs when present,
        # otherwise fall back to the platform's default PARAMS dict.
        opt_params = tuple(_iget("params", ()) or ())
        param_defaults = PARAMS_HRMS if platform == "HRMS" else PARAMS_NRMS
        bounds_dict = {}
        for p in opt_params:
            fallback = param_defaults.get(p, (0.0, 1.0))
            lo = _safe_float(_iget(f"min_{p}", fallback[0]), fallback[0])
            hi = _safe_float(_iget(f"max_{p}", fallback[1]), fallback[1])
            if lo > hi:
                lo, hi = hi, lo
            bounds_dict[p] = (lo, hi)

        defaults = {
            "window_size_centroiding": _safe_float(_iget("window_size_centroiding", 0.5), 0.5),
            "window_size_matching": _safe_float(_iget("window_size_matching", 0.5), 0.5),
            "noise_threshold": _safe_float(_iget("noise_threshold", 0.0), 0.0),
            "wf_mz": _safe_float(_iget("wf_mz", 0.0), 0.0),
            "wf_int": _safe_float(_iget("wf_int", 1.0), 1.0),
            "LET_threshold": _safe_float(_iget("LET_threshold", 0.0), 0.0),
            "entropy_dimension": _safe_float(_iget("entropy_dimension", 1.1), 1.1),
        }
        if platform == "NRMS":
            # The window-size defaults are dropped for NRMS data.
            defaults.pop("window_size_centroiding", None)
            defaults.pop("window_size_matching", None)

        return {
            "query_data": qfile,
            "reference_data": rfile,
            "chromatography_platform": platform,
            "similarity_measure": sim,
            "weights": weights,
            "spectrum_preprocessing_order": spro,
            "mz_min": mz_min,
            "mz_max": mz_max,
            "int_min": int_min,
            "int_max": int_max,
            "high_quality_reference_library": hq,
            "optimize_params": list(opt_params),
            "param_bounds": bounds_dict,
            "default_params": defaults,
            "de_workers": 1,
            "maxiters": max_iters,
        }

    # ---- log plumbing (stdout/stderr -> UI) ----
    loop = asyncio.get_running_loop()
    q: asyncio.Queue[str | None] = asyncio.Queue()

    class UIWriter(io.TextIOBase):
        """File-like object that forwards written text to the UI log queue."""

        def write(self, s: str):
            if s:
                # write() is called from the worker thread; hand the text
                # over to the event loop instead of touching the queue here.
                loop.call_soon_threadsafe(q.put_nowait, s)
            return len(s)

        def flush(self):
            pass

    async def _drain():
        """Append queued text to the UI log until the ``None`` sentinel arrives."""
        while True:
            msg = await q.get()
            if msg is None:
                break
            match_log_rv.set(match_log_rv.get() + msg)
            await reactive.flush()

    drain_task = asyncio.create_task(_drain())
    writer = UIWriter()

    def _run(de_kwargs):
        """Worker-thread entry point: run the tuner with output captured."""
        # NOTE(review): redirect_stdout/redirect_stderr swap sys.stdout and
        # sys.stderr for the whole process, not only for this thread.
        with redirect_stdout(writer), redirect_stderr(writer):
            return tune_params_DE(**de_kwargs)

    try:
        # ---------- SNAPSHOT INPUTS SAFELY ----------
        try:
            de_kwargs = _snapshot_inputs()
        except Exception as e:
            match_log_rv.set(match_log_rv.get() + f"\n❌ Input snapshot failed:\n{_format_tb(e)}\n")
            return

        try:
            await asyncio.to_thread(_run, de_kwargs)
            match_log_rv.set(match_log_rv.get() + "\n✅ Differential evolution finished.\n")
        except Exception as e:
            match_log_rv.set(match_log_rv.get() + f"\n❌ {type(e).__name__}: {e}\n{_format_tb(e)}\n")
    finally:
        # Single cleanup path for every exit: stop the drain task, clear the
        # "running" flags, and push the final state to the UI.
        await q.put(None)
        await drain_task
        is_tuning_DE_running.set(False)
        is_any_job_running.set(False)
        await reactive.flush()
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
@reactive.effect
async def _pump_reactive_writer_logs():
    """Move queued log messages into the UI log while grid tuning is running.

    Does nothing unless ``is_tuning_grid_running`` is set. While it is set,
    the effect re-schedules itself every 0.1 s and appends whatever
    ``_drain_queue_nowait(_LOG_QUEUE)`` returns to ``match_log_rv``.
    """
    if is_tuning_grid_running.get():
        # Schedule the next poll before draining.
        reactive.invalidate_later(0.1)
        pending = _drain_queue_nowait(_LOG_QUEUE)
        if not pending:
            return
        current_log = match_log_rv.get()
        match_log_rv.set(current_log + "".join(pending))
        await reactive.flush()
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
@render.text
def status_output():
    """Render the status text of the plot-spectra job."""
    # NOTE(review): this function used to be followed by three more `return`
    # statements (run_status_spec_lib_matching, run_status_parameter_tuning_grid,
    # run_status_parameter_tuning_DE). They could never execute, so only the
    # plot-spectra status was ever shown. Confirm whether the other statuses
    # were meant to be displayed as well.
    return run_status_plot_spectra.get()
|
|
1509
|
-
|
|
1510
|
-
@output
@render.text
def run_log():
    """Render the current contents of the shared job log."""
    log_text = match_log_rv.get()
    return log_text
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
# Build the Shiny application object from the UI definition (`app_ui`) and the
# server function (`server`); both are defined earlier in this file.
app = App(app_ui, server)
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|