pycompound 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app.py DELETED
@@ -1,1519 +0,0 @@
1
-
2
- from shiny import App, ui, reactive, render, req
3
- from shiny.types import SilentException
4
- from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
5
- from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
6
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
7
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
8
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid_shiny
9
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid_shiny
10
- from pycompound.spec_lib_matching import tune_params_DE
11
- from pycompound.plot_spectra import generate_plots_on_HRMS_data
12
- from pycompound.plot_spectra import generate_plots_on_NRMS_data
13
- from pathlib import Path
14
- from contextlib import redirect_stdout, redirect_stderr
15
- import contextlib
16
- import subprocess
17
- import traceback
18
- import asyncio
19
- import io
20
- import os
21
- import sys
22
- import matplotlib.pyplot as plt
23
- import pandas as pd
24
- import numpy as np
25
- import netCDF4 as nc
26
- from pyteomics import mgf, mzml
27
- import ast
28
- from numbers import Real
29
- import logging
30
- from scipy.optimize import differential_evolution
31
-
32
-
33
# Module-level queue of log text chunks. ReactiveWriter.write() enqueues text
# through the event loop, and the server's _pump_logs effect drains it into the
# log shown in the UI.
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
34
-
35
- class _UIWriter:
36
- def __init__(self, loop, q: asyncio.Queue[str]):
37
- self._loop = loop
38
- self._q = q
39
- def write(self, s: str):
40
- if s:
41
- self._loop.call_soon_threadsafe(self._q.put_nowait, s)
42
- return len(s)
43
- def flush(self):
44
- pass
45
-
46
-
47
def attach_logging_to_writer(writer):
    """Route root-logger output (INFO and above) to ``writer``.

    A new ``logging.StreamHandler`` wrapping ``writer`` is added to the root
    logger, and both the handler and the root logger are set to INFO.

    Returns:
        ``(handler, root_logger)`` so the caller can detach the handler later.
    """
    root_logger = logging.getLogger()
    stream_handler = logging.StreamHandler(writer)
    stream_handler.setLevel(logging.INFO)
    root_logger.addHandler(stream_handler)
    root_logger.setLevel(logging.INFO)
    return stream_handler, root_logger
54
-
55
-
56
-
57
- def _run_with_redirects(fn, writer, *args, **kwargs):
58
- with redirect_stdout(writer), redirect_stderr(writer):
59
- return fn(*args, **kwargs)
60
-
61
-
62
def strip_text(s):
    """Split a bracketed, comma-separated string into trimmed, non-empty tokens.

    Surrounding whitespace and any leading/trailing ``[`` / ``]`` characters
    are removed before splitting on commas.
    """
    inner = s.strip('[]')
    tokens = []
    for piece in inner.split(','):
        cleaned = piece.strip()
        if cleaned:
            tokens.append(cleaned)
    return tokens
64
-
65
-
66
def strip_numeric(s):
    """Parse a bracketed, comma-separated string into a list of floats.

    Empty tokens are skipped; a token that is not a valid float raises
    ``ValueError`` from ``float()``.
    """
    inner = s.strip('[]')
    numbers = []
    for piece in inner.split(','):
        if piece.strip():
            numbers.append(float(piece.strip()))
    return numbers
68
-
69
-
70
def strip_weights(s):
    """Convert mixture-weight input into a list of weight dictionaries.

    Args:
        s: Either a string/bytes literal (parsed with ``ast.literal_eval``) or
            an already-parsed object. It must be a single 4-item sequence of
            numbers or a sequence of 4-item sequences.

    Returns:
        One dict per 4-item group, keyed ``'Cosine'``, ``'Shannon'``,
        ``'Renyi'``, ``'Tsallis'`` in that order.

    Raises:
        ValueError: If the parsed object is not a list/tuple, or any group is
            not a 4-item list/tuple.
    """
    keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
    parsed = ast.literal_eval(s) if isinstance(s, (str, bytes)) else s

    if not isinstance(parsed, (list, tuple)):
        raise ValueError(f"Expected a 4-tuple or a sequence of 4-tuples, got {type(parsed).__name__}")

    # A flat group of four numbers is treated as one weight set.
    is_single_group = len(parsed) == 4 and all(isinstance(x, Real) for x in parsed)
    groups = [parsed] if is_single_group else list(parsed)

    weight_dicts = []
    for group in groups:
        well_formed = isinstance(group, (list, tuple)) and len(group) == 4
        if not well_formed:
            raise ValueError(f"Each item must be a 4-tuple, got: {group!r}")
        weight_dicts.append(dict(zip(keys, group)))
    return weight_dicts
88
-
89
-
90
def build_library(input_path=None, output_path=None):
    """Load a spectral library file into a DataFrame.

    CSV files are returned as read by ``pandas.read_csv``. mgf, mzML, cdf and
    msp files are flattened into one row per peak with columns ``id``,
    ``mz_ratio`` and ``intensity``.

    Args:
        input_path: Path to the library file. The format is chosen from the
            end of the path, case-insensitively (csv, mgf, mzML, cdf, msp).
        output_path: Accepted for interface compatibility; not used here.

    Returns:
        pandas.DataFrame with the library contents.

    Raises:
        TypeError: If ``input_path`` is None.
        SystemExit: If the path does not end in a supported extension (an
            error message is printed first).
    """
    if input_path is None:
        raise TypeError("input_path must be a path to a library file, not None")

    # Lower-case once so 'lib.Csv' or 'lib.Mgf' are recognised as well.
    lowered = str(input_path).lower()
    if lowered.endswith('csv'):
        return pd.read_csv(input_path)
    if lowered.endswith('mgf'):
        input_file_type = 'mgf'
    elif lowered.endswith('mzml'):
        input_file_type = 'mzML'
    elif lowered.endswith('cdf'):
        input_file_type = 'cdf'
    elif lowered.endswith('msp'):
        input_file_type = 'msp'
    else:
        print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
        sys.exit()

    ids = []
    mzs = []
    ints = []

    if input_file_type in ('mgf', 'mzML'):
        if input_file_type == 'mgf':
            with mgf.read(input_path, index_by_scans = True) as reader:
                spectra = list(reader)
        else:
            with mzml.read(input_path) as reader:
                spectra = list(reader)

        for number, spec in enumerate(spectra, start=1):
            mz_array = spec['m/z array']
            n_peaks = len(mz_array)
            if n_peaks == 0:
                # No peaks means no rows, so the spectrum name is never needed.
                continue
            if input_file_type == 'mzML':
                spec_id = f'ID_{number}'
            else:
                # NOTE(review): ids come from params['name'], whereas
                # extract_first_column_ids lists TITLE= values for mgf files;
                # confirm the two agree for the files in use.
                spec_id = spec['params']['name']
            ids.extend([spec_id] * n_peaks)
            mzs.extend(mz_array)
            ints.extend(spec['intensity array'][:n_peaks])

    if input_file_type == 'cdf':
        dataset = nc.Dataset(input_path, 'r')
        try:
            all_mzs = dataset.variables['mass_values'][:]
            all_ints = dataset.variables['intensity_values'][:]
            scan_idxs = dataset.variables['scan_index'][:]
        finally:
            # Close the file even if one of the variables is missing.
            dataset.close()

        n_scans = len(scan_idxs)
        for i in range(n_scans):
            if i % 1000 == 0:
                print(f'analyzed {i} out of {len(scan_idxs)} scans')
            s_idx = scan_idxs[i]
            # Each scan ends where the next one starts; the final scan runs to
            # the end of the peak arrays (it was previously dropped).
            e_idx = scan_idxs[i + 1] if i + 1 < n_scans else len(all_mzs)

            mzs_tmp = all_mzs[s_idx:e_idx]
            ints_tmp = all_ints[s_idx:e_idx]

            ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
            mzs.extend(mzs_tmp)
            ints.extend(ints_tmp[:len(mzs_tmp)])

    if input_file_type == 'msp':
        spectrum_id = None
        with open(input_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line.lower().startswith('name:'):
                    # Same parsing as extract_first_column_ids so the ids
                    # offered in the UI match the ids stored in the library.
                    spectrum_id = line.split(':', 1)[1].strip()
                elif spectrum_id is not None and line and line[0].isdigit():
                    try:
                        mz, intensity = map(float, line.split()[:2])
                    except ValueError:
                        # Not an "<mz> <intensity>" pair; skip the line.
                        continue
                    ids.append(spectrum_id)
                    mzs.append(mz)
                    ints.append(intensity)

    df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
    return df
177
-
178
-
179
-
180
def extract_first_column_ids(file_path: str, max_ids: int = 20000):
    """Return up to ``max_ids`` unique spectrum ids from a library file.

    For ``.csv`` files the ids are the non-empty values of the first column.
    Any other file is scanned as text for ``TITLE=<id>`` lines and for
    ``Name: <id>`` lines (case-insensitive). Ids keep first-seen order.

    Args:
        file_path: Path to the library file.
        max_ids: Maximum number of unique ids to return.

    Returns:
        A list of id strings; empty when nothing is found or a non-CSV file
        cannot be opened.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix == ".csv":
        first_column = pd.read_csv(file_path, usecols=[0]).iloc[:, 0]
        # Drop missing values BEFORE converting to str; the reverse order
        # turns NaN into the literal id "nan".
        values = first_column.dropna().astype(str)
        unique = dict.fromkeys(v for v in values if v.strip() != "")
        return list(unique)[:max_ids]

    # dict keys give ordered de-duplication while reading, so the cap applies
    # to unique ids, matching the CSV branch.
    unique = {}
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                if len(unique) >= max_ids:
                    break
                ls = line.strip()
                if ls.startswith("TITLE="):
                    identifier = ls.split("=", 1)[1].strip()
                elif ls.lower().startswith("name:"):
                    identifier = ls.split(":", 1)[1].strip()
                else:
                    continue
                unique.setdefault(identifier, None)
    except OSError:
        # Deliberately best-effort: an unreadable file yields whatever ids
        # were collected so far (usually none).
        pass

    return list(unique)
218
-
219
-
220
- def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
221
- """Send PNG bytes to browser and open in a new window as a data URL."""
222
- b64 = base64.b64encode(png_bytes).decode("ascii")
223
- data_url = f"data:image/png;base64,{b64}"
224
- session.send_custom_message("open-plot-window", {"png": data_url, "title": title})
225
-
226
-
227
def plot_spectra_ui(platform: str):
    """Build the "Plot Spectra" page.

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching window inputs and
            the HRMS preprocessing-order default; any other value gets the
            NRMS preprocessing-order input only.

    Returns:
        A ``ui.div`` holding the four-column input panel, the Run and Back
        buttons and the two status text outputs.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"
    status_style = "margin-top:8px; font-size:14px"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize(
            "spectrum_ID1",
            "Select spectrum ID 1 (default is the first spectrum in the library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
        ui.input_selectize(
            "spectrum_ID2",
            "Select spectrum ID 2 (default is the first spectrum in the library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
        ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
        ui.input_select(
            "high_quality_reference_library",
            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
            [False, True],
        ),
    ]

    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "FCNMWL",
            ),
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "FNLW",
            )
        ]

    numeric_inputs = [
        ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
        ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
        ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
        ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
        ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
        ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
        ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
        ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
        ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
    ]

    select_input = ui.input_select(
        "y_axis_transformation",
        "Transformation to apply to intensity axis:",
        ["normalized", "none", "log10", "sqrt"],
    )

    run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # The column layout is the same for every platform (only extra_inputs
    # differs), so it is built once. Previously it lived in two identical
    # HRMS/NRMS branches and was left unbound for any other platform value.
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs[0:6], style=column_style),
        ui.div([base_inputs[6:7], *extra_inputs], style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div([numeric_inputs[5:10], select_input], style=column_style),
        col_widths=(3,3,3,3),
    )

    return ui.div(
        ui.TagList(
            ui.h2("Plot Spectra"),
            inputs_columns,
            run_button_plot_spectra,
            back_button,
            ui.div(ui.output_text("plot_query_status"), style=status_style),
            ui.div(ui.output_text("plot_reference_status"), style=status_style)
        ),
    )
321
-
322
-
323
-
324
def run_spec_lib_matching_ui(platform: str):
    """Build the "Run Spectral Library Matching" page.

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching window inputs and
            the HRMS preprocessing-order default; any other value gets the
            NRMS preprocessing-order input only.

    Returns:
        A ``ui.div`` holding the four-column input panel, the matching and
        plotting download buttons, the Back button and the log card.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
        ui.input_selectize(
            "spectrum_ID1",
            "Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
        ui.input_selectize(
            "spectrum_ID2",
            "Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
            choices=[],
            multiple=False,
            options={"placeholder": "Upload a library..."},
        ),
        ui.input_select(
            "high_quality_reference_library",
            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
            [False, True],
        )
    ]

    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "FCNMWL",
            ),
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "FNLW",
            )
        ]

    numeric_inputs = [
        ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
        ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
        ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
        ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
        ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
        ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
        ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
        ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
        ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
        ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
    ]

    run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style=button_style)
    run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # The column layout is the same for every platform (only extra_inputs
    # differs), so it is built once. Previously it lived in two identical
    # HRMS/NRMS branches and was left unbound for any other platform value.
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs[0:6], style=column_style),
        ui.div([base_inputs[6:7], *extra_inputs], style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div(numeric_inputs[5:10], style=column_style),
        col_widths=(3,3,3,3)
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto"
    )

    return ui.div(
        ui.TagList(
            ui.h2("Run Spectral Library Matching"),
            inputs_columns,
            run_button_spec_lib_matching,
            run_button_plot_spectra_within_spec_lib_matching,
            back_button,
            log_panel
        ),
    )
421
-
422
-
423
-
424
def run_parameter_tuning_grid_ui(platform: str):
    """Build the grid-search "Tune parameters" page.

    Every tunable setting is a free-text input whose default is a bracketed
    list of candidate values.

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching window inputs and
            the HRMS preprocessing-order default; any other value gets the
            NRMS preprocessing-order input only.

    Returns:
        A ``ui.div`` holding the four-column input panel, the grid-search
        download button, the Back button and the log card.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25))'),
        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]')
    ]

    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "[FCNMWL,CWM]",
            ),
            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "[FNLW,WNL]",
            )
        ]

    numeric_inputs = [
        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]')
    ]

    run_button_parameter_tuning_grid = ui.download_button("run_btn_parameter_tuning_grid", "Tune parameters (grid search)", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # The column layout is the same for every platform (only extra_inputs
    # differs), so it is built once. Previously it lived in two identical
    # HRMS/NRMS branches and was left unbound for any other platform value.
    # base_inputs has five entries, so the old [0:6] / [6:7] slices amounted to
    # "all base inputs" and "nothing"; that is written out directly here.
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs, style=column_style),
        ui.div(extra_inputs, style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div(numeric_inputs[5:9], style=column_style),
        col_widths=(3, 3, 3, 3),
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto"
    )

    return ui.div(
        ui.TagList(
            ui.h2("Tune parameters"),
            inputs_columns,
            run_button_parameter_tuning_grid,
            back_button,
            log_panel
        ),
    )
500
-
501
-
502
-
503
# Default (lower, upper) bounds for each continuous parameter offered for
# differential-evolution tuning on HRMS data. The keys populate the parameter
# checkbox group and the tuples pre-fill the "Lower"/"Upper" numeric inputs.
PARAMS_HRMS = {
    "window_size_centroiding": (0.0, 0.5),
    "window_size_matching": (0.0, 0.5),
    "noise_threshold": (0.0, 0.25),
    "wf_mz": (0.0, 5.0),
    "wf_int": (0.0, 5.0),
    "LET_threshold": (0.0, 5.0),
    "entropy_dimension": (1.0, 3.0)
}

# NRMS counterpart of PARAMS_HRMS: the same entries without the two
# window-size parameters.
PARAMS_NRMS = {
    "noise_threshold": (0.0, 0.25),
    "wf_mz": (0.0, 5.0),
    "wf_int": (0.0, 5.0),
    "LET_threshold": (0.0, 5.0),
    "entropy_dimension": (1.0, 3.0)
}
520
-
521
-
522
def run_parameter_tuning_DE_ui(platform: str):
    """Build the differential-evolution "Tune parameters" page.

    Args:
        platform: ``"HRMS"`` selects ``PARAMS_HRMS`` and adds the window-size
            inputs; any other value selects ``PARAMS_NRMS``.

    Returns:
        A fillable page with a sidebar (parameter checkboxes plus the
        ``bounds_inputs`` output) and a main area (inputs, buttons, live log).
    """
    is_hrms = platform == "HRMS"
    tunable_params = PARAMS_HRMS if is_hrms else PARAMS_NRMS

    flex_column = "display:flex; flex-direction:column; gap:10px;"
    big_button = "font-size:16px; padding:15px 30px; width:300px; height:100px"

    similarity_choices = [
        "cosine","shannon","renyi","tsallis","mixture","jaccard","dice",
        "3w_jaccard","sokal_sneath","binary_cosine","mountford",
        "mcconnaughey","driver_kroeber","simpson","braun_banquet",
        "fager_mcgowan","kulczynski","intersection","hamming","hellinger",
    ]

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_select(
            "similarity_measure",
            "Select similarity measure:",
            similarity_choices,
        ),
        ui.input_text(
            "weights",
            "Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):",
            "0.25, 0.25, 0.25, 0.25",
        ),
        ui.input_select(
            "high_quality_reference_library",
            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
            [False, True],
        ),
    ]

    if is_hrms:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "FCNMWL",
            ),
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "FNLW",
            )
        ]

    numeric_inputs = [
        ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
        ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99_999_999),
        ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
        ui.input_numeric("int_max", "Maximum intensity for filtering:", 999_999_999),
        ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
        ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
        ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
        ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
        ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
        ui.input_numeric("max_iterations", "Maximum number of iterations:", 5),
    ]

    run_button_parameter_tuning_DE = ui.input_action_button(
        "run_btn_parameter_tuning_DE",
        "Tune parameters (differential evolution optimization)",
        style=big_button,
    )
    back_button = ui.input_action_button(
        "back",
        "Back to main menu",
        style=big_button,
    )

    # Four equal-width columns; the arrangement is the same for both
    # platforms, only the contents of extra_inputs differ.
    column_groups = (
        base_inputs,
        extra_inputs,
        numeric_inputs[0:5],
        numeric_inputs[5:11],
    )
    inputs_columns = ui.layout_columns(
        *[ui.div(*group, style=flex_column) for group in column_groups],
        col_widths=(3, 3, 3, 3),
    )

    # Sidebar: parameter selection and their bounds.
    sidebar = ui.sidebar(
        ui.h3("Select continuous parameters to optimize"),
        ui.input_checkbox_group(
            "params",
            None,
            choices=list(tunable_params.keys()),
            selected=["noise_threshold", "LET_threshold"],
        ),
        ui.hr(),
        ui.h4("Bounds for selected parameters"),
        ui.output_ui("bounds_inputs"),
        width=360,
    )

    # Main area: inputs, buttons and the live log card.
    body = ui.div(
        ui.h2("Tune parameters (differential evolution optimization)"),
        inputs_columns,
        run_button_parameter_tuning_DE,
        back_button,
        ui.br(),
        ui.card(
            ui.card_header("Live log"),
            ui.output_text_verbatim("run_log"),  # the server must define a matching "run_log" output
        ),
        style="display:flex; flex-direction:column; gap:16px;",
    )

    return ui.page_fillable(ui.layout_sidebar(sidebar, body))
645
-
646
-
647
-
648
-
649
-
650
# Top-level page shell: a favicon link, the "main_ui" UI output placeholder and
# the "status_output" text output.
app_ui = ui.page_fluid(
    ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
    ui.output_ui("main_ui"),
    ui.output_text("status_output")
)
655
-
656
-
657
- def server(input, output, session):
658
-
659
# Name of the page currently shown; read by the clear-on-back/enter effects.
current_page = reactive.Value("main_menu")

# Click counters (their consumers are not visible in this part of the file).
plot_clicks = reactive.Value(0)
match_clicks = reactive.Value(0)
back_clicks = reactive.Value(0)

# Per-task status text, running flags and latest results. The *_running /
# is_matching_rv / is_any_job_running flags gate the log pump, and
# match_log_rv accumulates the text it drains from the log queue.
run_status_plot_spectra = reactive.Value("")
run_status_spec_lib_matching = reactive.Value("")
run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
run_status_parameter_tuning_grid = reactive.Value("")
run_status_parameter_tuning_DE = reactive.Value("")
is_tuning_grid_running = reactive.Value(False)
is_tuning_DE_running = reactive.Value(False)
match_log_rv = reactive.Value("")
is_matching_rv = reactive.Value(False)
is_any_job_running = reactive.Value(False)
latest_csv_path_rv = reactive.Value("")
latest_df_rv = reactive.Value(None)
is_running_rv = reactive.Value(False)

# State for the uploaded query and reference libraries; all of these are
# cleared by _reset_plot_spectra_state.
query_ids_rv = reactive.Value([])
query_file_path_rv = reactive.Value(None)
query_result_rv = reactive.Value(None)
query_status_rv = reactive.Value("")
reference_ids_rv = reactive.Value([])
reference_file_path_rv = reactive.Value(None)
reference_result_rv = reactive.Value(None)
reference_status_rv = reactive.Value("")

# Presumably paths of libraries converted to another format - TODO confirm
# against the code that sets them.
converted_query_path_rv = reactive.Value(None)
converted_reference_path_rv = reactive.Value(None)
690
-
691
@output
@render.ui
def bounds_inputs():
    """Render a Lower/Upper numeric-input card for each ticked parameter.

    Defaults come from ``PARAMS_HRMS`` or ``PARAMS_NRMS`` depending on
    ``input.chromatography_platform()``; unknown names default to (0.0, 1.0).
    """
    chosen = input.params()
    if not chosen:
        return ui.div(ui.em("Select one or more parameters above."))

    defaults = PARAMS_HRMS if input.chromatography_platform() == 'HRMS' else PARAMS_NRMS

    def _bounds_card(param_name):
        # One card per parameter, holding its pair of bound inputs.
        lower, upper = defaults.get(param_name, (0.0, 1.0))
        return ui.card(
            ui.card_header(param_name),
            ui.layout_columns(
                ui.input_numeric(f"min_{param_name}", "Lower", lower, step=0.001),
                ui.input_numeric(f"max_{param_name}", "Upper", upper, step=0.001),
            )
        )

    cards = [_bounds_card(param_name) for param_name in chosen]
    return ui.div(*cards)
715
-
716
def _read_bounds_dict():
    """Return ``{name: (lower, upper)}`` for every ticked parameter.

    A bound is read from the ``min_<name>`` / ``max_<name>`` input when that
    input exists, otherwise the platform default (or (0.0, 1.0) for an unknown
    name) is used. Values are converted to ``float``.
    """
    # Resolve the defaults table from the selected platform, as _read_bounds
    # does; there is no bare PARAMS name in scope here, so referencing one
    # raised NameError as soon as a parameter was ticked.
    if input.chromatography_platform() == 'HRMS':
        defaults = PARAMS_HRMS
    else:
        defaults = PARAMS_NRMS

    out = {}
    for name in input.params():
        lo_default, hi_default = defaults.get(name, (0.0, 1.0))
        lo_id = f"min_{name}"
        hi_id = f"max_{name}"

        lo_val = input[lo_id]() if lo_id in input else lo_default
        hi_val = input[hi_id]() if hi_id in input else hi_default

        out[name] = (float(lo_val), float(hi_val))
    return out
729
-
730
def _read_bounds():
    """Collect optimisation bounds for the ticked parameters.

    Returns:
        ``(opt_params, bounds_dict, bounds_list)``: the selected names, a
        name-to-``(lower, upper)`` dict, and the same pairs as a list in
        selection order. A pair entered upside-down is swapped.
    """
    opt_params = input.params()
    defaults = PARAMS_HRMS if input.chromatography_platform() == 'HRMS' else PARAMS_NRMS

    bounds_dict = {}
    for param in opt_params:
        default_lower, default_upper = defaults.get(param, (0.0, 1.0))
        lower_id = f"min_{param}"
        upper_id = f"max_{param}"
        lower = input[lower_id]() if lower_id in input else default_lower
        upper = input[upper_id]() if upper_id in input else default_upper
        # Tolerate bounds typed in the wrong order.
        ordered = (upper, lower) if lower > upper else (lower, upper)
        bounds_dict[param] = (float(ordered[0]), float(ordered[1]))

    bounds_list = [bounds_dict[param] for param in opt_params]
    return opt_params, bounds_dict, bounds_list
749
-
750
def _reset_plot_spectra_state():
    """Clear all query/reference state and empty both spectrum-ID selectors."""
    for status_rv in (query_status_rv, reference_status_rv):
        status_rv.set("")
    for ids_rv in (query_ids_rv, reference_ids_rv):
        ids_rv.set([])
    for nullable_rv in (
        query_file_path_rv,
        reference_file_path_rv,
        query_result_rv,
        reference_result_rv,
        converted_query_path_rv,
        converted_reference_path_rv,
    ):
        nullable_rv.set(None)

    # Best-effort: a failure while clearing the selectors is ignored.
    try:
        for selector_id in ("spectrum_ID1", "spectrum_ID2"):
            ui.update_selectize(selector_id, choices=[], selected=None)
    except Exception:
        pass
766
-
767
-
768
def _reset_spec_lib_matching_state():
    """Clear the matching log and flags, and empty both spectrum-ID selectors."""
    match_log_rv.set("")
    for flag_rv in (is_matching_rv, is_any_job_running):
        flag_rv.set(False)

    # Best-effort: a failure while clearing the selectors is ignored.
    try:
        for selector_id in ("spectrum_ID1", "spectrum_ID2"):
            ui.update_selectize(selector_id, choices=[], selected=None)
    except Exception:
        pass
777
-
778
-
779
def _reset_parameter_tuning_state():
    """Clear the shared log text and every tuning/job running flag."""
    match_log_rv.set("")
    for flag_rv in (is_tuning_grid_running, is_tuning_DE_running, is_any_job_running):
        flag_rv.set(False)
784
-
785
-
786
@reactive.effect
@reactive.event(input.back)
def _clear_on_back_from_pages():
    """When "back" is pressed, reset the state belonging to the current page."""
    reset_for_page = {
        "plot_spectra": _reset_plot_spectra_state,
        "run_spec_lib_matching": _reset_spec_lib_matching_state,
        "run_parameter_tuning_grid": _reset_parameter_tuning_state,
        "run_parameter_tuning_DE": _reset_parameter_tuning_state,
    }
    reset = reset_for_page.get(current_page())
    if reset is not None:
        reset()
798
-
799
@reactive.effect
def _clear_on_enter_pages():
    """Run the matching reset routine whenever `current_page()` takes one of the task-page values."""
    page = current_page()
    if page == "plot_spectra":
        _reset_plot_spectra_state()
    elif page == "run_spec_lib_matching":
        _reset_spec_lib_matching_state()
    elif page in ("run_parameter_tuning_grid", "run_parameter_tuning_DE"):
        # Both tuning pages share one reset routine.
        _reset_parameter_tuning_state()
810
-
811
-
812
def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
    """Remove and return every item currently in `q`, in queue order, without waiting."""
    drained = []
    while True:
        try:
            item = q.get_nowait()
        except asyncio.QueueEmpty:
            return drained
        drained.append(item)
820
-
821
-
822
class ReactiveWriter(io.TextIOBase):
    """Text stream whose writes are queued onto `_LOG_QUEUE` via an event loop.

    Each non-empty write is handed to `loop.call_soon_threadsafe`, so the
    enqueue itself is scheduled on the loop given to the constructor.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop):
        self._loop = loop

    def write(self, s: str):
        """Schedule `s` for enqueueing and return its length (0 for empty input)."""
        if s:
            self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
            return len(s)
        return 0

    def flush(self):
        """No-op: nothing is buffered inside the writer itself."""
        pass
832
-
833
-
834
@reactive.effect
async def _pump_logs():
    """While any job flag is set, append queued log text to `match_log_rv` every 0.05 s."""
    job_flags = (
        is_any_job_running,
        is_tuning_grid_running,
        is_tuning_DE_running,
        is_matching_rv,
    )
    # A generator keeps the reads lazy: flags after the first true one are not read.
    if not any(flag.get() for flag in job_flags):
        return

    reactive.invalidate_later(0.05)
    pending = _drain_queue_nowait(_LOG_QUEUE)
    if not pending:
        return
    match_log_rv.set(match_log_rv.get() + "".join(pending))
    await reactive.flush()
843
-
844
-
845
def process_database(file_path: str):
    """Return a dict holding `file_path` and its lower-cased file extension."""
    extension = Path(file_path).suffix
    return dict(path=file_path, suffix=extension.lower())
848
-
849
@render.text
def plot_query_status():
    """Text output showing the query status message, or an empty string when it is falsy."""
    status = query_status_rv.get()
    return status if status else ""
852
-
853
@render.text
def plot_reference_status():
    """Text output showing the reference status message, or an empty string when it is falsy."""
    status = reference_status_rv.get()
    return status if status else ""
856
-
857
-
858
@reactive.effect
@reactive.event(input.query_data)
async def _on_query_upload():
    """Record the uploaded query file and run `process_database` on it in a worker thread.

    Progress and the outcome are reported through `query_status_rv`; the
    result dict is stored in `query_result_rv`.
    """
    uploads = input.query_data()
    req(uploads and len(uploads) > 0)

    uploaded_path = uploads[0]["datapath"]
    query_file_path_rv.set(uploaded_path)

    query_status_rv.set(f"Processing query database: {Path(uploaded_path).name} …")
    await reactive.flush()

    try:
        processed = await asyncio.to_thread(process_database, uploaded_path)
        query_result_rv.set(processed)
        query_status_rv.set("✅ Query database processed.")
        await reactive.flush()
    except Exception as e:
        # Failures are surfaced in the status line and not re-raised.
        query_status_rv.set(f"❌ Failed to process query database: {e}")
        await reactive.flush()
878
-
879
-
880
@reactive.effect
@reactive.event(input.reference_data)
async def _on_reference_upload():
    """Record the uploaded reference file and run `process_database` on it in a worker thread.

    Progress and the outcome are reported through `reference_status_rv`; the
    result dict is stored in `reference_result_rv`.
    """
    uploads = input.reference_data()
    req(uploads and len(uploads) > 0)

    uploaded_path = uploads[0]["datapath"]
    reference_file_path_rv.set(uploaded_path)

    reference_status_rv.set(f"Processing reference database: {Path(uploaded_path).name} …")
    await reactive.flush()

    try:
        processed = await asyncio.to_thread(process_database, uploaded_path)
        reference_result_rv.set(processed)
        reference_status_rv.set("✅ Reference database processed.")
        await reactive.flush()
    except Exception as e:
        # Failures are surfaced in the status line and not re-raised.
        reference_status_rv.set(f"❌ Failed to process reference database: {e}")
        await reactive.flush()
900
-
901
-
902
@render.text
def match_log():
    """Text output mirroring the current contents of `match_log_rv`."""
    log_text = match_log_rv.get()
    return log_text
905
-
906
-
907
@reactive.Effect
def _():
    """Switch `current_page` when a menu button's click count exceeds its stored counter.

    The buttons are checked in a fixed order and only the first one that has
    advanced is handled per run; the `back` button is checked last.
    """
    # NOTE(review): the three "run_*" buttons all compare against (and update)
    # the single `match_clicks` counter — confirm this sharing is intended.
    routes = (
        ("plot_spectra", plot_clicks),
        ("run_spec_lib_matching", match_clicks),
        ("run_parameter_tuning_grid", match_clicks),
        ("run_parameter_tuning_DE", match_clicks),
    )
    for button_id, counter in routes:
        if input[button_id]() > counter.get():
            current_page.set(button_id)
            counter.set(input[button_id]())
            return

    if hasattr(input, "back") and input.back() > back_clicks.get():
        current_page.set("main_menu")
        back_clicks.set(input.back())
924
-
925
-
926
@render.image
def image():
    """Image output pointing at `www/emblem.png` next to this file, sized 320px by 250px."""
    app_dir = Path(__file__).resolve().parent
    return {
        "src": str(app_dir / "www/emblem.png"),
        "width": "320px",
        "height": "250px",
    }
931
-
932
@output
@render.ui
def main_ui():
    """Render the UI for the page named by `current_page()`.

    The four task pages are delegated to their `*_ui` builder functions, each
    called with the selected chromatography platform. The main menu is built
    inline. Returns None when the page value is not one of the five known names.
    """
    page = current_page()
    if page == "plot_spectra":
        return plot_spectra_ui(input.chromatography_platform())
    if page == "run_spec_lib_matching":
        return run_spec_lib_matching_ui(input.chromatography_platform())
    if page == "run_parameter_tuning_grid":
        return run_parameter_tuning_grid_ui(input.chromatography_platform())
    if page == "run_parameter_tuning_DE":
        return run_parameter_tuning_DE_ui(input.chromatography_platform())
    if page != "main_menu":
        return None

    # Styles shared by the entries of the "References" section.
    note_style = "margin-top:10px; text-align:left; font-size:14px; font-weight:500"
    citation_style = "text-align:left; font-size:14px; font-weight:500"

    return ui.page_fluid(
        ui.h2("Main Menu"),
        ui.div(
            ui.output_image("image"),
            style=(
                "position:fixed; top:0; left:50%; transform:translateX(-50%); "
                "z-index:1000; text-align:center; padding:10px; background-color:white;"
            ),
        ),
        ui.div(
            "Overview:",
            style="text-align:left; font-size:24px; font-weight:bold; margin-top:350px"
        ),
        ui.div(
            "PyCompound is a Python-based tool designed for performing spectral library matching on either high-resolution mass spectrometry data (HRMS) or low-resolution mass spectrometry data (NRMS). PyCompound offers a range of spectrum preprocessing transformations and similarity measures. These spectrum preprocessing transformations include filtering on mass/charge and/or intensity values, weight factor transformation, low-entropy transformation, centroiding, noise removal, and matching. The available similarity measures include the canonical Cosine similarity measure, three entropy-based similarity measures, and a variety of binary similarity measures: Jaccard, Dice, 3W-Jaccard, Sokal-Sneath, Binary Cosine, Mountford, McConnaughey, Driver-Kroeber, Simpson, Braun-Banquet, Fager-McGowan, Kulczynski, Intersection, Hamming, and Hellinger.",
            style="margin-top:10px; text-align:left; font-size:16px; font-weight:500"
        ),
        ui.div(
            "Select options:",
            style="margin-top:30px; text-align:left; font-size:24px; font-weight:bold"
        ),
        ui.div(
            ui.input_radio_buttons("chromatography_platform", "Specify chromatography platform:", ["HRMS","NRMS"]),
            style="font-size:18px; margin-top:10px; max-width:none"
        ),
        ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
        ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
        ui.input_action_button("run_parameter_tuning_grid", "Grid search: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
        ui.input_action_button("run_parameter_tuning_DE", "Differential evolution optimization: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:500px; height:150px; margin-top:10px; margin-right:50px"),
        ui.div(
            "References:",
            style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
        ),
        ui.div(
            "If Shannon Entropy similarity measure, low-entropy transformation, or centroiding are used:",
            style=note_style
        ),
        ui.div(
            ui.HTML(
                'Li, Y., Kind, T., Folz, J. et al. (2021) Spectral entropy outperforms MS/MS dot product similarity for small-molecule compound identification. Nat Methods, 18 1524–1531. <a href="https://doi.org/10.1038/s41592-021-01331-z" target="_blank">https://doi.org/10.1038/s41592-021-01331-z</a>.'
            ),
            style=citation_style
        ),
        ui.div(
            "If Tsallis Entropy similarity measure or series of preprocessing transformations are used:",
            style=note_style
        ),
        ui.div(
            ui.HTML(
                # The anchor tag previously had a stray comma between its attributes.
                'Dlugas, H., Zhang, X., Kim, S. (2025) Comparative analysis of continuous similarity measures for compound identification in mass spectrometry-based metabolomics. Chemometrics and Intelligent Laboratory Systems, 263, 105417. <a href="https://doi.org/10.1016/j.chemolab.2025.105417" target="_blank">https://doi.org/10.1016/j.chemolab.2025.105417</a>.'
            ),
            style=citation_style
        ),
        ui.div(
            "If binary similarity measures are used:",
            style=note_style
        ),
        ui.div(
            ui.HTML(
                'Kim, S., Kato, I., & Zhang, X. (2022). Comparative Analysis of Binary Similarity Measures for Compound Identification in Mass Spectrometry-Based Metabolomics. Metabolites, 12(8), 694. <a href="https://doi.org/10.3390/metabo12080694" target="_blank">https://doi.org/10.3390/metabo12080694</a>.'
            ),
            style=citation_style
        ),
        ui.div(
            "If weight factor transformation is used:",
            style=note_style
        ),
        ui.div(
            ui.HTML(
                'Kim, S., Koo, I., Wei, X., & Zhang, X. (2012). A method of finding optimal weight factors for compound identification in gas chromatography-mass spectrometry. Bioinformatics, 28(8), 1158-1163. <a href="https://doi.org/10.1093/bioinformatics/bts083" target="_blank">https://doi.org/10.1093/bioinformatics/bts083</a>.'
            ),
            style="margin-bottom:40px; text-align:left; font-size:14px; font-weight:500"
        ),
    )
1020
-
1021
-
1022
-
1023
@reactive.effect
@reactive.event(input.query_data)
async def _populate_ids_from_query_upload():
    """Fill the `spectrum_ID1` selector with IDs read from the uploaded query file.

    A `.csv` upload is read as-is; any other upload is first passed to
    `build_library` in a worker thread. Progress is reported through
    `query_status_rv`. On failure the status is updated and the exception
    is re-raised.
    """
    uploads = input.query_data()
    if not uploads:
        return

    source_path = Path(uploads[0]["datapath"])
    is_csv = source_path.suffix.lower() == ".csv"

    try:
        if is_csv:
            csv_path = source_path
        else:
            query_status_rv.set(f"Converting {source_path.name} → CSV …")
            await reactive.flush()

            target_csv = source_path.with_suffix(".converted.csv")
            built = await asyncio.to_thread(build_library, str(source_path), str(target_csv))

            # `build_library` may hand back a path or a DataFrame; anything else is rejected.
            if isinstance(built, (str, os.PathLike, Path)):
                csv_path = Path(built)
            elif isinstance(built, pd.DataFrame):
                built.to_csv(target_csv, index=False, sep='\t')
                csv_path = target_csv
            else:
                raise TypeError(f"build_library returned unsupported type: {type(built)}")

        converted_query_path_rv.set(str(csv_path))

        query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        query_ids_rv.set(ids)

        first_id = ids[0] if ids else None
        ui.update_selectize("spectrum_ID1", choices=ids, selected=first_id)

        if ids:
            outcome = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            outcome = f"⚠️ No IDs found in {csv_path.name}"
        query_status_rv.set(outcome)
        await reactive.flush()

    except Exception as e:
        query_status_rv.set(f"❌ Failed: {e}")
        await reactive.flush()
        raise
1070
-
1071
-
1072
@reactive.effect
@reactive.event(input.reference_data)
async def _populate_ids_from_reference_upload():
    """Fill the `spectrum_ID2` selector with IDs read from the uploaded reference file.

    A `.csv` upload is read as-is; any other upload is first passed to
    `build_library` in a worker thread. Progress is reported through
    `reference_status_rv`. On failure the status is updated and the
    exception is re-raised.
    """
    uploads = input.reference_data()
    if not uploads:
        return

    source_path = Path(uploads[0]["datapath"])
    is_csv = source_path.suffix.lower() == ".csv"

    try:
        if is_csv:
            csv_path = source_path
        else:
            reference_status_rv.set(f"Converting {source_path.name} → CSV …")
            await reactive.flush()

            target_csv = source_path.with_suffix(".converted.csv")
            built = await asyncio.to_thread(build_library, str(source_path), str(target_csv))

            # `build_library` may hand back a path or a DataFrame; anything else is rejected.
            if isinstance(built, (str, os.PathLike, Path)):
                csv_path = Path(built)
            elif isinstance(built, pd.DataFrame):
                built.to_csv(target_csv, index=False, sep='\t')
                csv_path = target_csv
            else:
                raise TypeError(f"build_library returned unsupported type: {type(built)}")

        converted_reference_path_rv.set(str(csv_path))

        reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        reference_ids_rv.set(ids)

        first_id = ids[0] if ids else None
        ui.update_selectize("spectrum_ID2", choices=ids, selected=first_id)

        if ids:
            outcome = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            outcome = f"⚠️ No IDs found in {csv_path.name}"
        reference_status_rv.set(outcome)
        await reactive.flush()

    except Exception as e:
        reference_status_rv.set(f"❌ Failed: {e}")
        await reactive.flush()
        raise
1121
-
1122
-
1123
@render.download(filename=lambda: "plot.png")
def run_btn_plot_spectra():
    """Download handler: plot the two selected spectra and yield the figure as PNG bytes.

    Inputs are read from the UI, the platform-specific plotting function is
    called with `return_plot=True`, and the returned figure is rendered to an
    in-memory PNG (150 dpi, tight bounding box).

    Raises:
        IndexError: if fewer than four comma-separated weights are supplied.
    """
    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    weight_values = [float(w.strip()) for w in input.weights().split(",") if w.strip()]
    weights = {
        'Cosine': weight_values[0],
        'Shannon': weight_values[1],
        'Renyi': weight_values[2],
        'Tsallis': weight_values[3],
    }

    # Parse the flag the way the other handlers in this file do. The previous
    # `!= 'False'` test treated a boolean False (and any other non-'False'
    # value) as True.
    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    else:
        hq = bool(hq)

    common = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        y_axis_transformation=input.y_axis_transformation(),
        return_plot=True,
    )

    if input.chromatography_platform() == "HRMS":
        # Only the HRMS call receives weights and window sizes, as before.
        fig = generate_plots_on_HRMS_data(
            weights=weights,
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **common
        )
    else:
        fig = generate_plots_on_NRMS_data(**common)
    plt.show()

    with io.BytesIO() as buf:
        try:
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
        finally:
            # Close this specific figure, even when saving fails.
            plt.close(fig)
        yield buf.getvalue()
1148
-
1149
-
1150
-
1151
@render.download(filename="identification_output.txt")
async def run_btn_spec_lib_matching():
    """Download handler: run spectral library matching and yield the result as tab-separated text.

    The matching function for the selected platform runs in a worker thread
    with stdout/stderr redirected into a `ReactiveWriter`. Start, success and
    failure messages are written to `match_log_rv`; exceptions are re-raised
    after being logged.
    """
    # Raise the job flags so `_pump_logs` forwards the queued output while the
    # job runs; they are lowered again in the `finally` below.
    is_any_job_running.set(True)
    is_matching_rv.set(True)
    match_log_rv.set("Running identification...\n")
    await reactive.flush()

    try:
        hq = input.high_quality_reference_library()
        if isinstance(hq, str):
            hq = hq.lower() == "true"
        elif isinstance(hq, (int, float)):
            hq = bool(hq)

        weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
        weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}

        common_kwargs = dict(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            likely_reference_ids=None,
            similarity_measure=input.similarity_measure(),
            weights=weights,
            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
            high_quality_reference_library=hq,
            mz_min=input.mz_min(), mz_max=input.mz_max(),
            int_min=input.int_min(), int_max=input.int_max(),
            noise_threshold=input.noise_threshold(),
            wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
            LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
            n_top_matches_to_save=input.n_top_matches_to_save(),
            print_id_results=True,
            output_identification=str(Path.cwd() / "identification_output.txt"),
            output_similarity_scores=str(Path.cwd() / "similarity_scores.txt"),
            return_ID_output=True,
        )

        loop = asyncio.get_running_loop()
        rw = ReactiveWriter(loop)

        try:
            with redirect_stdout(rw), redirect_stderr(rw):
                if input.chromatography_platform() == "HRMS":
                    df_out = await asyncio.to_thread(
                        run_spec_lib_matching_on_HRMS_data,
                        window_size_centroiding=input.window_size_centroiding(),
                        window_size_matching=input.window_size_matching(),
                        **common_kwargs
                    )
                else:
                    df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
            match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
            await reactive.flush()
        except Exception as e:
            match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
            await reactive.flush()
            raise
    finally:
        is_matching_rv.set(False)
        is_any_job_running.set(False)
        await reactive.flush()

    yield df_out.to_csv(index=True, sep='\t')
1207
-
1208
-
1209
-
1210
@render.download(filename="plot.png")
def run_btn_plot_spectra_within_spec_lib_matching():
    """Download handler on the matching page: plot the two selected spectra as a PNG.

    Requires both uploads to be present. The y-axis transformation is fixed
    to "normalized"; all other settings are read from the UI inputs.
    """
    req(input.query_data(), input.reference_data())

    first_id = input.spectrum_ID1() or None
    second_id = input.spectrum_ID2() or None

    hq_flag = input.high_quality_reference_library()
    if isinstance(hq_flag, str):
        hq_flag = hq_flag.lower() == "true"
    elif isinstance(hq_flag, (int, float)):
        hq_flag = bool(hq_flag)

    parsed = [float(piece.strip()) for piece in input.weights().split(",") if piece.strip()]
    weight_map = {'Cosine': parsed[0], 'Shannon': parsed[1], 'Renyi': parsed[2], 'Tsallis': parsed[3]}

    plot_kwargs = {
        "query_data": input.query_data()[0]['datapath'],
        "reference_data": input.reference_data()[0]['datapath'],
        "spectrum_ID1": first_id,
        "spectrum_ID2": second_id,
        "similarity_measure": input.similarity_measure(),
        "weights": weight_map,
        "spectrum_preprocessing_order": input.spectrum_preprocessing_order(),
        "high_quality_reference_library": hq_flag,
        "mz_min": input.mz_min(),
        "mz_max": input.mz_max(),
        "int_min": input.int_min(),
        "int_max": input.int_max(),
        "noise_threshold": input.noise_threshold(),
        "wf_mz": input.wf_mz(),
        "wf_intensity": input.wf_int(),
        "LET_threshold": input.LET_threshold(),
        "entropy_dimension": input.entropy_dimension(),
        "y_axis_transformation": "normalized",
        "return_plot": True,
    }

    if input.chromatography_platform() == "HRMS":
        # Only the HRMS plotter receives the two window sizes.
        fig = generate_plots_on_HRMS_data(
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **plot_kwargs
        )
    else:
        fig = generate_plots_on_NRMS_data(**plot_kwargs)
    plt.show()

    with io.BytesIO() as buf:
        fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
        plt.close()
        png_bytes = buf.getvalue()
    yield png_bytes
1259
-
1260
-
1261
@render.download(filename="parameter_tuning_grid_output.txt")
async def run_btn_parameter_tuning_grid():
    """Download handler: run a grid search over the entered parameter values.

    The UI inputs are parsed into a grid dict, the platform-specific
    `*_grid_shiny` tuner runs in a worker thread via `_run_with_redirects`,
    and the resulting table is yielded as UTF-8 encoded, tab-separated text.
    Progress and errors are appended to `match_log_rv`; exceptions are
    re-raised after being logged.
    """
    is_any_job_running.set(True)
    is_tuning_grid_running.set(True)
    match_log_rv.set("Running grid search of all parameters specified...\n")
    await reactive.flush()

    try:
        # Parsing happens inside the try so a malformed input still reaches the
        # `finally` that lowers the running flags.
        hq_values = [
            x.strip().lower() == "true"
            for x in input.high_quality_reference_library().strip().strip("[]").split(",")
            if x.strip()
        ]
        grid = {
            'similarity_measure': list(input.similarity_measure()),
            'weight': strip_weights(input.weights()),
            'spectrum_preprocessing_order': strip_text(input.spectrum_preprocessing_order()),
            'mz_min': strip_numeric(input.mz_min()),
            'mz_max': strip_numeric(input.mz_max()),
            'int_min': strip_numeric(input.int_min()),
            'int_max': strip_numeric(input.int_max()),
            'noise_threshold': strip_numeric(input.noise_threshold()),
            'wf_mz': strip_numeric(input.wf_mz()),
            'wf_int': strip_numeric(input.wf_int()),
            'LET_threshold': strip_numeric(input.LET_threshold()),
            'entropy_dimension': strip_numeric(input.entropy_dimension()),
            'high_quality_reference_library': hq_values,
        }

        common_kwargs = dict(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            output_path=str(Path.cwd() / "parameter_tuning_grid_output.txt"),
            return_output=True,
        )

        loop = asyncio.get_running_loop()
        rw = ReactiveWriter(loop)

        if input.chromatography_platform() == "HRMS":
            # The HRMS grid carries two extra window-size entries, appended last.
            grid['window_size_centroiding'] = strip_numeric(input.window_size_centroiding())
            grid['window_size_matching'] = strip_numeric(input.window_size_matching())
            tuner = tune_params_on_HRMS_data_grid_shiny
        else:
            tuner = tune_params_on_NRMS_data_grid_shiny

        df_out = await asyncio.to_thread(_run_with_redirects, tuner, rw, **common_kwargs, grid=grid)

        match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        raise
    finally:
        is_tuning_grid_running.set(False)
        is_any_job_running.set(False)
        await reactive.flush()

    # `sep` is a `to_csv` argument; `str.encode` does not accept it.
    yield df_out.to_csv(index=False, sep='\t').encode("utf-8")
1342
-
1343
-
1344
-
1345
@reactive.effect
@reactive.event(input.run_btn_parameter_tuning_DE)
async def run_btn_parameter_tuning_DE():
    """Run differential-evolution parameter tuning when the DE run button fires.

    All UI inputs are snapshotted first; `tune_params_DE` then runs in a
    worker thread with stdout/stderr redirected into a local queue, which a
    background task drains into `match_log_rv`. Errors are written to the log
    (with traceback) and not re-raised. The running flags are lowered on exit.
    """
    match_log_rv.set("Tuning specified continuous parameters using differential evolution...\n")
    is_any_job_running.set(True)
    is_tuning_DE_running.set(True)
    await reactive.flush()

    # --- helpers ---
    def _safe_float(v, default):
        """Return float(v), or `default` when v is None or cannot be converted."""
        try:
            if v is None:
                return default
            return float(v)
        except Exception:
            return default

    def _iget(input_id, default=None):
        """Read a Shiny input by id; return `default` if it is absent or raises SilentException."""
        if input_id in input:
            try:
                return input[input_id]()
            except SilentException:
                return default
        return default

    def _format_error(exc):
        """Render an exception with its full traceback as one string."""
        return "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))

    # ---- log plumbing (stdout/stderr -> UI) ----
    loop = asyncio.get_running_loop()
    q: asyncio.Queue[str | None] = asyncio.Queue()

    class UIWriter(io.TextIOBase):
        """Stream that schedules each non-empty write onto the local queue via the loop."""

        def write(self, s: str):
            if s:
                loop.call_soon_threadsafe(q.put_nowait, s)
            return len(s)

        def flush(self):
            pass

    async def _drain():
        """Append queued text to the log until the None sentinel arrives."""
        while True:
            msg = await q.get()
            if msg is None:
                break
            match_log_rv.set(match_log_rv.get() + msg)
            await reactive.flush()

    drain_task = asyncio.create_task(_drain())
    writer = UIWriter()

    # ---------- SNAPSHOT INPUTS ----------
    try:
        qfile = _iget("query_data")[0]["datapath"]
        rfile = _iget("reference_data")[0]["datapath"]

        platform = _iget("chromatography_platform", "HRMS")
        sim = _iget("similarity_measure", "cosine")
        spro = _iget("spectrum_preprocessing_order", "FCNMWL")

        hq_raw = _iget("high_quality_reference_library", False)
        if isinstance(hq_raw, str):
            hq = hq_raw.lower() == "true"
        else:
            hq = bool(hq_raw)

        mz_min = _safe_float(_iget("mz_min", 0.0), 0.0)
        mz_max = _safe_float(_iget("mz_max", 99_999_999.0), 99_999_999.0)
        int_min = _safe_float(_iget("int_min", 0.0), 0.0)
        int_max = _safe_float(_iget("int_max", 999_999_999.0), 999_999_999.0)

        # weights "a,b,c,d"; missing trailing entries are padded with 0.0
        w_text = _iget("weights", "") or ""
        w_list = [float(w.strip()) for w in w_text.split(",") if w.strip()]
        w_list = (w_list + [0.0, 0.0, 0.0, 0.0])[:4]
        weights = {"Cosine": w_list[0], "Shannon": w_list[1], "Renyi": w_list[2], "Tsallis": w_list[3]}

        # Selected parameters and their bounds: taken from the min_/max_ inputs
        # when present, otherwise from the platform's defaults table.
        opt_params = tuple(_iget("params", ()) or ())
        param_defaults = PARAMS_HRMS if platform == "HRMS" else PARAMS_NRMS
        bounds_dict = {}
        for p in opt_params:
            default_lo, default_hi = param_defaults.get(p, (0.0, 1.0))[:2]
            lo = _safe_float(_iget(f"min_{p}", default_lo), default_lo)
            hi = _safe_float(_iget(f"max_{p}", default_hi), default_hi)
            if lo > hi:
                lo, hi = hi, lo
            bounds_dict[p] = (lo, hi)

        defaults = {
            "window_size_centroiding": _safe_float(_iget("window_size_centroiding", 0.5), 0.5),
            "window_size_matching": _safe_float(_iget("window_size_matching", 0.5), 0.5),
            "noise_threshold": _safe_float(_iget("noise_threshold", 0.0), 0.0),
            "wf_mz": _safe_float(_iget("wf_mz", 0.0), 0.0),
            "wf_int": _safe_float(_iget("wf_int", 1.0), 1.0),
            "LET_threshold": _safe_float(_iget("LET_threshold", 0.0), 0.0),
            "entropy_dimension": _safe_float(_iget("entropy_dimension", 1.1), 1.1),
        }
        if platform == "NRMS":
            defaults.pop("window_size_centroiding", None)
            defaults.pop("window_size_matching", None)

        # Read here, not inside the worker thread, so the thread only touches
        # plain snapshotted values.
        max_iters = input.max_iterations()

    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Input snapshot failed:\n{_format_error(e)}\n")
        is_tuning_DE_running.set(False)
        is_any_job_running.set(False)
        await q.put(None)
        await drain_task
        await reactive.flush()
        return

    def _run():
        """Worker-thread body: call `tune_params_DE` with output redirected to the UI writer."""
        with redirect_stdout(writer), redirect_stderr(writer):
            return tune_params_DE(
                query_data=qfile,
                reference_data=rfile,
                chromatography_platform=platform,
                similarity_measure=sim,
                weights=weights,
                spectrum_preprocessing_order=spro,
                mz_min=mz_min, mz_max=mz_max,
                int_min=int_min, int_max=int_max,
                high_quality_reference_library=hq,
                optimize_params=list(opt_params),
                param_bounds=bounds_dict,
                default_params=defaults,
                de_workers=1,
                maxiters=max_iters
            )

    try:
        _ = await asyncio.to_thread(_run)
        match_log_rv.set(match_log_rv.get() + "\n✅ Differential evolution finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ {type(e).__name__}: {e}\n{_format_error(e)}\n")
    finally:
        # Stop the drain task before lowering the flags.
        await q.put(None)
        await drain_task
        is_tuning_DE_running.set(False)
        is_any_job_running.set(False)
        await reactive.flush()
1489
-
1490
-
1491
@reactive.effect
async def _pump_reactive_writer_logs():
    """While the grid-tuning flag is set, append queued log text to `match_log_rv` every 0.1 s."""
    if is_tuning_grid_running.get():
        reactive.invalidate_later(0.1)
        pending = _drain_queue_nowait(_LOG_QUEUE)
        if pending:
            match_log_rv.set(match_log_rv.get() + "".join(pending))
            await reactive.flush()
1501
-
1502
-
1503
@render.text
def status_output():
    """Text output showing the value of `run_status_plot_spectra`."""
    # NOTE(review): three further `return` statements (for the spec-lib-matching,
    # grid and DE statuses) used to follow and could never execute; they were
    # removed as dead code. Confirm whether this output should pick a status
    # based on the active page instead.
    return run_status_plot_spectra.get()
1509
-
1510
@output
@render.text
def run_log():
    """Text output mirroring the current contents of `match_log_rv`."""
    log_text = match_log_rv.get()
    return log_text
1514
-
1515
-
1516
# Build the Shiny application object from the UI definition and the server function.
app = App(app_ui, server)
1517
-
1518
-
1519
-