pycompound 0.0.55__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app.py CHANGED
@@ -1,17 +1,179 @@
1
1
 
2
- from shiny import App, ui, reactive, render
3
- from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
4
- from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
5
- from pycompound_fy7392.spec_lib_matching import tune_params_on_HRMS_data
6
- from pycompound_fy7392.spec_lib_matching import tune_params_on_NRMS_data
7
- from pycompound_fy7392.plot_spectra import generate_plots_on_HRMS_data
8
- from pycompound_fy7392.plot_spectra import generate_plots_on_NRMS_data
2
+ from shiny import App, ui, reactive, render, req
3
+ from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
4
+ from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
5
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data
6
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data
7
+ from pycompound.plot_spectra import generate_plots_on_HRMS_data
8
+ from pycompound.plot_spectra import generate_plots_on_NRMS_data
9
9
  from pathlib import Path
10
+ from contextlib import redirect_stdout, redirect_stderr
10
11
  import subprocess
11
12
  import traceback
12
13
  import asyncio
13
14
  import io
15
+ import os
16
+ import sys
14
17
  import matplotlib.pyplot as plt
18
+ import pandas as pd
19
+ import numpy as np
20
+ import netCDF4 as nc
21
+ from pyteomics import mgf, mzml
22
+ import ast
23
+
24
+
25
+ _LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
26
+
27
+
28
def strip_text(s):
    """Split a bracketed, comma-separated string into trimmed, non-empty tokens.

    Leading/trailing ``[`` and ``]`` characters are removed from *s*, the rest
    is split on commas, and each piece is whitespace-stripped. Pieces that are
    empty after stripping are dropped.
    """
    trimmed_pieces = (piece.strip() for piece in s.strip('[]').split(','))
    return [piece for piece in trimmed_pieces if piece]
30
+
31
+
32
def strip_numeric(s):
    """Parse a bracketed, comma-separated string into a list of floats.

    Leading/trailing ``[`` and ``]`` characters are removed from *s* and the
    rest is split on commas; blank pieces are skipped. A piece that is not a
    valid float literal makes ``float`` raise ``ValueError``.
    """
    numbers = []
    for piece in s.strip('[]').split(','):
        token = piece.strip()
        if token:
            numbers.append(float(token))
    return numbers
34
+
35
+
36
def strip_weights(s):
    """Parse a weights string into a list of per-measure weight dicts.

    *s* is evaluated with ``ast.literal_eval`` and may be either

    * a sequence of weight sets, e.g. ``'((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))'``, or
    * a single flat weight set, e.g. ``'0.25, 0.25, 0.25, 0.25'``.

    Each weight set is paired positionally with the keys ``'Cosine'``,
    ``'Shannon'``, ``'Renyi'`` and ``'Tsallis'`` (``zip`` stops at the shorter
    of the two, so extra values are ignored and missing ones are omitted).

    Returns:
        A list with one dict per weight set.

    Raises:
        ValueError, SyntaxError: if *s* is not a valid Python literal.
        TypeError: if the literal is not a sequence (e.g. a bare number).
    """
    parsed = ast.literal_eval(s)
    keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
    # A flat sequence of numbers is ONE weight set; wrap it so the
    # comprehension below does not try to zip over individual floats.
    if parsed and all(isinstance(w, (int, float)) for w in parsed):
        parsed = [parsed]
    return [dict(zip(keys, weight_set)) for weight_set in parsed]
40
+
41
+
42
def _library_file_type(input_path):
    """Map a file-name ending to 'csv', 'mgf', 'mzML', 'cdf' or 'msp' (case-insensitive)."""
    lowered = str(input_path).lower()
    endings = (('csv', 'csv'), ('mgf', 'mgf'), ('mzml', 'mzML'), ('cdf', 'cdf'), ('msp', 'msp'))
    for ending, file_type in endings:
        if lowered.endswith(ending):
            return file_type
    # Raise instead of print + sys.exit(): sys.exit() raises SystemExit, which
    # is not an Exception subclass and so bypasses `except Exception` handlers.
    raise ValueError('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')


def _peaks_from_spectra(reader, file_type):
    """Flatten spectra yielded by a pyteomics reader into parallel id / m/z / intensity lists."""
    ids, mzs, ints = [], [], []
    for number, spec in enumerate(reader, start=1):
        spec_mzs = spec['m/z array']
        count = len(spec_mzs)
        if count == 0:
            continue
        if file_type == 'mzML':
            spec_id = f'ID_{number}'
        else:
            params = spec['params']
            # Fall back to TITLE when NAME is absent; the ID scan in
            # extract_first_column_ids accepts "TITLE=" lines as well.
            # NOTE(review): assumes pyteomics lower-cases MGF param keys - confirm.
            spec_id = params['name'] if 'name' in params else params['title']
        ids.extend([spec_id] * count)
        mzs.extend(spec_mzs)
        ints.extend(spec['intensity array'][:count])
    return ids, mzs, ints


def _peaks_from_cdf(input_path):
    """Read a netCDF file and flatten its scans into parallel id / m/z / intensity lists."""
    dataset = nc.Dataset(input_path, 'r')
    try:
        all_mzs = dataset.variables['mass_values'][:]
        all_ints = dataset.variables['intensity_values'][:]
        scan_idxs = dataset.variables['scan_index'][:]
    finally:
        # Close the file even when one of the variables is missing.
        dataset.close()

    ids, mzs, ints = [], [], []
    n_scans = len(scan_idxs)
    for i in range(n_scans):
        if i % 1000 == 0:
            print(f'analyzed {i} out of {n_scans} scans')
        s_idx = scan_idxs[i]
        # The final scan has no successor in scan_index, so it runs to the end
        # of the value arrays (the previous loop stopped one scan early).
        # NOTE(review): assumes scan_index holds each scan's start offset - confirm.
        e_idx = scan_idxs[i + 1] if i + 1 < n_scans else len(all_mzs)

        mzs_tmp = all_mzs[s_idx:e_idx]
        ints_tmp = all_ints[s_idx:e_idx]

        ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
        mzs.extend(mzs_tmp)
        ints.extend(ints_tmp)
    return ids, mzs, ints


def _peaks_from_msp(input_path):
    """Parse an MSP text file into parallel id / m/z / intensity lists."""
    ids, mzs, ints = [], [], []
    spectrum_id = None
    with open(input_path, 'r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            line = line.strip()
            if line.lower().startswith('name:'):
                # Same rule as extract_first_column_ids: any case, text after the colon.
                spectrum_id = line.split(':', 1)[1].strip()
            elif line and line[0].isdigit() and spectrum_id is not None:
                # Peak lines seen before the first name belong to no spectrum and are skipped.
                try:
                    mz, intensity = map(float, line.split()[:2])
                except ValueError:
                    continue
                ids.append(spectrum_id)
                mzs.append(mz)
                ints.append(intensity)
    return ids, mzs, ints


def build_library(input_path=None, output_path=None):
    """Load a spectral library file into a long-format DataFrame.

    The file type is chosen from the (case-insensitive) ending of
    *input_path*: ``csv`` files are returned via ``pd.read_csv`` unchanged;
    ``mgf``, ``mzML``, ``cdf`` and ``msp`` files are flattened to one row per
    peak.

    Args:
        input_path: Path of the library file to read.
        output_path: Accepted for interface compatibility; this function does
            not read or write it.

    Returns:
        For CSV input, the DataFrame produced by ``pd.read_csv``. Otherwise a
        DataFrame with columns ``id``, ``mz_ratio`` and ``intensity``.

    Raises:
        ValueError: if *input_path* does not end in a supported type.
    """
    file_type = _library_file_type(input_path)
    if file_type == 'csv':
        return pd.read_csv(input_path)

    if file_type == 'mgf':
        with mgf.read(input_path, index_by_scans = True) as reader:
            ids, mzs, ints = _peaks_from_spectra(reader, file_type)
    elif file_type == 'mzML':
        with mzml.read(input_path) as reader:
            ids, mzs, ints = _peaks_from_spectra(reader, file_type)
    elif file_type == 'cdf':
        ids, mzs, ints = _peaks_from_cdf(input_path)
    else:
        ids, mzs, ints = _peaks_from_msp(input_path)

    return pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
129
+
130
+
131
+
132
def extract_first_column_ids(file_path: str, max_ids: int = 20000):
    """Return the unique spectrum IDs found in a library file, in first-seen order.

    For ``.csv`` files the IDs are the values of the first column. For any
    other file the text is scanned for lines starting with ``TITLE=`` or
    (case-insensitively) ``name:`` and the text after the separator is used.

    Args:
        file_path: Path of the file to scan.
        max_ids: Maximum number of unique IDs to return.

    Returns:
        A list of at most *max_ids* unique, non-blank ID strings. For non-CSV
        files that cannot be opened or read, the IDs collected so far (possibly
        none) are returned.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix == ".csv":
        df = pd.read_csv(file_path, usecols=[0])
        # Drop missing values BEFORE converting to str; converting first turns
        # NaN into the literal string 'nan', which dropna() can no longer remove.
        column = df.iloc[:, 0].dropna().astype(str)
        # dict.fromkeys de-duplicates while preserving first-seen order.
        unique_ids = dict.fromkeys(x for x in column if x.strip() != "")
        return list(unique_ids)[:max_ids]

    unique_ids = {}
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                # Stop once enough UNIQUE ids were collected, matching the CSV
                # path (which caps after de-duplication).
                if len(unique_ids) >= max_ids:
                    break
                ls = line.strip()
                if ls.startswith("TITLE="):
                    found = ls.split("=", 1)[1].strip()
                elif ls.lower().startswith("name:"):
                    found = ls.split(":", 1)[1].strip()
                else:
                    continue
                if found:
                    unique_ids.setdefault(found, None)
    except OSError:
        # Best effort: an unreadable file yields whatever was collected so far.
        pass

    return list(unique_ids)[:max_ids]
170
+
171
+
172
# Strong references to in-flight send tasks; asyncio keeps only weak references
# to tasks, so an unreferenced task may be garbage-collected before it finishes.
_PLOT_WINDOW_TASKS = set()


def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
    """Send PNG bytes to the browser as a base64 data URL.

    The image is delivered through the ``"open-plot-window"`` custom message
    with payload ``{"png": <data URL>, "title": title}``.

    Args:
        session: Object exposing ``send_custom_message(type, message)``.
        png_bytes: Raw PNG file content.
        title: Title forwarded to the browser alongside the image.

    Returns:
        None.
    """
    # Local import: the file's top-level import block does not import base64,
    # so the previous body raised NameError on first use.
    import base64

    b64 = base64.b64encode(png_bytes).decode("ascii")
    data_url = f"data:image/png;base64,{b64}"
    pending = session.send_custom_message("open-plot-window", {"png": data_url, "title": title})

    # NOTE(review): Shiny documents Session.send_custom_message as a coroutine
    # function; if so, the bare call only creates a coroutine and nothing is
    # sent until it is scheduled - confirm against the Shiny version in use.
    if asyncio.iscoroutine(pending):
        try:
            task = asyncio.get_running_loop().create_task(pending)
        except RuntimeError:
            # No running event loop in this thread: nothing can be sent from here.
            pending.close()
        else:
            _PLOT_WINDOW_TASKS.add(task)
            task.add_done_callback(_PLOT_WINDOW_TASKS.discard)
15
177
 
16
178
 
17
179
  def plot_spectra_ui(platform: str):
@@ -19,13 +181,25 @@ def plot_spectra_ui(platform: str):
19
181
  base_inputs = [
20
182
  ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
21
183
  ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
22
- ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None),
23
- ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None),
184
+ ui.input_selectize(
185
+ "spectrum_ID1",
186
+ "Select spectrum ID 1 (default is the first spectrum in the library):",
187
+ choices=[],
188
+ multiple=False,
189
+ options={"placeholder": "Upload a library..."},
190
+ ),
191
+ ui.input_selectize(
192
+ "spectrum_ID2",
193
+ "Select spectrum ID 2 (default is the first spectrum in the library):",
194
+ choices=[],
195
+ multiple=False,
196
+ options={"placeholder": "Upload a library..."},
197
+ ),
24
198
  ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
199
+ ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
25
200
  ui.input_select(
26
201
  "high_quality_reference_library",
27
- "Indicate whether the reference library is considered high quality. "
28
- "If True, filtering and noise removal are only applied to the query spectra.",
202
+ "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
29
203
  [False, True],
30
204
  ),
31
205
  ]
@@ -35,7 +209,7 @@ def plot_spectra_ui(platform: str):
35
209
  extra_inputs = [
36
210
  ui.input_text(
37
211
  "spectrum_preprocessing_order",
38
- "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
212
+ "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
39
213
  "FCNMWL",
40
214
  ),
41
215
  ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -45,7 +219,7 @@ def plot_spectra_ui(platform: str):
45
219
  extra_inputs = [
46
220
  ui.input_text(
47
221
  "spectrum_preprocessing_order",
48
- "Sequence of characters for preprocessing order (F, N, L, W).",
222
+ "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
49
223
  "FNLW",
50
224
  )
51
225
  ]
@@ -71,26 +245,25 @@ def plot_spectra_ui(platform: str):
71
245
  )
72
246
 
73
247
  # Run and Back buttons
74
- run_button = ui.input_action_button("run_btn", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
248
+ run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
75
249
  back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
76
250
 
77
- #print(len(extra_inputs))
78
251
  # Layout base_inputs and extra_inputs in columns
79
252
  if platform == "HRMS":
80
253
  inputs_columns = ui.layout_columns(
81
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
82
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
254
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
255
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
83
256
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
84
257
  ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
85
- col_widths=(3, 3, 3, 3),
258
+ col_widths=(3,3,3,3),
86
259
  )
87
260
  elif platform == "NRMS":
88
261
  inputs_columns = ui.layout_columns(
89
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
90
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
262
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
263
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
91
264
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
92
265
  ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
93
- col_widths=(3, 3, 3, 3),
266
+ col_widths=(3,3,3,3),
94
267
  )
95
268
 
96
269
  # Combine everything
@@ -98,8 +271,10 @@ def plot_spectra_ui(platform: str):
98
271
  ui.TagList(
99
272
  ui.h2("Plot Spectra"),
100
273
  inputs_columns,
101
- run_button,
102
- back_button
274
+ run_button_plot_spectra,
275
+ back_button,
276
+ ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px"),
277
+ ui.div(ui.output_text("plot_reference_status"), style="margin-top:8px; font-size:14px")
103
278
  ),
104
279
  )
105
280
 
@@ -111,12 +286,26 @@ def run_spec_lib_matching_ui(platform: str):
111
286
  ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
112
287
  ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
113
288
  ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
289
+ ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
290
+ ui.input_selectize(
291
+ "spectrum_ID1",
292
+ "Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
293
+ choices=[],
294
+ multiple=False,
295
+ options={"placeholder": "Upload a library..."},
296
+ ),
297
+ ui.input_selectize(
298
+ "spectrum_ID2",
299
+ "Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
300
+ choices=[],
301
+ multiple=False,
302
+ options={"placeholder": "Upload a library..."},
303
+ ),
114
304
  ui.input_select(
115
305
  "high_quality_reference_library",
116
- "Indicate whether the reference library is considered high quality. "
117
- "If True, filtering and noise removal are only applied to the query spectra.",
306
+ "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
118
307
  [False, True],
119
- ),
308
+ )
120
309
  ]
121
310
 
122
311
  # Extra inputs depending on platform
@@ -124,7 +313,7 @@ def run_spec_lib_matching_ui(platform: str):
124
313
  extra_inputs = [
125
314
  ui.input_text(
126
315
  "spectrum_preprocessing_order",
127
- "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
316
+ "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
128
317
  "FCNMWL",
129
318
  ),
130
319
  ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -134,7 +323,7 @@ def run_spec_lib_matching_ui(platform: str):
134
323
  extra_inputs = [
135
324
  ui.input_text(
136
325
  "spectrum_preprocessing_order",
137
- "Sequence of characters for preprocessing order (F, N, L, W).",
326
+ "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
138
327
  "FNLW",
139
328
  )
140
329
  ]
@@ -150,45 +339,139 @@ def run_spec_lib_matching_ui(platform: str):
150
339
  ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
151
340
  ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
152
341
  ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
153
- ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 1),
342
+ ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
154
343
  ]
155
344
 
156
345
 
157
346
  # Run and Back buttons
158
- run_button = ui.input_action_button("run_btn", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
347
+ run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
348
+ run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
159
349
  back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
160
350
 
161
- #print(len(extra_inputs))
162
351
  # Layout base_inputs and extra_inputs in columns
163
352
  if platform == "HRMS":
164
353
  inputs_columns = ui.layout_columns(
165
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
166
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
354
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
355
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
167
356
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
168
357
  ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
169
- col_widths=(3, 3, 3, 3),
358
+ col_widths=(3,3,3,3)
170
359
  )
171
360
  elif platform == "NRMS":
172
361
  inputs_columns = ui.layout_columns(
173
- ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
174
- ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
362
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
363
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
175
364
  ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
176
365
  ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
177
- col_widths=(3, 3, 3, 3),
366
+ col_widths=(3,3,3,3)
178
367
  )
179
368
 
369
+ log_panel = ui.card(
370
+ ui.card_header("Identification log"),
371
+ ui.output_text_verbatim("match_log"),
372
+ style="max-height:300px; overflow:auto"
373
+ )
374
+
180
375
  # Combine everything
181
376
  return ui.div(
182
377
  ui.TagList(
183
378
  ui.h2("Run Spectral Library Matching"),
184
379
  inputs_columns,
185
- run_button,
186
- back_button
380
+ run_button_spec_lib_matching,
381
+ run_button_plot_spectra_within_spec_lib_matching,
382
+ back_button,
383
+ log_panel
187
384
  ),
188
385
  )
189
386
 
190
387
 
191
388
 
389
def run_parameter_tuning_ui(platform: str):
    """Build the "Tune parameters" page.

    Every tunable parameter is a free-text input holding a bracketed list of
    candidate values (for example ``[0.1,0.5]``).

    Args:
        platform: ``"HRMS"`` adds the centroiding/matching inputs; any other
            value gets the shorter preprocessing-order input only.

    Returns:
        A ``ui.div`` containing the heading, four input columns, the run and
        back buttons, and the log card.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    # Base inputs common to all platforms
    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
        ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))'),
        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]'),
    ]

    # Extra inputs depending on platform
    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "[FCNMWL,CWM]",
            ),
            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "[FNLW,WNL]",
            )
        ]

    # Numeric parameters, entered as text so several candidates can be listed
    numeric_inputs = [
        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]'),
    ]

    # Run and Back buttons
    run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # One layout for every platform. The previous HRMS / NRMS branches were
    # identical and left inputs_columns unbound for any other platform value,
    # even though extra_inputs above already treats "not HRMS" as a valid case.
    # All five base inputs go in the first column (the old [0:6] / [6:7] slices
    # selected exactly that: the second slice was always empty).
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs, style=column_style),
        ui.div(extra_inputs, style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div(numeric_inputs[5:9], style=column_style),
        col_widths=(3, 3, 3, 3),
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto"
    )

    # Combine everything
    return ui.div(
        ui.TagList(
            ui.h2("Tune parameters"),
            inputs_columns,
            run_button_parameter_tuning,
            back_button,
            log_panel
        ),
    )
471
+
472
+
473
+
474
+
192
475
  app_ui = ui.page_fluid(
193
476
  ui.output_ui("main_ui"),
194
477
  ui.output_text("status_output")
@@ -197,29 +480,153 @@ app_ui = ui.page_fluid(
197
480
 
198
481
  def server(input, output, session):
199
482
 
200
- # Track which page to show
201
483
  current_page = reactive.Value("main_menu")
202
484
 
203
- # Track button clicks
204
485
  plot_clicks = reactive.Value(0)
205
486
  match_clicks = reactive.Value(0)
206
487
  back_clicks = reactive.Value(0)
207
488
 
208
- run_status = reactive.Value("Waiting for input...")
489
+ run_status_plot_spectra = reactive.Value("")
490
+ run_status_spec_lib_matching = reactive.Value("")
491
+ run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
492
+ run_status_parameter_tuning = reactive.Value("")
493
+ is_tuning_running = reactive.Value(False)
494
+ match_log_rv = reactive.Value("")
495
+ is_matching_rv = reactive.Value(False)
496
+ is_any_job_running = reactive.Value(False)
497
+
498
+ query_ids_rv = reactive.Value([])
499
+ query_file_path_rv = reactive.Value(None)
500
+ query_result_rv = reactive.Value(None)
501
+ query_status_rv = reactive.Value("")
502
+ reference_ids_rv = reactive.Value([])
503
+ reference_file_path_rv = reactive.Value(None)
504
+ reference_result_rv = reactive.Value(None)
505
+ reference_status_rv = reactive.Value("")
506
+
507
+ converted_query_path_rv = reactive.Value(None)
508
+ converted_reference_path_rv = reactive.Value(None)
509
+
510
+
511
+ #def _drain_queue_nowait(q: asyncio.Queue[str]) -> list[str]:
512
def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
    """Remove and return every item currently in *q* without waiting.

    Items are returned in the order ``get_nowait`` yields them; an empty queue
    gives an empty list.
    """
    drained = []
    while True:
        try:
            item = q.get_nowait()
        except asyncio.QueueEmpty:
            return drained
        drained.append(item)
520
+
521
+
522
@reactive.effect
async def _pump_logs():
    """Append queued log text to ``match_log_rv`` while a job is running.

    Does nothing unless ``is_any_job_running`` is set. Otherwise it asks to be
    re-run after 0.1 s, drains ``_LOG_QUEUE`` and, if anything was queued,
    appends the joined text to ``match_log_rv`` and flushes the reactive graph.
    """
    if is_any_job_running.get():
        reactive.invalidate_later(0.1)
        queued = _drain_queue_nowait(_LOG_QUEUE)
        if queued:
            combined = match_log_rv.get() + "".join(queued)
            match_log_rv.set(combined)
            await reactive.flush()
532
+
533
+
534
def process_database(file_path: str):
    """Describe an uploaded file as a dict with its path and lower-cased suffix."""
    return {
        "path": file_path,
        "suffix": Path(file_path).suffix.lower(),
    }
537
+
538
@render.text
def plot_query_status():
    """Render the current query-upload status text (empty string when unset)."""
    status = query_status_rv.get()
    return status if status else ""
541
+
542
@render.text
def plot_reference_status():
    """Render the current reference-upload status text (empty string when unset)."""
    status = reference_status_rv.get()
    return status if status else ""
545
+
546
+
547
@reactive.effect
@reactive.event(input.query_data)
async def _on_query_upload():
    """React to a new query upload: record its path and publish progress text.

    Stores the first uploaded file's ``datapath`` in ``query_file_path_rv``,
    runs ``process_database`` on it in a worker thread, stores the result in
    ``query_result_rv`` and reports progress, success or failure through
    ``query_status_rv``.
    """
    uploads = input.query_data()
    req(uploads and len(uploads) > 0)

    file_path = uploads[0]["datapath"]
    query_file_path_rv.set(file_path)

    # Flush right away so the "Processing" message is pushed before the work starts.
    query_status_rv.set(f"Processing query database: {Path(file_path).name} …")
    await reactive.flush()

    try:
        query_result_rv.set(await asyncio.to_thread(process_database, file_path))
        query_status_rv.set("✅ Query database processed.")
        await reactive.flush()
    except Exception as e:
        query_status_rv.set(f"❌ Failed to process query database: {e}")
        await reactive.flush()
567
+
568
+
569
@reactive.effect
@reactive.event(input.reference_data)
async def _on_reference_upload():
    """React to a new reference upload: record its path and publish progress text.

    Stores the first uploaded file's ``datapath`` in ``reference_file_path_rv``,
    runs ``process_database`` on it in a worker thread, stores the result in
    ``reference_result_rv`` and reports progress, success or failure through
    ``reference_status_rv``.
    """
    uploads = input.reference_data()
    req(uploads and len(uploads) > 0)

    file_path = uploads[0]["datapath"]
    reference_file_path_rv.set(file_path)

    # Flush right away so the "Processing" message is pushed before the work starts.
    reference_status_rv.set(f"Processing reference database: {Path(file_path).name} …")
    await reactive.flush()

    try:
        reference_result_rv.set(await asyncio.to_thread(process_database, file_path))
        reference_status_rv.set("✅ Reference database processed.")
        await reactive.flush()
    except Exception as e:
        reference_status_rv.set(f"❌ Failed to process reference database: {e}")
        await reactive.flush()
589
+
590
+
591
@render.text
def match_log():
    """Render the accumulated log text held in ``match_log_rv``."""
    log_text = match_log_rv.get()
    return log_text
594
+
595
+
596
class ReactiveWriter(io.TextIOBase):
    """File-like text sink that appends everything written to a reactive value.

    *rv* only needs ``get()`` and ``set(value)``; each ``write`` stores the
    previous value plus the new text.
    """

    def __init__(self, rv):
        self.rv = rv

    def write(self, s: str):
        """Append *s* to the wrapped value and return the number of characters written."""
        if s:
            self.rv.set(self.rv.get() + s)
            try:
                # Ask for a reactive flush when called on a thread that has a
                # running event loop; elsewhere get_running_loop() raises
                # RuntimeError and the flush is skipped.
                running_loop = asyncio.get_running_loop()
                running_loop.create_task(reactive.flush())
            except RuntimeError:
                pass
            return len(s)
        return 0

    def flush(self):
        """No-op: text is pushed to the reactive value on every write."""
        pass
611
+
612
+
209
613
 
210
614
# Click counter owned by the "run_parameter_tuning" button. It previously
# shared match_clicks with "run_spec_lib_matching", so a click on one button
# raised the threshold the other had to exceed and later clicks were ignored.
tuning_clicks = reactive.Value(0)

@reactive.Effect
def _():
    """Switch ``current_page`` when a navigation button's click count increases.

    Each button has its own stored count; a click is detected when the
    button's current value exceeds that count, after which the count is
    updated to the new value.
    """
    if input.plot_spectra() > plot_clicks.get():
        current_page.set("plot_spectra")
        plot_clicks.set(input.plot_spectra())
    elif input.run_spec_lib_matching() > match_clicks.get():
        current_page.set("run_spec_lib_matching")
        match_clicks.set(input.run_spec_lib_matching())
    elif input.run_parameter_tuning() > tuning_clicks.get():
        current_page.set("run_parameter_tuning")
        tuning_clicks.set(input.run_parameter_tuning())
    elif hasattr(input, "back") and input.back() > back_clicks.get():
        current_page.set("main_menu")
        back_clicks.set(input.back())
222
628
 
629
+
223
630
  @render.image
224
631
  def image():
225
632
  from pathlib import Path
@@ -228,6 +635,7 @@ def server(input, output, session):
228
635
  img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
229
636
  return img
230
637
 
638
+
231
639
  @output
232
640
  @render.ui
233
641
  def main_ui():
@@ -259,6 +667,7 @@ def server(input, output, session):
259
667
  ),
260
668
  ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
261
669
  ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
670
+ ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
262
671
  ui.div(
263
672
  "References:",
264
673
  style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
@@ -309,53 +718,436 @@ def server(input, output, session):
309
718
  return plot_spectra_ui(input.chromatography_platform())
310
719
  elif current_page() == "run_spec_lib_matching":
311
720
  return run_spec_lib_matching_ui(input.chromatography_platform())
721
+ elif current_page() == "run_parameter_tuning":
722
+ return run_parameter_tuning_ui(input.chromatography_platform())
723
+
724
+
312
725
 
313
726
  @reactive.effect
314
- @reactive.event(input.run_btn)
315
- def _():
316
- if current_page() == "plot_spectra":
317
- if len(input.spectrum_ID1())==0:
318
- spectrum_ID1 = None
727
+ @reactive.event(input.query_data)
728
+ async def _populate_ids_from_query_upload():
729
+ #if current_page() != "plot_spectra":
730
+ # return
731
+
732
+ files = input.query_data()
733
+ if not files:
734
+ return
735
+
736
+ in_path = Path(files[0]["datapath"])
737
+ suffix = in_path.suffix.lower()
738
+
739
+ # Decide what CSV to read IDs from
740
+ try:
741
+ if suffix == ".csv":
742
+ csv_path = in_path
743
+ converted_query_path_rv.set(str(csv_path))
319
744
  else:
320
- spectrum_ID1 = input.spectrum_ID1()
321
- if len(input.spectrum_ID2())==0:
322
- spectrum_ID2 = None
745
+ query_status_rv.set(f"Converting {in_path.name} → CSV …")
746
+ await reactive.flush()
747
+
748
+ # Choose an output temp path next to the upload
749
+ tmp_csv_path = in_path.with_suffix(".converted.csv")
750
+
751
+ out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
752
+
753
+ # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
754
+ if isinstance(out_obj, (str, os.PathLike, Path)):
755
+ csv_path = Path(out_obj)
756
+ elif isinstance(out_obj, pd.DataFrame):
757
+ # Write the DF to our chosen path
758
+ out_obj.to_csv(tmp_csv_path, index=False)
759
+ csv_path = tmp_csv_path
760
+ else:
761
+ raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
762
+
763
+ converted_query_path_rv.set(str(csv_path))
764
+
765
+ query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
766
+ await reactive.flush()
767
+
768
+ # Extract IDs from the CSV’s first column
769
+ ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
770
+ query_ids_rv.set(ids)
771
+
772
+ # Update dropdowns
773
+ ui.update_selectize("spectrum_ID1", choices=ids, selected=(ids[0] if ids else None))
774
+
775
+ query_status_rv.set(
776
+ f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
777
+ )
778
+ await reactive.flush()
779
+
780
+ except Exception as e:
781
+ query_status_rv.set(f"❌ Failed: {e}")
782
+ await reactive.flush()
783
+ raise
784
+
785
+
786
@reactive.effect
@reactive.event(input.reference_data)
async def _populate_ids_from_reference_upload():
    """React to a reference-library upload and fill the `spectrum_ID2` choices.

    A `.csv` upload is used directly. Any other file is first passed to
    `build_library` (run in a worker thread), whose result may be either a
    path or a DataFrame. The IDs are then read with
    `extract_first_column_ids` and progress is reported through
    `reference_status_rv`. Failures are written to the status value and
    re-raised.
    """
    uploads = input.reference_data()
    if not uploads:
        return

    upload_path = Path(uploads[0]["datapath"])

    try:
        if upload_path.suffix.lower() == ".csv":
            csv_path = upload_path
        else:
            reference_status_rv.set(f"Converting {upload_path.name} → CSV …")
            await reactive.flush()

            # Conversion target sits next to the uploaded file.
            converted_target = upload_path.with_suffix(".converted.csv")
            built = await asyncio.to_thread(build_library, str(upload_path), str(converted_target))

            # Normalise whatever build_library returned to a CSV path.
            if isinstance(built, pd.DataFrame):
                built.to_csv(converted_target, index=False)
                csv_path = converted_target
            elif isinstance(built, (str, os.PathLike)):
                csv_path = Path(built)
            else:
                raise TypeError(f"build_library returned unsupported type: {type(built)}")

        converted_reference_path_rv.set(str(csv_path))

        reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        reference_ids_rv.set(ids)

        # Refresh the dropdown, preselecting the first ID when there is one.
        first_id = ids[0] if ids else None
        ui.update_selectize("spectrum_ID2", choices=ids, selected=first_id)

        if ids:
            summary = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            summary = f"⚠️ No IDs found in {csv_path.name}"
        reference_status_rv.set(summary)
        await reactive.flush()

    except Exception as e:
        reference_status_rv.set(f"❌ Failed: {e}")
        await reactive.flush()
        raise
844
+
845
+
846
@render.download(filename="plot.png")
def run_btn_plot_spectra():
    """Render the selected query/reference spectra and stream the plot as PNG.

    Reads the plotting parameters from the UI inputs, calls the HRMS or NRMS
    plotting routine depending on `input.chromatography_platform()`, and
    yields the PNG bytes of the returned figure.

    Raises:
        ValueError: if fewer than four similarity weights are supplied, a
            weight is not numeric, or the platform is neither "HRMS" nor
            "NRMS".
    """
    # Both uploads are indexed below; stop early when either is missing.
    req(input.query_data(), input.reference_data())

    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    # The flag may arrive as a string or number; coerce it to a real bool.
    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    elif isinstance(hq, (int, float)):
        hq = bool(hq)

    weight_names = ("Cosine", "Shannon", "Renyi", "Tsallis")
    weight_values = [float(w.strip()) for w in input.weights().split(",") if w.strip()]
    if len(weight_values) < len(weight_names):
        raise ValueError(
            f"Expected {len(weight_names)} comma-separated weights "
            f"({', '.join(weight_names)}); got {len(weight_values)}."
        )
    # Values beyond the fourth are ignored, as before.
    weights = dict(zip(weight_names, weight_values))

    # `weights` is passed on both platforms, matching the other plot and
    # matching handlers in this file.
    plot_kwargs = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        weights=weights,
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        y_axis_transformation=input.y_axis_transformation(),
        return_plot=True,
    )

    platform = input.chromatography_platform()
    if platform == "HRMS":
        fig = generate_plots_on_HRMS_data(
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **plot_kwargs
        )
    elif platform == "NRMS":
        fig = generate_plots_on_NRMS_data(**plot_kwargs)
    else:
        # Previously this fell through and failed later on an unbound `fig`.
        raise ValueError(f"Unsupported chromatography platform: {platform!r}")

    # NOTE(review): kept from the original flow; confirm an interactive
    # window is actually wanted in a server process.
    plt.show()

    with io.BytesIO() as buf:
        try:
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
        finally:
            # Close this specific figure even if saving fails.
            plt.close(fig)
        yield buf.getvalue()
864
+
865
+
866
@render.text
def status_output():
    """Return the run-status text belonging to the page currently shown.

    Only the first of three consecutive `return` statements could ever
    execute before, so the matching and tuning status values were never
    displayed. The plot-spectra status stays the fallback for any other page.
    """
    page = current_page()
    if page == "run_spec_lib_matching":
        return run_status_spec_lib_matching.get()
    if page == "run_parameter_tuning":
        return run_status_parameter_tuning.get()
    return run_status_plot_spectra.get()
871
+
872
+
873
class ReactiveWriter(io.TextIOBase):
    """Text stream that appends everything written to it to a reactive value.

    The append is scheduled on the given event loop with
    `call_soon_threadsafe`, followed by a task running `reactive.flush()`.
    """

    def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
        self.rv = rv
        self.loop = loop

    def _append_on_loop(self, text):
        # Runs as a loop callback: extend the value, then schedule a flush.
        # NOTE(review): `rv.get()` is called from a plain loop callback --
        # confirm a reactive context is available there.
        self.rv.set(self.rv.get() + text)
        self.loop.create_task(reactive.flush())

    def write(self, s: str):
        """Schedule `s` to be appended; return the number of characters taken."""
        if s:
            self.loop.call_soon_threadsafe(self._append_on_loop, s)
            return len(s)
        return 0

    def flush(self):
        """No-op; nothing is buffered in this object."""
        return None
889
+
890
+
891
@render.download(filename="identification_output.csv")
async def run_btn_spec_lib_matching():
    """Run spectral library matching and stream the identification table as CSV.

    Parameters are read from the UI inputs. The HRMS or NRMS matching routine
    runs in a worker thread while stdout/stderr are redirected into a
    `ReactiveWriter` bound to `match_log_rv`. Errors from the matching call
    are appended to the log and re-raised.

    Raises:
        ValueError: if fewer than four similarity weights are supplied or a
            weight is not numeric.
    """
    # Both uploads are indexed below; stop early when either is missing.
    req(input.query_data(), input.reference_data())

    match_log_rv.set("Starting identification...\n")
    await reactive.flush()

    # The flag may arrive as a string or number; coerce it to a real bool.
    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    elif isinstance(hq, (int, float)):
        hq = bool(hq)

    weight_names = ("Cosine", "Shannon", "Renyi", "Tsallis")
    weight_values = [float(w.strip()) for w in input.weights().split(",") if w.strip()]
    if len(weight_values) < len(weight_names):
        raise ValueError(
            f"Expected {len(weight_names)} comma-separated weights "
            f"({', '.join(weight_names)}); got {len(weight_values)}."
        )
    # Values beyond the fourth are ignored, as before.
    weights = dict(zip(weight_names, weight_values))

    common_kwargs = dict(
        query_data=input.query_data()[0]["datapath"],
        reference_data=input.reference_data()[0]["datapath"],
        likely_reference_ids=None,
        similarity_measure=input.similarity_measure(),
        weights=weights,
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        n_top_matches_to_save=input.n_top_matches_to_save(),
        print_id_results=True,  # the printed results are what the log captures
        output_identification=str(Path.cwd() / "identification_output.csv"),
        output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
        return_ID_output=True,
    )

    loop = asyncio.get_running_loop()
    rw = ReactiveWriter(match_log_rv, loop)

    try:
        with redirect_stdout(rw), redirect_stderr(rw):
            if input.chromatography_platform() == "HRMS":
                df_out = await asyncio.to_thread(
                    run_spec_lib_matching_on_HRMS_data,
                    window_size_centroiding=input.window_size_centroiding(),
                    window_size_matching=input.window_size_matching(),
                    **common_kwargs
                )
            else:
                df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
        match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
        await reactive.flush()
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        await reactive.flush()
        raise

    yield df_out.to_csv(index=True)
947
+
948
+
949
+
950
@render.download(filename="plot.png")
def run_btn_plot_spectra_within_spec_lib_matching():
    """Plot the selected spectra from the matching page and stream them as PNG.

    Requires both uploads, reads the remaining parameters from the UI inputs,
    always uses the "normalized" y-axis transformation, and dispatches to the
    HRMS or NRMS plotting routine.
    """
    req(input.query_data(), input.reference_data())

    first_id = input.spectrum_ID1() or None
    second_id = input.spectrum_ID2() or None

    # The flag may arrive as a string or number; coerce it to a real bool.
    hq_flag = input.high_quality_reference_library()
    if isinstance(hq_flag, str):
        hq_flag = hq_flag.lower() == "true"
    elif isinstance(hq_flag, (int, float)):
        hq_flag = bool(hq_flag)

    parsed = [float(tok.strip()) for tok in input.weights().split(",") if tok.strip()]
    weight_map = {
        'Cosine': parsed[0],
        'Shannon': parsed[1],
        'Renyi': parsed[2],
        'Tsallis': parsed[3],
    }

    plot_kwargs = {
        "query_data": input.query_data()[0]['datapath'],
        "reference_data": input.reference_data()[0]['datapath'],
        "spectrum_ID1": first_id,
        "spectrum_ID2": second_id,
        "similarity_measure": input.similarity_measure(),
        "weights": weight_map,
        "spectrum_preprocessing_order": input.spectrum_preprocessing_order(),
        "high_quality_reference_library": hq_flag,
        "mz_min": input.mz_min(),
        "mz_max": input.mz_max(),
        "int_min": input.int_min(),
        "int_max": input.int_max(),
        "noise_threshold": input.noise_threshold(),
        "wf_mz": input.wf_mz(),
        "wf_intensity": input.wf_int(),
        "LET_threshold": input.LET_threshold(),
        "entropy_dimension": input.entropy_dimension(),
        "y_axis_transformation": "normalized",
        "return_plot": True,
    }

    # Only the HRMS routine receives the two window-size parameters.
    if input.chromatography_platform() == "HRMS":
        plot_kwargs["window_size_centroiding"] = input.window_size_centroiding()
        plot_kwargs["window_size_matching"] = input.window_size_matching()
        make_plot = generate_plots_on_HRMS_data
    else:
        make_plot = generate_plots_on_NRMS_data

    fig = make_plot(**plot_kwargs)
    plt.show()

    with io.BytesIO() as png_buffer:
        fig.savefig(png_buffer, format="png", dpi=150, bbox_inches="tight")
        plt.close()
        yield png_buffer.getvalue()
999
+
1000
+
1001
+ '''
1002
+ @render.download(filename="parameter_tuning_output.csv")
1003
+ async def run_btn_parameter_tuning():
1004
+ match_log_rv.set("Running grid search of all parameters specified...\n")
1005
+
1006
+ similarity_measure_tmp = list(input.similarity_measure())
1007
+ high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
1008
+ spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
1009
+ mz_min_tmp = strip_numeric(input.mz_min())
1010
+ mz_max_tmp = strip_numeric(input.mz_max())
1011
+ int_min_tmp = strip_numeric(input.int_min())
1012
+ int_max_tmp = strip_numeric(input.int_max())
1013
+ noise_threshold_tmp = strip_numeric(input.noise_threshold())
1014
+ wf_mz_tmp = strip_numeric(input.wf_mz())
1015
+ wf_int_tmp = strip_numeric(input.wf_int())
1016
+ LET_threshold_tmp = strip_numeric(input.LET_threshold())
1017
+ entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
1018
+ weights_tmp = strip_weights(input.weights())
1019
+
1020
+ common_kwargs = dict(
1021
+ query_data=input.query_data()[0]["datapath"],
1022
+ reference_data=input.reference_data()[0]["datapath"],
1023
+ output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
1024
+ return_output=True
1025
+ )
1026
+
1027
+ loop = asyncio.get_running_loop()
1028
+ rw = ReactiveWriter(match_log_rv, loop)
1029
+
1030
+ try:
1031
+ with redirect_stdout(rw), redirect_stderr(rw):
1032
+ if input.chromatography_platform() == "HRMS":
1033
+ window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
1034
+ window_size_matching_tmp = strip_numeric(input.window_size_matching())
1035
+ grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp, 'window_size_centroiding':window_size_centroiding_tmp, 'window_size_matching':window_size_matching_tmp}
1036
+ df_out = await asyncio.to_thread(tune_params_on_HRMS_data, **common_kwargs, grid=grid)
1037
+ else:
1038
+ grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp}
1039
+ df_out = await asyncio.to_thread(tune_params_on_NRMS_data, **common_kwargs, grid=grid)
1040
+ match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
1041
+ #await reactive.flush()
1042
+ except Exception as e:
1043
+ match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
1044
+ #await reactive.flush()
1045
+ raise
1046
+
1047
+ yield df_out.to_csv(index=False)
1048
+ '''
1049
+
1050
+
1051
@render.download(filename="parameter_tuning_output.csv")
async def run_btn_parameter_tuning():
    """Run the parameter grid search and stream the result table as CSV.

    The grid is parsed from the UI inputs and the HRMS or NRMS tuning routine
    runs in a worker thread, with stdout/stderr redirected into a
    `ReactiveWriter`. The running flags are raised for the duration of the
    job and are always lowered again, including when parsing the inputs
    fails. Any exception is appended to `match_log_rv` and re-raised.
    """
    is_any_job_running.set(True)
    is_tuning_running.set(True)

    # Everything after raising the flags sits inside the try block so that
    # the `finally` clause resets them on any failure, not only on failures
    # of the tuning call itself.
    try:
        match_log_rv.set("Running grid search of all parameters specified...\n")
        # Discard whatever an earlier job left behind in the log queue.
        _drain_queue_nowait(_LOG_QUEUE)

        grid = {
            'similarity_measure': list(input.similarity_measure()),
            'weight': strip_weights(input.weights()),
            'spectrum_preprocessing_order': strip_text(input.spectrum_preprocessing_order()),
            'mz_min': strip_numeric(input.mz_min()),
            'mz_max': strip_numeric(input.mz_max()),
            'int_min': strip_numeric(input.int_min()),
            'int_max': strip_numeric(input.int_max()),
            'noise_threshold': strip_numeric(input.noise_threshold()),
            'wf_mz': strip_numeric(input.wf_mz()),
            'wf_int': strip_numeric(input.wf_int()),
            'LET_threshold': strip_numeric(input.LET_threshold()),
            'entropy_dimension': strip_numeric(input.entropy_dimension()),
            'high_quality_reference_library': [
                x.strip().lower() == "true"
                for x in input.high_quality_reference_library().strip().strip("[]").split(",")
                if x.strip()
            ],
        }

        common_kwargs = dict(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
            return_output=True
        )

        if input.chromatography_platform() == "HRMS":
            # Window sizes are only part of the HRMS grid; they are appended
            # last so the key order stays as it was.
            grid['window_size_centroiding'] = strip_numeric(input.window_size_centroiding())
            grid['window_size_matching'] = strip_numeric(input.window_size_matching())
            tune = tune_params_on_HRMS_data
        else:
            tune = tune_params_on_NRMS_data

        # One writer serves both streams.
        writer = ReactiveWriter(match_log_rv, asyncio.get_running_loop())
        with redirect_stdout(writer), redirect_stderr(writer):
            df_out = await asyncio.to_thread(tune, **common_kwargs, grid=grid)

        match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        raise
    finally:
        is_tuning_running.set(False)
        is_any_job_running.set(False)
        # Pick up any output still sitting in the queue.
        trailing = _drain_queue_nowait(_LOG_QUEUE)
        if trailing:
            match_log_rv.set(match_log_rv.get() + "".join(trailing))
        await reactive.flush()

    yield df_out.to_csv(index=False).encode('utf-8')
1108
+
1109
+
1110
+
1111
@render.text
def status_output():
    """Return the run-status text belonging to the page currently shown.

    Only the first of three consecutive `return` statements could ever
    execute before, so the matching and tuning status values were never
    displayed. The plot-spectra status stays the fallback for any other page.
    """
    page = current_page()
    if page == "run_spec_lib_matching":
        return run_status_spec_lib_matching.get()
    if page == "run_parameter_tuning":
        return run_status_parameter_tuning.get()
    return run_status_plot_spectra.get()
1116
+
1117
+
1118
class ReactiveWriter(io.TextIOBase):
    """Text stream that forwards written text to the module-level log queue.

    Each non-empty write is handed to `_LOG_QUEUE.put_nowait` through the
    event loop's `call_soon_threadsafe`. The reactive value is stored but
    not touched by this class.
    """

    def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
        self._rv = rv
        self._loop = loop

    def write(self, s: str):
        """Enqueue `s` on the loop; return the number of characters taken."""
        if s:
            self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
            return len(s)
        return 0

    def flush(self):
        """No-op; nothing is buffered in this object."""
        return None
1131
+
1132
+
1133
@reactive.effect
async def _pump_reactive_writer_logs():
    """Move queued log text into `match_log_rv` while tuning is running.

    When `is_tuning_running` is set, the effect asks to be invalidated again
    after 0.1 and appends whatever `_drain_queue_nowait` returns from
    `_LOG_QUEUE` to the log value.
    """
    if is_tuning_running.get():
        reactive.invalidate_later(0.1)
        pending = _drain_queue_nowait(_LOG_QUEUE)
        if pending:
            combined = "".join(pending)
            match_log_rv.set(match_log_rv.get() + combined)
            await reactive.flush()
1143
+
354
1144
 
355
1145
 
356
1146
  @render.text
357
1147
  def status_output():
358
- return run_status.get()
1148
+ return run_status_plot_spectra.get()
1149
+ return run_status_spec_lib_matching.get()
1150
+ return run_status_parameter_tuning.get()
359
1151
 
360
1152
 
361
1153