pycompound 0.0.55__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +874 -82
- {pycompound_fy7392 → pycompound}/plot_spectra.py +8 -3
- {pycompound_fy7392 → pycompound}/plot_spectra_CLI.py +2 -2
- {pycompound_fy7392 → pycompound}/processing.py +1 -1
- {pycompound_fy7392 → pycompound}/spec_lib_matching.py +70 -34
- {pycompound_fy7392 → pycompound}/spec_lib_matching_CLI.py +2 -2
- {pycompound_fy7392 → pycompound}/tuning_CLI.py +3 -4
- {pycompound-0.0.55.dist-info → pycompound-0.1.1.dist-info}/METADATA +2 -1
- pycompound-0.1.1.dist-info/RECORD +14 -0
- pycompound-0.1.1.dist-info/top_level.txt +2 -0
- pycompound-0.0.55.dist-info/RECORD +0 -15
- pycompound-0.0.55.dist-info/top_level.txt +0 -2
- pycompound_fy7392/pycompound_shiny.py +0 -299
- {pycompound_fy7392 → pycompound}/build_library.py +0 -0
- {pycompound_fy7392 → pycompound}/similarity_measures.py +0 -0
- {pycompound-0.0.55.dist-info → pycompound-0.1.1.dist-info}/WHEEL +0 -0
- {pycompound-0.0.55.dist-info → pycompound-0.1.1.dist-info}/licenses/LICENSE +0 -0
app.py
CHANGED
|
@@ -1,17 +1,179 @@
|
|
|
1
1
|
|
|
2
|
-
from shiny import App, ui, reactive, render
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
2
|
+
from shiny import App, ui, reactive, render, req
|
|
3
|
+
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
|
+
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
6
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
7
|
+
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
8
|
+
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
9
9
|
from pathlib import Path
|
|
10
|
+
from contextlib import redirect_stdout, redirect_stderr
|
|
10
11
|
import subprocess
|
|
11
12
|
import traceback
|
|
12
13
|
import asyncio
|
|
13
14
|
import io
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
14
17
|
import matplotlib.pyplot as plt
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import numpy as np
|
|
20
|
+
import netCDF4 as nc
|
|
21
|
+
from pyteomics import mgf, mzml
|
|
22
|
+
import ast
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def strip_text(s):
    """Split a bracketed, comma-separated string into trimmed tokens.

    Leading/trailing '[' and ']' characters are removed from ``s`` first;
    the remainder is split on commas, each piece is whitespace-trimmed, and
    pieces that end up empty are discarded.
    """
    tokens = []
    for piece in s.strip('[]').split(','):
        cleaned = piece.strip()
        if cleaned:
            tokens.append(cleaned)
    return tokens
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def strip_numeric(s):
    """Parse a bracketed, comma-separated string into a list of floats.

    Leading/trailing '[' and ']' characters are removed from ``s``; the
    remainder is split on commas and every non-blank piece is converted with
    ``float``. A piece that is not a valid number raises ``ValueError``.
    """
    values = []
    for piece in s.strip('[]').split(','):
        cleaned = piece.strip()
        if cleaned:
            values.append(float(cleaned))
    return values
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def strip_weights(s):
    """Parse a textual weight specification into a list of weight dicts.

    ``s`` is evaluated with ``ast.literal_eval`` (literals only, no code
    execution). Two shapes are accepted:

    * a sequence of weight tuples, e.g.
      ``'((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))'``
    * a single flat weight tuple, e.g. ``'0.25, 0.25, 0.25, 0.25'``, which
      is treated as one weight set (previously this raised ``TypeError``
      because each bare number was zipped as if it were a tuple).

    Returns:
        A list with one dict per weight tuple, keyed by 'Cosine', 'Shannon',
        'Renyi' and 'Tsallis' in that positional order.

    Raises:
        ValueError / SyntaxError: propagated from ``ast.literal_eval`` when
            ``s`` is not a valid Python literal.
    """
    parsed = ast.literal_eval(s)
    keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']

    # A flat sequence of numbers is a single weight set; wrap it so the
    # comprehension below sees a sequence of tuples in both cases.
    is_flat = (
        isinstance(parsed, (tuple, list))
        and len(parsed) > 0
        and all(isinstance(v, (int, float)) for v in parsed)
    )
    if is_flat:
        parsed = [parsed]

    return [dict(zip(keys, weights)) for weights in parsed]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def build_library(input_path=None, output_path=None):
    """Load a spectral library file into a long-format DataFrame.

    The file type is chosen from the trailing characters of ``input_path``
    (case-insensitive): csv, mgf, mzML, cdf or msp.

    Args:
        input_path: Path of the library file to read. Required.
        output_path: Accepted for interface compatibility; this function
            does not write anything and never reads this argument.

    Returns:
        For csv input, the result of ``pd.read_csv(input_path)`` unchanged.
        Otherwise a DataFrame with one row per peak and the columns
        'id', 'mz_ratio' and 'intensity'.

    Raises:
        TypeError: if ``input_path`` is None.
        SystemExit: if the extension is not one of the supported types.
    """
    if input_path is None:
        raise TypeError('input_path must be provided')

    # Compare on the lower-cased tail so mixed-case extensions ('.Csv',
    # '.mzMl', ...) are recognised as well as the all-lower/all-upper forms.
    lowered = str(input_path).lower()

    if lowered.endswith('csv'):
        return pd.read_csv(input_path)

    if lowered.endswith('mgf'):
        input_file_type = 'mgf'
    elif lowered.endswith('mzml'):
        input_file_type = 'mzML'
    elif lowered.endswith('cdf'):
        input_file_type = 'cdf'
    elif lowered.endswith('msp'):
        input_file_type = 'msp'
    else:
        # NOTE(review): exiting the interpreter is kept for backward
        # compatibility; callers that catch only Exception will not see this.
        print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
        sys.exit()

    ids = []
    mzs = []
    ints = []

    if input_file_type in ('mgf', 'mzML'):
        if input_file_type == 'mgf':
            with mgf.read(input_path, index_by_scans = True) as reader:
                spectra = list(reader)
        else:
            with mzml.read(input_path) as reader:
                spectra = list(reader)

        for i, spec in enumerate(spectra, start=1):
            spec_mzs = spec['m/z array']
            n_peaks = len(spec_mzs)
            if n_peaks == 0:
                # No peaks -> no rows; also avoids touching 'params' needlessly.
                continue
            # mzML spectra get a positional ID; mgf spectra use their name.
            if input_file_type == 'mzML':
                spec_id = f'ID_{i}'
            else:
                spec_id = spec['params']['name']
            ids.extend([spec_id] * n_peaks)
            mzs.extend(spec_mzs)
            ints.extend(spec['intensity array'][:n_peaks])

    elif input_file_type == 'cdf':
        dataset = nc.Dataset(input_path, 'r')
        try:
            all_mzs = dataset.variables['mass_values'][:]
            all_ints = dataset.variables['intensity_values'][:]
            scan_idxs = dataset.variables['scan_index'][:]
        finally:
            # Close the file even if one of the variables is missing.
            dataset.close()

        n_scans = len(scan_idxs)
        for i in range(n_scans):
            if i % 1000 == 0:
                print(f'analyzed {i} out of {n_scans} scans')
            s_idx = scan_idxs[i]
            # Each scan ends where the next begins; the last scan runs to the
            # end of the value arrays (it was previously dropped entirely).
            e_idx = scan_idxs[i + 1] if i + 1 < n_scans else len(all_mzs)

            mzs_tmp = all_mzs[s_idx:e_idx]
            ints_tmp = all_ints[s_idx:e_idx]

            ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
            mzs.extend(mzs_tmp)
            ints.extend(ints_tmp)

    else:  # msp
        spectrum_id = None
        with open(input_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line.startswith('Name:'):
                    # Split on the first colon so 'Name:X' and 'Name: X'
                    # both yield 'X'.
                    spectrum_id = line.split(':', 1)[1].strip()
                elif line and line[0].isdigit():
                    if spectrum_id is None:
                        # Peak line before any 'Name:' header cannot be
                        # attributed to a spectrum; skip it.
                        continue
                    try:
                        mz, intensity = map(float, line.split()[:2])
                    except ValueError:
                        # Digit-leading metadata or malformed peak line.
                        continue
                    ids.append(spectrum_id)
                    mzs.append(mz)
                    ints.append(intensity)

    df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
    return df
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def extract_first_column_ids(file_path: str, max_ids: int = 20000):
    """Return the distinct spectrum IDs found in a library file, in file order.

    For ``.csv`` files the IDs are the non-empty values of the first column.
    For any other file the text is scanned line by line and IDs are taken
    from ``TITLE=...`` lines and from lines starting with ``name:``
    (case-insensitive).

    Args:
        file_path: Path of the file to inspect.
        max_ids: For csv, the maximum number of distinct IDs returned. For
            text files, scanning stops once this many ID lines were collected
            (before de-duplication).

    Returns:
        A list of unique ID strings in first-seen order; ``[]`` when no IDs
        are found or the text file cannot be opened/read.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix == ".csv":
        column = pd.read_csv(file_path, usecols=[0]).iloc[:, 0]
        # Drop missing cells BEFORE casting: astype(str) turns NaN into the
        # string 'nan', which would otherwise be reported as a spectrum ID.
        values = column.dropna().astype(str)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        unique = dict.fromkeys(x for x in values if x.strip() != "")
        return list(unique)[:max_ids]

    ids = []
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                ls = line.strip()
                if ls.startswith("TITLE="):
                    ids.append(ls.split("=", 1)[1].strip())
                elif ls.lower().startswith("name:"):
                    ids.append(ls.split(":", 1)[1].strip())
                if len(ids) >= max_ids:
                    break
    except OSError:
        # Best effort: an unreadable file yields whatever was collected so
        # far. Decoding errors cannot occur because of errors="ignore".
        pass

    return list(dict.fromkeys(ids))
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
    """Send PNG bytes to browser and open in a new window as a data URL.

    Args:
        session: Object exposing ``send_custom_message(type, message)``.
        png_bytes: Raw PNG image data.
        title: Title forwarded to the browser alongside the image.

    The message is sent under the type "open-plot-window" with the keys
    "png" (a ``data:image/png;base64,...`` URL) and "title".
    """
    # Imported locally because the module's import block does not bring in
    # base64; without this the first call fails with NameError.
    import base64

    b64 = base64.b64encode(png_bytes).decode("ascii")
    data_url = f"data:image/png;base64,{b64}"
    # NOTE(review): in Shiny for Python, Session.send_custom_message appears
    # to be a coroutine function; if so, this un-awaited call never sends
    # the message — confirm and await it from an async caller.
    session.send_custom_message("open-plot-window", {"png": data_url, "title": title})
|
|
15
177
|
|
|
16
178
|
|
|
17
179
|
def plot_spectra_ui(platform: str):
|
|
@@ -19,13 +181,25 @@ def plot_spectra_ui(platform: str):
|
|
|
19
181
|
base_inputs = [
|
|
20
182
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
21
183
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
22
|
-
ui.
|
|
23
|
-
|
|
184
|
+
ui.input_selectize(
|
|
185
|
+
"spectrum_ID1",
|
|
186
|
+
"Select spectrum ID 1 (default is the first spectrum in the library):",
|
|
187
|
+
choices=[],
|
|
188
|
+
multiple=False,
|
|
189
|
+
options={"placeholder": "Upload a library..."},
|
|
190
|
+
),
|
|
191
|
+
ui.input_selectize(
|
|
192
|
+
"spectrum_ID2",
|
|
193
|
+
"Select spectrum ID 2 (default is the first spectrum in the library):",
|
|
194
|
+
choices=[],
|
|
195
|
+
multiple=False,
|
|
196
|
+
options={"placeholder": "Upload a library..."},
|
|
197
|
+
),
|
|
24
198
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
199
|
+
ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
25
200
|
ui.input_select(
|
|
26
201
|
"high_quality_reference_library",
|
|
27
|
-
"Indicate whether the reference library is considered high quality. "
|
|
28
|
-
"If True, filtering and noise removal are only applied to the query spectra.",
|
|
202
|
+
"Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
|
|
29
203
|
[False, True],
|
|
30
204
|
),
|
|
31
205
|
]
|
|
@@ -35,7 +209,7 @@ def plot_spectra_ui(platform: str):
|
|
|
35
209
|
extra_inputs = [
|
|
36
210
|
ui.input_text(
|
|
37
211
|
"spectrum_preprocessing_order",
|
|
38
|
-
"Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
|
|
212
|
+
"Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
|
|
39
213
|
"FCNMWL",
|
|
40
214
|
),
|
|
41
215
|
ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
|
|
@@ -45,7 +219,7 @@ def plot_spectra_ui(platform: str):
|
|
|
45
219
|
extra_inputs = [
|
|
46
220
|
ui.input_text(
|
|
47
221
|
"spectrum_preprocessing_order",
|
|
48
|
-
"Sequence of characters for preprocessing order (F, N, L, W).",
|
|
222
|
+
"Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
|
|
49
223
|
"FNLW",
|
|
50
224
|
)
|
|
51
225
|
]
|
|
@@ -71,26 +245,25 @@ def plot_spectra_ui(platform: str):
|
|
|
71
245
|
)
|
|
72
246
|
|
|
73
247
|
# Run and Back buttons
|
|
74
|
-
|
|
248
|
+
run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
75
249
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
76
250
|
|
|
77
|
-
#print(len(extra_inputs))
|
|
78
251
|
# Layout base_inputs and extra_inputs in columns
|
|
79
252
|
if platform == "HRMS":
|
|
80
253
|
inputs_columns = ui.layout_columns(
|
|
81
|
-
ui.div(base_inputs[0:
|
|
82
|
-
ui.div([base_inputs[
|
|
254
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
255
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
83
256
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
84
257
|
ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
|
|
85
|
-
col_widths=(3,
|
|
258
|
+
col_widths=(3,3,3,3),
|
|
86
259
|
)
|
|
87
260
|
elif platform == "NRMS":
|
|
88
261
|
inputs_columns = ui.layout_columns(
|
|
89
|
-
ui.div(base_inputs[0:
|
|
90
|
-
ui.div([base_inputs[
|
|
262
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
263
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
91
264
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
92
265
|
ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
|
|
93
|
-
col_widths=(3,
|
|
266
|
+
col_widths=(3,3,3,3),
|
|
94
267
|
)
|
|
95
268
|
|
|
96
269
|
# Combine everything
|
|
@@ -98,8 +271,10 @@ def plot_spectra_ui(platform: str):
|
|
|
98
271
|
ui.TagList(
|
|
99
272
|
ui.h2("Plot Spectra"),
|
|
100
273
|
inputs_columns,
|
|
101
|
-
|
|
102
|
-
back_button
|
|
274
|
+
run_button_plot_spectra,
|
|
275
|
+
back_button,
|
|
276
|
+
ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px"),
|
|
277
|
+
ui.div(ui.output_text("plot_reference_status"), style="margin-top:8px; font-size:14px")
|
|
103
278
|
),
|
|
104
279
|
)
|
|
105
280
|
|
|
@@ -111,12 +286,26 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
111
286
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
112
287
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
113
288
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
289
|
+
ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
290
|
+
ui.input_selectize(
|
|
291
|
+
"spectrum_ID1",
|
|
292
|
+
"Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
|
|
293
|
+
choices=[],
|
|
294
|
+
multiple=False,
|
|
295
|
+
options={"placeholder": "Upload a library..."},
|
|
296
|
+
),
|
|
297
|
+
ui.input_selectize(
|
|
298
|
+
"spectrum_ID2",
|
|
299
|
+
"Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
|
|
300
|
+
choices=[],
|
|
301
|
+
multiple=False,
|
|
302
|
+
options={"placeholder": "Upload a library..."},
|
|
303
|
+
),
|
|
114
304
|
ui.input_select(
|
|
115
305
|
"high_quality_reference_library",
|
|
116
|
-
"Indicate whether the reference library is considered high quality. "
|
|
117
|
-
"If True, filtering and noise removal are only applied to the query spectra.",
|
|
306
|
+
"Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
|
|
118
307
|
[False, True],
|
|
119
|
-
)
|
|
308
|
+
)
|
|
120
309
|
]
|
|
121
310
|
|
|
122
311
|
# Extra inputs depending on platform
|
|
@@ -124,7 +313,7 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
124
313
|
extra_inputs = [
|
|
125
314
|
ui.input_text(
|
|
126
315
|
"spectrum_preprocessing_order",
|
|
127
|
-
"Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
|
|
316
|
+
"Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
|
|
128
317
|
"FCNMWL",
|
|
129
318
|
),
|
|
130
319
|
ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
|
|
@@ -134,7 +323,7 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
134
323
|
extra_inputs = [
|
|
135
324
|
ui.input_text(
|
|
136
325
|
"spectrum_preprocessing_order",
|
|
137
|
-
"Sequence of characters for preprocessing order (F, N, L, W).",
|
|
326
|
+
"Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
|
|
138
327
|
"FNLW",
|
|
139
328
|
)
|
|
140
329
|
]
|
|
@@ -150,45 +339,139 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
150
339
|
ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
|
|
151
340
|
ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
|
|
152
341
|
ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
|
|
153
|
-
ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:",
|
|
342
|
+
ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
|
|
154
343
|
]
|
|
155
344
|
|
|
156
345
|
|
|
157
346
|
# Run and Back buttons
|
|
158
|
-
|
|
347
|
+
run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
348
|
+
run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
159
349
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
160
350
|
|
|
161
|
-
#print(len(extra_inputs))
|
|
162
351
|
# Layout base_inputs and extra_inputs in columns
|
|
163
352
|
if platform == "HRMS":
|
|
164
353
|
inputs_columns = ui.layout_columns(
|
|
165
|
-
ui.div(base_inputs[0:
|
|
166
|
-
ui.div([base_inputs[
|
|
354
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
355
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
167
356
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
168
357
|
ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
|
|
169
|
-
col_widths=(3,
|
|
358
|
+
col_widths=(3,3,3,3)
|
|
170
359
|
)
|
|
171
360
|
elif platform == "NRMS":
|
|
172
361
|
inputs_columns = ui.layout_columns(
|
|
173
|
-
ui.div(base_inputs[0:
|
|
174
|
-
ui.div([base_inputs[
|
|
362
|
+
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
363
|
+
ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
|
|
175
364
|
ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
|
|
176
365
|
ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
|
|
177
|
-
col_widths=(3,
|
|
366
|
+
col_widths=(3,3,3,3)
|
|
178
367
|
)
|
|
179
368
|
|
|
369
|
+
log_panel = ui.card(
|
|
370
|
+
ui.card_header("Identification log"),
|
|
371
|
+
ui.output_text_verbatim("match_log"),
|
|
372
|
+
style="max-height:300px; overflow:auto"
|
|
373
|
+
)
|
|
374
|
+
|
|
180
375
|
# Combine everything
|
|
181
376
|
return ui.div(
|
|
182
377
|
ui.TagList(
|
|
183
378
|
ui.h2("Run Spectral Library Matching"),
|
|
184
379
|
inputs_columns,
|
|
185
|
-
|
|
186
|
-
|
|
380
|
+
run_button_spec_lib_matching,
|
|
381
|
+
run_button_plot_spectra_within_spec_lib_matching,
|
|
382
|
+
back_button,
|
|
383
|
+
log_panel
|
|
187
384
|
),
|
|
188
385
|
)
|
|
189
386
|
|
|
190
387
|
|
|
191
388
|
|
|
389
|
+
def run_parameter_tuning_ui(platform: str):
    """Build the "Tune parameters" page.

    Every tunable parameter is a free-text input whose default is a
    bracketed list (e.g. '[0.1,0.5]').

    Args:
        platform: "HRMS" adds the centroiding/matching window inputs and the
            HRMS preprocessing-order help text; any other value gets the
            reduced (NRMS) set of extra inputs.

    Returns:
        A ``ui.div`` containing the heading, a four-column input layout, the
        run and back buttons, and the log card bound to the "match_log"
        output.
    """
    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:200px; height:80px"

    # Base inputs common to all platforms
    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
        ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))'),
        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]')
    ]

    # Extra inputs depending on platform
    if platform == "HRMS":
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "[FCNMWL,CWM]",
            ),
            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "[FNLW,WNL]",
            )
        ]

    # Numeric inputs (entered as text so several candidate values can be listed)
    numeric_inputs = [
        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]')
    ]

    # Run and Back buttons
    run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style=button_style)
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # The column layout is the same for every platform; building it
    # unconditionally keeps it in step with the if/else above, so an
    # unexpected platform value no longer leaves inputs_columns unbound.
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs, style=column_style),
        ui.div(extra_inputs, style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div(numeric_inputs[5:9], style=column_style),
        col_widths=(3, 3, 3, 3),
    )

    log_panel = ui.card(
        ui.card_header("Identification log"),
        ui.output_text_verbatim("match_log"),
        style="max-height:300px; overflow:auto"
    )

    # Combine everything
    return ui.div(
        ui.TagList(
            ui.h2("Tune parameters"),
            inputs_columns,
            run_button_parameter_tuning,
            back_button,
            log_panel
        ),
    )
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
|
|
192
475
|
app_ui = ui.page_fluid(
|
|
193
476
|
ui.output_ui("main_ui"),
|
|
194
477
|
ui.output_text("status_output")
|
|
@@ -197,29 +480,153 @@ app_ui = ui.page_fluid(
|
|
|
197
480
|
|
|
198
481
|
def server(input, output, session):
|
|
199
482
|
|
|
200
|
-
# Track which page to show
|
|
201
483
|
current_page = reactive.Value("main_menu")
|
|
202
484
|
|
|
203
|
-
# Track button clicks
|
|
204
485
|
plot_clicks = reactive.Value(0)
|
|
205
486
|
match_clicks = reactive.Value(0)
|
|
206
487
|
back_clicks = reactive.Value(0)
|
|
207
488
|
|
|
208
|
-
|
|
489
|
+
run_status_plot_spectra = reactive.Value("")
|
|
490
|
+
run_status_spec_lib_matching = reactive.Value("")
|
|
491
|
+
run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
|
|
492
|
+
run_status_parameter_tuning = reactive.Value("")
|
|
493
|
+
is_tuning_running = reactive.Value(False)
|
|
494
|
+
match_log_rv = reactive.Value("")
|
|
495
|
+
is_matching_rv = reactive.Value(False)
|
|
496
|
+
is_any_job_running = reactive.Value(False)
|
|
497
|
+
|
|
498
|
+
query_ids_rv = reactive.Value([])
|
|
499
|
+
query_file_path_rv = reactive.Value(None)
|
|
500
|
+
query_result_rv = reactive.Value(None)
|
|
501
|
+
query_status_rv = reactive.Value("")
|
|
502
|
+
reference_ids_rv = reactive.Value([])
|
|
503
|
+
reference_file_path_rv = reactive.Value(None)
|
|
504
|
+
reference_result_rv = reactive.Value(None)
|
|
505
|
+
reference_status_rv = reactive.Value("")
|
|
506
|
+
|
|
507
|
+
converted_query_path_rv = reactive.Value(None)
|
|
508
|
+
converted_reference_path_rv = reactive.Value(None)
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
#def _drain_queue_nowait(q: asyncio.Queue[str]) -> list[str]:
|
|
512
|
+
def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
|
|
513
|
+
out = []
|
|
514
|
+
try:
|
|
515
|
+
while True:
|
|
516
|
+
out.append(q.get_nowait())
|
|
517
|
+
except asyncio.QueueEmpty:
|
|
518
|
+
pass
|
|
519
|
+
return out
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@reactive.effect
|
|
523
|
+
async def _pump_logs():
|
|
524
|
+
if not is_any_job_running.get():
|
|
525
|
+
return
|
|
526
|
+
|
|
527
|
+
reactive.invalidate_later(0.1)
|
|
528
|
+
msgs = _drain_queue_nowait(_LOG_QUEUE)
|
|
529
|
+
if msgs:
|
|
530
|
+
match_log_rv.set(match_log_rv.get() + "".join(msgs))
|
|
531
|
+
await reactive.flush()
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def process_database(file_path: str):
|
|
535
|
+
suffix = Path(file_path).suffix.lower()
|
|
536
|
+
return {"path": file_path, "suffix": suffix}
|
|
537
|
+
|
|
538
|
+
@render.text
|
|
539
|
+
def plot_query_status():
|
|
540
|
+
return query_status_rv.get() or ""
|
|
541
|
+
|
|
542
|
+
@render.text
|
|
543
|
+
def plot_reference_status():
|
|
544
|
+
return reference_status_rv.get() or ""
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
@reactive.effect
|
|
548
|
+
@reactive.event(input.query_data)
|
|
549
|
+
async def _on_query_upload():
|
|
550
|
+
files = input.query_data()
|
|
551
|
+
req(files and len(files) > 0)
|
|
552
|
+
|
|
553
|
+
file_path = files[0]["datapath"]
|
|
554
|
+
query_file_path_rv.set(file_path)
|
|
555
|
+
|
|
556
|
+
query_status_rv.set(f"Processing query database: {Path(file_path).name} …")
|
|
557
|
+
await reactive.flush()
|
|
558
|
+
|
|
559
|
+
try:
|
|
560
|
+
result = await asyncio.to_thread(process_database, file_path)
|
|
561
|
+
query_result_rv.set(result)
|
|
562
|
+
query_status_rv.set("✅ Query database processed.")
|
|
563
|
+
await reactive.flush()
|
|
564
|
+
except Exception as e:
|
|
565
|
+
query_status_rv.set(f"❌ Failed to process query database: {e}")
|
|
566
|
+
await reactive.flush()
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
@reactive.effect
|
|
570
|
+
@reactive.event(input.reference_data)
|
|
571
|
+
async def _on_reference_upload():
|
|
572
|
+
files = input.reference_data()
|
|
573
|
+
req(files and len(files) > 0)
|
|
574
|
+
|
|
575
|
+
file_path = files[0]["datapath"]
|
|
576
|
+
reference_file_path_rv.set(file_path)
|
|
577
|
+
|
|
578
|
+
reference_status_rv.set(f"Processing reference database: {Path(file_path).name} …")
|
|
579
|
+
await reactive.flush()
|
|
580
|
+
|
|
581
|
+
try:
|
|
582
|
+
result = await asyncio.to_thread(process_database, file_path)
|
|
583
|
+
reference_result_rv.set(result)
|
|
584
|
+
reference_status_rv.set("✅ Reference database processed.")
|
|
585
|
+
await reactive.flush()
|
|
586
|
+
except Exception as e:
|
|
587
|
+
reference_status_rv.set(f"❌ Failed to process reference database: {e}")
|
|
588
|
+
await reactive.flush()
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
@render.text
|
|
592
|
+
def match_log():
|
|
593
|
+
return match_log_rv.get()
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
class ReactiveWriter(io.TextIOBase):
|
|
597
|
+
def __init__(self, rv):
|
|
598
|
+
self.rv = rv
|
|
599
|
+
def write(self, s: str):
|
|
600
|
+
if not s:
|
|
601
|
+
return 0
|
|
602
|
+
self.rv.set(self.rv.get() + s)
|
|
603
|
+
try:
|
|
604
|
+
loop = asyncio.get_running_loop()
|
|
605
|
+
loop.create_task(reactive.flush())
|
|
606
|
+
except RuntimeError:
|
|
607
|
+
pass
|
|
608
|
+
return len(s)
|
|
609
|
+
def flush(self):
|
|
610
|
+
pass
|
|
611
|
+
|
|
612
|
+
|
|
209
613
|
|
|
210
614
|
@reactive.Effect
|
|
211
615
|
def _():
|
|
212
|
-
# Main menu buttons
|
|
213
616
|
if input.plot_spectra() > plot_clicks.get():
|
|
214
617
|
current_page.set("plot_spectra")
|
|
215
618
|
plot_clicks.set(input.plot_spectra())
|
|
216
619
|
elif input.run_spec_lib_matching() > match_clicks.get():
|
|
217
620
|
current_page.set("run_spec_lib_matching")
|
|
218
621
|
match_clicks.set(input.run_spec_lib_matching())
|
|
622
|
+
elif input.run_parameter_tuning() > match_clicks.get():
|
|
623
|
+
current_page.set("run_parameter_tuning")
|
|
624
|
+
match_clicks.set(input.run_parameter_tuning())
|
|
219
625
|
elif hasattr(input, "back") and input.back() > back_clicks.get():
|
|
220
626
|
current_page.set("main_menu")
|
|
221
627
|
back_clicks.set(input.back())
|
|
222
628
|
|
|
629
|
+
|
|
223
630
|
@render.image
|
|
224
631
|
def image():
|
|
225
632
|
from pathlib import Path
|
|
@@ -228,6 +635,7 @@ def server(input, output, session):
|
|
|
228
635
|
img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
|
|
229
636
|
return img
|
|
230
637
|
|
|
638
|
+
|
|
231
639
|
@output
|
|
232
640
|
@render.ui
|
|
233
641
|
def main_ui():
|
|
@@ -259,6 +667,7 @@ def server(input, output, session):
|
|
|
259
667
|
),
|
|
260
668
|
ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
261
669
|
ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
670
|
+
ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
|
|
262
671
|
ui.div(
|
|
263
672
|
"References:",
|
|
264
673
|
style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
|
|
@@ -309,53 +718,436 @@ def server(input, output, session):
|
|
|
309
718
|
return plot_spectra_ui(input.chromatography_platform())
|
|
310
719
|
elif current_page() == "run_spec_lib_matching":
|
|
311
720
|
return run_spec_lib_matching_ui(input.chromatography_platform())
|
|
721
|
+
elif current_page() == "run_parameter_tuning":
|
|
722
|
+
return run_parameter_tuning_ui(input.chromatography_platform())
|
|
723
|
+
|
|
724
|
+
|
|
312
725
|
|
|
313
726
|
@reactive.effect
@reactive.event(input.query_data)
async def _populate_ids_from_query_upload():
    """Refresh the ``spectrum_ID1`` choices whenever a query file is uploaded.

    A ``.csv`` upload is read as-is. Any other suffix is first passed to
    ``build_library`` in a worker thread; its result (a path or a DataFrame)
    is normalized to a CSV path. The resulting path is stored in
    ``converted_query_path_rv``, the IDs returned by
    ``extract_first_column_ids`` are stored in ``query_ids_rv`` and pushed to
    the selectize input, and progress text is written to ``query_status_rv``.

    Any exception is reported through ``query_status_rv`` and then re-raised.
    """
    uploads = input.query_data()
    if not uploads:
        return

    source = Path(uploads[0]["datapath"])
    needs_conversion = source.suffix.lower() != ".csv"

    try:
        if needs_conversion:
            query_status_rv.set(f"Converting {source.name} → CSV …")
            await reactive.flush()

            # Write the converted library next to the uploaded temp file.
            target = source.with_suffix(".converted.csv")
            built = await asyncio.to_thread(build_library, str(source), str(target))

            # build_library may hand back a DataFrame or a path; end up with a path.
            if isinstance(built, pd.DataFrame):
                built.to_csv(target, index=False)
                ids_csv = target
            elif isinstance(built, (str, os.PathLike, Path)):
                ids_csv = Path(built)
            else:
                raise TypeError(f"build_library returned unsupported type: {type(built)}")
        else:
            ids_csv = source

        converted_query_path_rv.set(str(ids_csv))

        query_status_rv.set(f"Reading IDs from: {ids_csv.name} …")
        await reactive.flush()

        # Read the IDs off the event loop so the UI stays responsive.
        found_ids = await asyncio.to_thread(extract_first_column_ids, str(ids_csv))
        query_ids_rv.set(found_ids)

        # Pre-select the first ID when there is one.
        first_choice = found_ids[0] if found_ids else None
        ui.update_selectize("spectrum_ID1", choices=found_ids, selected=first_choice)

        if found_ids:
            summary = f"✅ Loaded {len(found_ids)} IDs from {ids_csv.name}"
        else:
            summary = f"⚠️ No IDs found in {ids_csv.name}"
        query_status_rv.set(summary)
        await reactive.flush()

    except Exception as e:
        query_status_rv.set(f"❌ Failed: {e}")
        await reactive.flush()
        raise
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
@reactive.effect
@reactive.event(input.reference_data)
async def _populate_ids_from_reference_upload():
    """Refresh the ``spectrum_ID2`` choices whenever a reference file is uploaded.

    A ``.csv`` upload is read as-is. Any other suffix is first passed to
    ``build_library`` in a worker thread; its result (a path or a DataFrame)
    is normalized to a CSV path. The resulting path is stored in
    ``converted_reference_path_rv``, the IDs returned by
    ``extract_first_column_ids`` are stored in ``reference_ids_rv`` and pushed
    to the selectize input, and progress text is written to
    ``reference_status_rv``.

    Any exception is reported through ``reference_status_rv`` and then
    re-raised.
    """
    uploads = input.reference_data()
    if not uploads:
        return

    source = Path(uploads[0]["datapath"])
    needs_conversion = source.suffix.lower() != ".csv"

    try:
        if needs_conversion:
            reference_status_rv.set(f"Converting {source.name} → CSV …")
            await reactive.flush()

            # Write the converted library next to the uploaded temp file.
            target = source.with_suffix(".converted.csv")
            built = await asyncio.to_thread(build_library, str(source), str(target))

            # build_library may hand back a DataFrame or a path; end up with a path.
            if isinstance(built, pd.DataFrame):
                built.to_csv(target, index=False)
                ids_csv = target
            elif isinstance(built, (str, os.PathLike, Path)):
                ids_csv = Path(built)
            else:
                raise TypeError(f"build_library returned unsupported type: {type(built)}")
        else:
            ids_csv = source

        converted_reference_path_rv.set(str(ids_csv))

        reference_status_rv.set(f"Reading IDs from: {ids_csv.name} …")
        await reactive.flush()

        # Read the IDs off the event loop so the UI stays responsive.
        found_ids = await asyncio.to_thread(extract_first_column_ids, str(ids_csv))
        reference_ids_rv.set(found_ids)

        # Pre-select the first ID when there is one.
        first_choice = found_ids[0] if found_ids else None
        ui.update_selectize("spectrum_ID2", choices=found_ids, selected=first_choice)

        if found_ids:
            summary = f"✅ Loaded {len(found_ids)} IDs from {ids_csv.name}"
        else:
            summary = f"⚠️ No IDs found in {ids_csv.name}"
        reference_status_rv.set(summary)
        await reactive.flush()

    except Exception as e:
        reference_status_rv.set(f"❌ Failed: {e}")
        await reactive.flush()
        raise
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
@render.download(filename="plot.png")
def run_btn_plot_spectra():
    """Render the selected query/reference spectra and stream the plot as a PNG.

    Reads the plotting options from the Shiny inputs, calls
    ``generate_plots_on_HRMS_data`` or ``generate_plots_on_NRMS_data``
    depending on ``input.chromatography_platform()``, and yields the PNG bytes
    of the returned figure.

    Raises:
        ValueError: if the chromatography platform is neither ``"HRMS"`` nor
            ``"NRMS"``.
    """
    # Without both uploads there is nothing to index with [0] below; stop
    # quietly instead of failing with a TypeError on None.
    req(input.query_data(), input.reference_data())

    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
    weights = {'Cosine': weights[0], 'Shannon': weights[1], 'Renyi': weights[2], 'Tsallis': weights[3]}

    # Arguments passed to both plotting functions.
    shared = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=input.high_quality_reference_library(),
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        y_axis_transformation=input.y_axis_transformation(),
        return_plot=True,
    )

    platform = input.chromatography_platform()
    if platform == "HRMS":
        # Only the HRMS call receives the weights and the two window sizes.
        fig = generate_plots_on_HRMS_data(
            weights=weights,
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **shared
        )
    elif platform == "NRMS":
        fig = generate_plots_on_NRMS_data(**shared)
    else:
        # Previously this fell through and failed later with an unbound `fig`.
        raise ValueError(f"Unsupported chromatography platform: {platform!r}")

    # No plt.show(): the figure is only serialized for download, and showing
    # it from a server handler serves no purpose.
    try:
        with io.BytesIO() as buf:
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
            png_bytes = buf.getvalue()
    finally:
        # Close this specific figure, even if saving failed.
        plt.close(fig)

    yield png_bytes
|
|
864
|
+
|
|
865
|
+
|
|
866
|
+
@render.text
def status_output():
    """Return the current plot-spectra status text.

    NOTE(review): this function used to contain two further ``return``
    statements (for ``run_status_spec_lib_matching`` and
    ``run_status_parameter_tuning``) placed after the first one. They could
    never execute and were removed; the value returned is unchanged. If the
    other workflows need their own status text, select it explicitly
    (for example by the active page).
    """
    return run_status_plot_spectra.get()
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
class ReactiveWriter(io.TextIOBase):
    """Text stream that appends everything written to it onto a reactive value.

    Each non-empty write is scheduled onto ``loop`` with
    ``call_soon_threadsafe``; the scheduled callback appends the text to
    ``rv`` and creates a task for ``reactive.flush()``.
    """

    def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
        self.rv, self.loop = rv, loop

    def _append_on_loop(self, text):
        # Runs on the event loop: extend the value, then request a flush.
        self.rv.set(self.rv.get() + text)
        self.loop.create_task(reactive.flush())

    def write(self, s: str):
        """Schedule ``s`` to be appended; return the number of characters accepted."""
        if s:
            self.loop.call_soon_threadsafe(self._append_on_loop, s)
            return len(s)
        return 0

    def flush(self):
        """No-op: nothing is buffered in this object."""
        return None
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
@render.download(filename="identification_output.csv")
async def run_btn_spec_lib_matching():
    """Run spectral library matching and stream the identification table as CSV.

    Collects the matching options from the Shiny inputs, runs
    ``run_spec_lib_matching_on_HRMS_data`` (when the platform is ``"HRMS"``)
    or ``run_spec_lib_matching_on_NRMS_data`` (otherwise) in a worker thread
    with stdout/stderr redirected to a ``ReactiveWriter`` on ``match_log_rv``,
    and yields the returned DataFrame serialized with ``to_csv(index=True)``.

    Any exception is appended to ``match_log_rv`` and then re-raised.
    """
    # Both uploads are indexed with [0] below; stop quietly when one is
    # missing instead of failing with a TypeError on None.
    req(input.query_data(), input.reference_data())

    match_log_rv.set("Starting identification...\n")
    await reactive.flush()

    # Accept a string, number or bool from the input and normalize to bool.
    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    elif isinstance(hq, (int, float)):
        hq = bool(hq)

    weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
    weights = {'Cosine': weights[0], 'Shannon': weights[1], 'Renyi': weights[2], 'Tsallis': weights[3]}

    common_kwargs = dict(
        query_data=input.query_data()[0]["datapath"],
        reference_data=input.reference_data()[0]["datapath"],
        likely_reference_ids=None,
        similarity_measure=input.similarity_measure(),
        weights=weights,
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        n_top_matches_to_save=input.n_top_matches_to_save(),
        print_id_results=True,  # ensure the library actually prints
        output_identification=str(Path.cwd() / "identification_output.csv"),
        output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
        return_ID_output=True,
    )

    loop = asyncio.get_running_loop()
    rw = ReactiveWriter(match_log_rv, loop)

    try:
        # Capture whatever the matching routine prints while it runs.
        with redirect_stdout(rw), redirect_stderr(rw):
            if input.chromatography_platform() == "HRMS":
                df_out = await asyncio.to_thread(
                    run_spec_lib_matching_on_HRMS_data,
                    window_size_centroiding=input.window_size_centroiding(),
                    window_size_matching=input.window_size_matching(),
                    **common_kwargs
                )
            else:
                df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
        match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
        await reactive.flush()
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        await reactive.flush()
        raise

    yield df_out.to_csv(index=True)
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
@render.download(filename="plot.png")
def run_btn_plot_spectra_within_spec_lib_matching():
    """Plot the selected spectra from the library-matching page as a PNG download.

    Uses the same inputs as the matching workflow, forces
    ``y_axis_transformation="normalized"``, calls
    ``generate_plots_on_HRMS_data`` when the platform is ``"HRMS"`` and
    ``generate_plots_on_NRMS_data`` otherwise, and yields the PNG bytes of the
    returned figure.
    """
    req(input.query_data(), input.reference_data())

    spectrum_ID1 = input.spectrum_ID1() or None
    spectrum_ID2 = input.spectrum_ID2() or None

    # Accept a string, number or bool from the input and normalize to bool.
    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    elif isinstance(hq, (int, float)):
        hq = bool(hq)

    weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
    weights = {'Cosine': weights[0], 'Shannon': weights[1], 'Renyi': weights[2], 'Tsallis': weights[3]}

    common = dict(
        query_data=input.query_data()[0]['datapath'],
        reference_data=input.reference_data()[0]['datapath'],
        spectrum_ID1=spectrum_ID1,
        spectrum_ID2=spectrum_ID2,
        similarity_measure=input.similarity_measure(),
        weights=weights,
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        y_axis_transformation="normalized",
        return_plot=True
    )

    if input.chromatography_platform() == "HRMS":
        fig = generate_plots_on_HRMS_data(
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
            **common
        )
    else:
        fig = generate_plots_on_NRMS_data(**common)

    # No plt.show(): the figure is only serialized for download, and showing
    # it from a server handler serves no purpose.
    try:
        with io.BytesIO() as buf:
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
            png_bytes = buf.getvalue()
    finally:
        # Close this specific figure, even if saving failed.
        plt.close(fig)

    yield png_bytes
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
'''
|
|
1002
|
+
@render.download(filename="parameter_tuning_output.csv")
|
|
1003
|
+
async def run_btn_parameter_tuning():
|
|
1004
|
+
match_log_rv.set("Running grid search of all parameters specified...\n")
|
|
1005
|
+
|
|
1006
|
+
similarity_measure_tmp = list(input.similarity_measure())
|
|
1007
|
+
high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
|
|
1008
|
+
spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
|
|
1009
|
+
mz_min_tmp = strip_numeric(input.mz_min())
|
|
1010
|
+
mz_max_tmp = strip_numeric(input.mz_max())
|
|
1011
|
+
int_min_tmp = strip_numeric(input.int_min())
|
|
1012
|
+
int_max_tmp = strip_numeric(input.int_max())
|
|
1013
|
+
noise_threshold_tmp = strip_numeric(input.noise_threshold())
|
|
1014
|
+
wf_mz_tmp = strip_numeric(input.wf_mz())
|
|
1015
|
+
wf_int_tmp = strip_numeric(input.wf_int())
|
|
1016
|
+
LET_threshold_tmp = strip_numeric(input.LET_threshold())
|
|
1017
|
+
entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
|
|
1018
|
+
weights_tmp = strip_weights(input.weights())
|
|
1019
|
+
|
|
1020
|
+
common_kwargs = dict(
|
|
1021
|
+
query_data=input.query_data()[0]["datapath"],
|
|
1022
|
+
reference_data=input.reference_data()[0]["datapath"],
|
|
1023
|
+
output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
|
|
1024
|
+
return_output=True
|
|
1025
|
+
)
|
|
1026
|
+
|
|
1027
|
+
loop = asyncio.get_running_loop()
|
|
1028
|
+
rw = ReactiveWriter(match_log_rv, loop)
|
|
1029
|
+
|
|
1030
|
+
try:
|
|
1031
|
+
with redirect_stdout(rw), redirect_stderr(rw):
|
|
1032
|
+
if input.chromatography_platform() == "HRMS":
|
|
1033
|
+
window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
|
|
1034
|
+
window_size_matching_tmp = strip_numeric(input.window_size_matching())
|
|
1035
|
+
grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp, 'window_size_centroiding':window_size_centroiding_tmp, 'window_size_matching':window_size_matching_tmp}
|
|
1036
|
+
df_out = await asyncio.to_thread(tune_params_on_HRMS_data, **common_kwargs, grid=grid)
|
|
1037
|
+
else:
|
|
1038
|
+
grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp}
|
|
1039
|
+
df_out = await asyncio.to_thread(tune_params_on_NRMS_data, **common_kwargs, grid=grid)
|
|
1040
|
+
match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
|
|
1041
|
+
#await reactive.flush()
|
|
1042
|
+
except Exception as e:
|
|
1043
|
+
match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
|
|
1044
|
+
#await reactive.flush()
|
|
1045
|
+
raise
|
|
1046
|
+
|
|
1047
|
+
yield df_out.to_csv(index=False)
|
|
1048
|
+
'''
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
@render.download(filename="parameter_tuning_output.csv")
async def run_btn_parameter_tuning():
    """Run the parameter grid search and stream the tuning table as CSV.

    Builds a parameter grid from the Shiny inputs, runs
    ``tune_params_on_HRMS_data`` (when the platform is ``"HRMS"``) or
    ``tune_params_on_NRMS_data`` (otherwise) in a worker thread with
    stdout/stderr redirected to a ``ReactiveWriter``, and yields the returned
    DataFrame as UTF-8 encoded CSV bytes (``index=False``).

    ``is_tuning_running`` and ``is_any_job_running`` are set for the duration
    of the job and always reset afterwards. Any exception is appended to
    ``match_log_rv`` and then re-raised.
    """
    # Both uploads are indexed with [0] below; stop quietly when one is
    # missing, before any "running" flag is raised.
    req(input.query_data(), input.reference_data())

    is_any_job_running.set(True)
    is_tuning_running.set(True)
    match_log_rv.set("Running grid search of all parameters specified...\n")
    # Discard log lines left over in the queue before this run.
    _drain_queue_nowait(_LOG_QUEUE)

    try:
        # Input parsing lives inside the try so that a malformed value cannot
        # leave the running flags stuck at True.
        grid = {
            'similarity_measure': list(input.similarity_measure()),
            'weight': strip_weights(input.weights()),
            'spectrum_preprocessing_order': strip_text(input.spectrum_preprocessing_order()),
            'mz_min': strip_numeric(input.mz_min()),
            'mz_max': strip_numeric(input.mz_max()),
            'int_min': strip_numeric(input.int_min()),
            'int_max': strip_numeric(input.int_max()),
            'noise_threshold': strip_numeric(input.noise_threshold()),
            'wf_mz': strip_numeric(input.wf_mz()),
            'wf_int': strip_numeric(input.wf_int()),
            'LET_threshold': strip_numeric(input.LET_threshold()),
            'entropy_dimension': strip_numeric(input.entropy_dimension()),
            # Comma-separated text such as "[True, False]" -> list of bools.
            'high_quality_reference_library': [
                x.strip().lower() == "true"
                for x in input.high_quality_reference_library().strip().strip("[]").split(",")
                if x.strip()
            ],
        }

        if input.chromatography_platform() == "HRMS":
            # The two window sizes are only part of the HRMS grid.
            grid['window_size_centroiding'] = strip_numeric(input.window_size_centroiding())
            grid['window_size_matching'] = strip_numeric(input.window_size_matching())
            tune = tune_params_on_HRMS_data
        else:
            tune = tune_params_on_NRMS_data

        common_kwargs = dict(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
            return_output=True
        )

        rw = ReactiveWriter(match_log_rv, asyncio.get_running_loop())
        with redirect_stdout(rw), redirect_stderr(rw):
            df_out = await asyncio.to_thread(tune, **common_kwargs, grid=grid)

        match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        raise
    finally:
        is_tuning_running.set(False)
        is_any_job_running.set(False)
        # Append whatever is still sitting in the log queue.
        trailing = _drain_queue_nowait(_LOG_QUEUE)
        if trailing:
            match_log_rv.set(match_log_rv.get() + "".join(trailing))
        # Flush so the reset flags and the final log text are propagated.
        await reactive.flush()

    csv_bytes = df_out.to_csv(index=False).encode('utf-8')
    yield csv_bytes
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
@render.text
def status_output():
    """Return the current plot-spectra status text.

    NOTE(review): this function used to contain two further ``return``
    statements (for ``run_status_spec_lib_matching`` and
    ``run_status_parameter_tuning``) placed after the first one. They could
    never execute and were removed; the value returned is unchanged. If the
    other workflows need their own status text, select it explicitly
    (for example by the active page).
    """
    return run_status_plot_spectra.get()
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
class ReactiveWriter(io.TextIOBase):
    """Text stream that forwards written text to ``_LOG_QUEUE``.

    Each non-empty write is scheduled onto ``loop`` with
    ``call_soon_threadsafe`` as a call to ``_LOG_QUEUE.put_nowait``. The
    reactive value is stored on the instance but not touched by ``write``.
    """

    def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
        self._rv, self._loop = rv, loop

    def write(self, s: str):
        """Queue ``s`` via the event loop; return the number of characters accepted."""
        if s:
            self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
            return len(s)
        return 0

    def flush(self):
        """No-op: nothing is buffered in this object."""
        return None
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
@reactive.effect
async def _pump_reactive_writer_logs():
    """Move queued log text into ``match_log_rv`` while tuning is running.

    Does nothing unless ``is_tuning_running`` is set. Otherwise it re-arms
    itself with ``reactive.invalidate_later(0.1)``, drains ``_LOG_QUEUE`` and
    appends any drained text to ``match_log_rv``.
    """
    if not is_tuning_running.get():
        return

    # Re-run this effect after the given delay for as long as tuning runs.
    reactive.invalidate_later(0.1)

    msgs = _drain_queue_nowait(_LOG_QUEUE)
    if msgs:
        # Read the current log without taking a reactive dependency on it;
        # otherwise the set() below would invalidate this same effect.
        with reactive.isolate():
            current_log = match_log_rv.get()
        match_log_rv.set(current_log + "".join(msgs))
        await reactive.flush()
|
|
1143
|
+
|
|
354
1144
|
|
|
355
1145
|
|
|
356
1146
|
@render.text
def status_output():
    """Return the current plot-spectra status text.

    NOTE(review): this function used to contain two further ``return``
    statements (for ``run_status_spec_lib_matching`` and
    ``run_status_parameter_tuning``) placed after the first one. They could
    never execute and were removed; the value returned is unchanged. If the
    other workflows need their own status text, select it explicitly
    (for example by the active page).
    """
    return run_status_plot_spectra.get()
|
|
359
1151
|
|
|
360
1152
|
|
|
361
1153
|
|