pycompound 0.0.55__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +487 -60
- {pycompound_fy7392 → pycompound}/plot_spectra.py +3 -1
- {pycompound_fy7392 → pycompound}/plot_spectra_CLI.py +2 -2
- {pycompound_fy7392 → pycompound}/processing.py +1 -1
- {pycompound_fy7392 → pycompound}/spec_lib_matching.py +21 -13
- {pycompound_fy7392 → pycompound}/spec_lib_matching_CLI.py +2 -2
- {pycompound_fy7392 → pycompound}/tuning_CLI.py +2 -2
- {pycompound-0.0.55.dist-info → pycompound-0.1.0.dist-info}/METADATA +2 -1
- pycompound-0.1.0.dist-info/RECORD +14 -0
- pycompound-0.1.0.dist-info/top_level.txt +2 -0
- pycompound-0.0.55.dist-info/RECORD +0 -15
- pycompound-0.0.55.dist-info/top_level.txt +0 -2
- pycompound_fy7392/pycompound_shiny.py +0 -299
- {pycompound_fy7392 → pycompound}/build_library.py +0 -0
- {pycompound_fy7392 → pycompound}/similarity_measures.py +0 -0
- {pycompound-0.0.55.dist-info → pycompound-0.1.0.dist-info}/WHEEL +0 -0
- {pycompound-0.0.55.dist-info → pycompound-0.1.0.dist-info}/licenses/LICENSE +0 -0
app.py
CHANGED
|
@@ -1,17 +1,155 @@
|
|
|
1
1
|
|
|
2
|
-
from shiny import App, ui, reactive, render
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
2
|
+
from shiny import App, ui, reactive, render, req
|
|
3
|
+
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
|
+
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
6
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
7
|
+
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
8
|
+
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
9
9
|
from pathlib import Path
|
|
10
|
+
from contextlib import redirect_stdout, redirect_stderr
|
|
10
11
|
import subprocess
|
|
11
12
|
import traceback
|
|
12
13
|
import asyncio
|
|
13
14
|
import io
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
14
17
|
import matplotlib.pyplot as plt
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import numpy as np
|
|
20
|
+
import netCDF4 as nc
|
|
21
|
+
from pyteomics import mgf
|
|
22
|
+
from pyteomics import mzml
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_library(input_path=None, output_path=None):
    """Load a spectra file into a long-format DataFrame (one row per peak).

    CSV input is returned exactly as pandas reads it. mgf, mzML, cdf and msp
    input is parsed into a DataFrame with the columns ``id``, ``mz_ratio``
    and ``intensity``.

    Args:
        input_path: Path of the file to read. The format is picked from the
            end of the file name, compared case-insensitively.
        output_path: Unused. Accepted so that callers which pass an output
            location keep working; nothing is written to disk here.

    Returns:
        pandas.DataFrame holding the spectra.

    Raises:
        ValueError: If ``input_path`` is missing or is not a csv, mgf, mzML,
            cdf or msp file. (This used to call ``sys.exit()``, which raises
            ``SystemExit`` and therefore slipped past ``except Exception``
            handlers in the app.)
    """
    unsupported_msg = 'ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path'
    if input_path is None:
        raise ValueError(unsupported_msg)

    lowered = str(input_path).lower()
    if lowered.endswith('csv'):
        return pd.read_csv(input_path)

    if lowered.endswith('mgf'):
        input_file_type = 'mgf'
    elif lowered.endswith('mzml'):
        input_file_type = 'mzML'
    elif lowered.endswith('cdf'):
        input_file_type = 'cdf'
    elif lowered.endswith('msp'):
        input_file_type = 'msp'
    else:
        raise ValueError(unsupported_msg)

    ids = []
    mzs = []
    ints = []

    if input_file_type in ('mgf', 'mzML'):
        if input_file_type == 'mgf':
            reader_cm = mgf.read(input_path, index_by_scans=True)
        else:
            reader_cm = mzml.read(input_path)

        with reader_cm as reader:
            for spec_number, spec in enumerate(reader, start=1):
                mz_values = spec['m/z array']
                n_peaks = len(mz_values)
                if n_peaks == 0:
                    # No rows to emit, so the spectrum name is never needed.
                    continue
                # mzML spectra get a positional ID; mgf spectra use their name.
                if input_file_type == 'mzML':
                    spec_id = f'ID_{spec_number}'
                else:
                    spec_id = spec['params']['name']
                ids.extend([spec_id] * n_peaks)
                mzs.extend(mz_values)
                ints.extend(spec['intensity array'][:n_peaks])

    elif input_file_type == 'cdf':
        dataset = nc.Dataset(input_path, 'r')
        try:
            all_mzs = dataset.variables['mass_values'][:]
            all_ints = dataset.variables['intensity_values'][:]
            scan_idxs = dataset.variables['scan_index'][:]
        finally:
            # Release the file handle even if a variable is missing.
            dataset.close()

        n_scans = len(scan_idxs)
        # NOTE(review): each scan is sliced up to the start of the next one,
        # so the final scan is never emitted. Kept as-is so the IDs listed
        # here stay in step with the package's own converter - confirm.
        for i in range(n_scans - 1):
            if i % 1000 == 0:
                print(f'analyzed {i} out of {n_scans} scans')
            s_idx = scan_idxs[i]
            e_idx = scan_idxs[i + 1]

            mzs_tmp = all_mzs[s_idx:e_idx]
            ints_tmp = all_ints[s_idx:e_idx]

            ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
            mzs.extend(mzs_tmp)
            ints.extend(ints_tmp[:len(mzs_tmp)])

    else:  # msp
        spectrum_id = None
        with open(input_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line.startswith('Name:'):
                    spectrum_id = line.replace('Name: ', '')
                elif line and line[0].isdigit():
                    if spectrum_id is None:
                        # Peak data before any 'Name:' record has no owner.
                        continue
                    try:
                        mz, intensity = map(float, line.split()[:2])
                    except ValueError:
                        # Not an "<m/z> <intensity>" pair; skip the line.
                        continue
                    ids.append(spectrum_id)
                    mzs.append(mz)
                    ints.append(intensity)

    return pd.DataFrame({'id': ids, 'mz_ratio': mzs, 'intensity': ints})
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def extract_first_column_ids(file_path: str, max_ids: int = 20000):
    """Return the unique spectrum IDs found in a file, in first-seen order.

    For ``.csv`` files the IDs are the non-missing, non-blank values of the
    first column. For any other file the text is scanned for ``TITLE=...``
    lines and ``Name: ...`` lines (the latter matched case-insensitively).

    Args:
        file_path: Path of the file to inspect.
        max_ids: Maximum number of unique IDs to return.

    Returns:
        A list of ID strings without duplicates. A non-CSV file that cannot
        be opened or read yields whatever was collected so far (possibly an
        empty list).
    """
    if Path(file_path).suffix.lower() == ".csv":
        first_col = pd.read_csv(file_path, usecols=[0]).iloc[:, 0]
        # Drop missing values BEFORE casting: astype(str) turns NaN into the
        # literal string "nan", after which dropna() can no longer remove it.
        candidates = first_col.dropna().drop_duplicates().astype(str)
        unique_ids = dict.fromkeys(x for x in candidates if x.strip() != "")
        return list(unique_ids)[:max_ids]

    # dict keys give an insertion-ordered set, so the cap below counts unique
    # IDs rather than raw (possibly repeated) matches.
    unique_ids = {}
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                ls = line.strip()
                if ls.startswith("TITLE="):
                    found = ls.split("=", 1)[1].strip()
                elif ls.lower().startswith("name:"):
                    found = ls.split(":", 1)[1].strip()
                else:
                    continue
                if found:
                    unique_ids.setdefault(found)
                if len(unique_ids) >= max_ids:
                    break
    except OSError:
        # Best effort: an unreadable file simply contributes no further IDs.
        pass

    return list(unique_ids)[:max_ids]
|
|
15
153
|
|
|
16
154
|
|
|
17
155
|
def plot_spectra_ui(platform: str):
|
|
@@ -19,8 +157,20 @@ def plot_spectra_ui(platform: str):
|
|
|
19
157
|
base_inputs = [
|
|
20
158
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
21
159
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
22
|
-
|
|
23
|
-
|
|
160
|
+
ui.input_selectize(
|
|
161
|
+
"spectrum_ID1",
|
|
162
|
+
"Select spectrum ID 1:",
|
|
163
|
+
choices=[],
|
|
164
|
+
multiple=False,
|
|
165
|
+
options={"placeholder": "Upload a query file to load IDs..."},
|
|
166
|
+
),
|
|
167
|
+
ui.input_selectize(
|
|
168
|
+
"spectrum_ID2",
|
|
169
|
+
"Select spectrum ID 2 (optional):",
|
|
170
|
+
choices=[],
|
|
171
|
+
multiple=False,
|
|
172
|
+
options={"placeholder": "Upload a reference file to load IDs..."},
|
|
173
|
+
),
|
|
24
174
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
25
175
|
ui.input_select(
|
|
26
176
|
"high_quality_reference_library",
|
|
@@ -71,10 +221,9 @@ def plot_spectra_ui(platform: str):
|
|
|
71
221
|
)
|
|
72
222
|
|
|
73
223
|
# Run and Back buttons
|
|
74
|
-
|
|
224
|
+
run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
75
225
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
76
226
|
|
|
77
|
-
#print(len(extra_inputs))
|
|
78
227
|
# Layout base_inputs and extra_inputs in columns
|
|
79
228
|
if platform == "HRMS":
|
|
80
229
|
inputs_columns = ui.layout_columns(
|
|
@@ -98,8 +247,9 @@ def plot_spectra_ui(platform: str):
|
|
|
98
247
|
ui.TagList(
|
|
99
248
|
ui.h2("Plot Spectra"),
|
|
100
249
|
inputs_columns,
|
|
101
|
-
|
|
102
|
-
back_button
|
|
250
|
+
run_button_plot_spectra,
|
|
251
|
+
back_button,
|
|
252
|
+
ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
|
|
103
253
|
),
|
|
104
254
|
)
|
|
105
255
|
|
|
@@ -155,10 +305,9 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
155
305
|
|
|
156
306
|
|
|
157
307
|
# Run and Back buttons
|
|
158
|
-
|
|
308
|
+
run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
159
309
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
160
310
|
|
|
161
|
-
#print(len(extra_inputs))
|
|
162
311
|
# Layout base_inputs and extra_inputs in columns
|
|
163
312
|
if platform == "HRMS":
|
|
164
313
|
inputs_columns = ui.layout_columns(
|
|
@@ -177,13 +326,20 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
177
326
|
col_widths=(3, 3, 3, 3),
|
|
178
327
|
)
|
|
179
328
|
|
|
329
|
+
log_panel = ui.card(
|
|
330
|
+
ui.card_header("Identification log"),
|
|
331
|
+
ui.output_text_verbatim("match_log"),
|
|
332
|
+
style="max-height:300px; overflow:auto"
|
|
333
|
+
)
|
|
334
|
+
|
|
180
335
|
# Combine everything
|
|
181
336
|
return ui.div(
|
|
182
337
|
ui.TagList(
|
|
183
338
|
ui.h2("Run Spectral Library Matching"),
|
|
184
339
|
inputs_columns,
|
|
185
|
-
|
|
186
|
-
back_button
|
|
340
|
+
run_button_spec_lib_matching,
|
|
341
|
+
back_button,
|
|
342
|
+
log_panel,
|
|
187
343
|
),
|
|
188
344
|
)
|
|
189
345
|
|
|
@@ -197,19 +353,114 @@ app_ui = ui.page_fluid(
|
|
|
197
353
|
|
|
198
354
|
def server(input, output, session):
|
|
199
355
|
|
|
200
|
-
# Track which page to show
|
|
201
356
|
current_page = reactive.Value("main_menu")
|
|
202
357
|
|
|
203
|
-
# Track button clicks
|
|
204
358
|
plot_clicks = reactive.Value(0)
|
|
205
359
|
match_clicks = reactive.Value(0)
|
|
206
360
|
back_clicks = reactive.Value(0)
|
|
207
361
|
|
|
208
|
-
|
|
362
|
+
run_status_plot_spectra = reactive.Value("")
|
|
363
|
+
run_status_spec_lib_matching = reactive.Value("")
|
|
364
|
+
match_log_rv = reactive.Value("")
|
|
365
|
+
is_matching_rv = reactive.Value(False)
|
|
366
|
+
|
|
367
|
+
query_ids_rv = reactive.Value([])
|
|
368
|
+
query_file_path_rv = reactive.Value(None)
|
|
369
|
+
query_result_rv = reactive.Value(None)
|
|
370
|
+
query_status_rv = reactive.Value("")
|
|
371
|
+
reference_ids_rv = reactive.Value([])
|
|
372
|
+
reference_file_path_rv = reactive.Value(None)
|
|
373
|
+
reference_result_rv = reactive.Value(None)
|
|
374
|
+
reference_status_rv = reactive.Value("")
|
|
375
|
+
|
|
376
|
+
converted_query_path_rv = reactive.Value(None)
|
|
377
|
+
converted_reference_path_rv = reactive.Value(None)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def process_database(file_path: str):
    """Describe an uploaded database file by its path and lower-cased suffix."""
    return dict(path=file_path, suffix=Path(file_path).suffix.lower())
|
|
383
|
+
|
|
384
|
+
@render.text
def plot_query_status():
    """Render the current query-upload status message (empty text when unset)."""
    message = query_status_rv.get()
    return message if message else ""
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
@reactive.effect
@reactive.event(input.query_data)
async def _on_query_upload():
    """Handle a new query upload while the Plot Spectra page is shown.

    Stores the uploaded file's path, runs process_database on it in a worker
    thread, and reports progress or failure through query_status_rv.
    """
    on_plot_page = current_page() == "plot_spectra"
    if not on_plot_page:
        return

    uploads = input.query_data()
    req(uploads and len(uploads) > 0)

    uploaded_path = uploads[0]["datapath"]
    query_file_path_rv.set(uploaded_path)

    uploaded_name = Path(uploaded_path).name
    query_status_rv.set(f"Processing query database: {uploaded_name} …")
    await reactive.flush()

    try:
        summary = await asyncio.to_thread(process_database, uploaded_path)
        query_result_rv.set(summary)
        query_status_rv.set("✅ Query database processed.")
        await reactive.flush()
    except Exception as err:
        query_status_rv.set(f"❌ Failed to process query database: {err}")
        await reactive.flush()
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
@reactive.effect
@reactive.event(input.reference_data)
async def _on_reference_upload():
    """Handle a new reference upload while the Plot Spectra page is shown.

    Stores the uploaded file's path, runs process_database on it in a worker
    thread, and reports progress or failure through reference_status_rv.
    """
    on_plot_page = current_page() == "plot_spectra"
    if not on_plot_page:
        return

    uploads = input.reference_data()
    req(uploads and len(uploads) > 0)

    uploaded_path = uploads[0]["datapath"]
    reference_file_path_rv.set(uploaded_path)

    uploaded_name = Path(uploaded_path).name
    reference_status_rv.set(f"Processing reference database: {uploaded_name} …")
    await reactive.flush()

    try:
        summary = await asyncio.to_thread(process_database, uploaded_path)
        reference_result_rv.set(summary)
        reference_status_rv.set("✅ Reference database processed.")
        await reactive.flush()
    except Exception as err:
        reference_status_rv.set(f"❌ Failed to process reference database: {err}")
        await reactive.flush()
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
@render.text
def match_log():
    """Render the identification log text accumulated in match_log_rv."""
    log_text = match_log_rv.get()
    return log_text
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
class ReactiveWriter(io.TextIOBase):
    """Write-only text stream that appends everything written to a reactive value.

    Intended as a target for redirected stdout/stderr so that printed
    progress shows up in the UI.

    Args:
        rv: Object exposing ``get()`` and ``set(value)``; it holds the
            accumulated text.
        loop: Optional event loop. When given, the update is handed to that
            loop with ``call_soon_threadsafe`` so that ``write`` may be
            called from a worker thread. When omitted, the value is updated
            in the calling thread, as before.
    """

    def __init__(self, rv, loop=None):
        self.rv = rv
        self.loop = loop

    def write(self, s: str):
        """Append ``s`` to the reactive value and return the number of characters written."""
        if not s:
            return 0
        if self.loop is not None:
            # Possibly called from another thread: let the loop do the update.
            self.loop.call_soon_threadsafe(self._append, s)
        else:
            self._append(s)
        return len(s)

    def _append(self, s):
        """Extend the stored text and, if an event loop is running, schedule a flush."""
        self.rv.set(self.rv.get() + s)
        try:
            running_loop = asyncio.get_running_loop()
            running_loop.create_task(reactive.flush())
        except RuntimeError:
            # No running loop in this thread; the value is updated but no
            # flush is scheduled.
            pass

    def flush(self):
        """No-op: every write is applied immediately, nothing is buffered."""
        pass
|
|
459
|
+
|
|
460
|
+
|
|
209
461
|
|
|
210
462
|
@reactive.Effect
|
|
211
463
|
def _():
|
|
212
|
-
# Main menu buttons
|
|
213
464
|
if input.plot_spectra() > plot_clicks.get():
|
|
214
465
|
current_page.set("plot_spectra")
|
|
215
466
|
plot_clicks.set(input.plot_spectra())
|
|
@@ -220,6 +471,7 @@ def server(input, output, session):
|
|
|
220
471
|
current_page.set("main_menu")
|
|
221
472
|
back_clicks.set(input.back())
|
|
222
473
|
|
|
474
|
+
|
|
223
475
|
@render.image
|
|
224
476
|
def image():
|
|
225
477
|
from pathlib import Path
|
|
@@ -228,6 +480,7 @@ def server(input, output, session):
|
|
|
228
480
|
img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
|
|
229
481
|
return img
|
|
230
482
|
|
|
483
|
+
|
|
231
484
|
@output
|
|
232
485
|
@render.ui
|
|
233
486
|
def main_ui():
|
|
@@ -310,53 +563,227 @@ def server(input, output, session):
|
|
|
310
563
|
elif current_page() == "run_spec_lib_matching":
|
|
311
564
|
return run_spec_lib_matching_ui(input.chromatography_platform())
|
|
312
565
|
|
|
566
|
+
|
|
567
|
+
|
|
313
568
|
@reactive.effect
@reactive.event(input.query_data)
async def _populate_ids_from_query_upload():
    """Fill the "spectrum_ID1" dropdown with the IDs of the uploaded query file.

    Non-CSV uploads are first converted with build_library (in a worker
    thread) and written next to the upload as "<name>.converted.csv".
    Progress and failures are reported through query_status_rv; a failure is
    re-raised after it has been reported.
    """
    if current_page() != "plot_spectra":
        return

    uploads = input.query_data()
    if not uploads:
        return

    source = Path(uploads[0]["datapath"])

    try:
        if source.suffix.lower() != ".csv":
            query_status_rv.set(f"Converting {source.name} → CSV …")
            await reactive.flush()

            # The converted file lives beside the uploaded one.
            target = source.with_suffix(".converted.csv")
            converted = await asyncio.to_thread(build_library, str(source), str(target))

            # build_library may hand back a DataFrame or a path; end up with a path.
            if isinstance(converted, pd.DataFrame):
                converted.to_csv(target, index=False)
                csv_path = target
            elif isinstance(converted, (str, os.PathLike)):
                csv_path = Path(converted)
            else:
                raise TypeError(f"build_library returned unsupported type: {type(converted)}")
        else:
            csv_path = source

        converted_query_path_rv.set(str(csv_path))

        query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        # IDs come from the first column of the CSV.
        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        query_ids_rv.set(ids)

        first_choice = ids[0] if ids else None
        ui.update_selectize("spectrum_ID1", choices=ids, selected=first_choice)

        if ids:
            outcome = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            outcome = f"⚠️ No IDs found in {csv_path.name}"
        query_status_rv.set(outcome)
        await reactive.flush()

    except Exception as err:
        query_status_rv.set(f"❌ Failed: {err}")
        await reactive.flush()
        raise
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
@reactive.effect
@reactive.event(input.reference_data)
async def _populate_ids_from_reference_upload():
    """Fill the "spectrum_ID2" dropdown with the IDs of the uploaded reference file.

    Non-CSV uploads are first converted with build_library (in a worker
    thread) and written next to the upload as "<name>.converted.csv".
    Progress and failures are reported through reference_status_rv; a
    failure is re-raised after it has been reported.
    """
    if current_page() != "plot_spectra":
        return

    uploads = input.reference_data()
    if not uploads:
        return

    source = Path(uploads[0]["datapath"])

    try:
        if source.suffix.lower() != ".csv":
            reference_status_rv.set(f"Converting {source.name} → CSV …")
            await reactive.flush()

            # The converted file lives beside the uploaded one.
            target = source.with_suffix(".converted.csv")
            converted = await asyncio.to_thread(build_library, str(source), str(target))

            # build_library may hand back a DataFrame or a path; end up with a path.
            if isinstance(converted, pd.DataFrame):
                converted.to_csv(target, index=False)
                csv_path = target
            elif isinstance(converted, (str, os.PathLike)):
                csv_path = Path(converted)
            else:
                raise TypeError(f"build_library returned unsupported type: {type(converted)}")
        else:
            csv_path = source

        converted_reference_path_rv.set(str(csv_path))

        reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
        await reactive.flush()

        # IDs come from the first column of the CSV.
        ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
        reference_ids_rv.set(ids)

        first_choice = ids[0] if ids else None
        ui.update_selectize("spectrum_ID2", choices=ids, selected=first_choice)

        if ids:
            outcome = f"✅ Loaded {len(ids)} IDs from {csv_path.name}"
        else:
            outcome = f"⚠️ No IDs found in {csv_path.name}"
        reference_status_rv.set(outcome)
        await reactive.flush()

    except Exception as err:
        reference_status_rv.set(f"❌ Failed: {err}")
        await reactive.flush()
        raise
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
@render.download(filename=lambda: "plot.png")
def run_btn_plot_spectra():
    """Build the spectrum plot for the current inputs and stream it as a PNG.

    Chooses the HRMS or NRMS plotting function from the selected
    chromatography platform, passes the UI parameters through, and yields
    the rendered figure as PNG bytes.

    Raises:
        ValueError: If either dataset has not been uploaded, or if the
            selected platform is neither "HRMS" nor "NRMS" (previously these
            surfaced as a TypeError / UnboundLocalError).
    """
    query_files = input.query_data()
    reference_files = input.reference_data()
    if not query_files or not reference_files:
        raise ValueError("Upload both a query and a reference dataset before plotting.")

    platform = input.chromatography_platform()
    if platform == "HRMS":
        plot_fn = generate_plots_on_HRMS_data
        # Only the HRMS plotting function takes the window sizes.
        platform_kwargs = dict(
            window_size_centroiding=input.window_size_centroiding(),
            window_size_matching=input.window_size_matching(),
        )
    elif platform == "NRMS":
        plot_fn = generate_plots_on_NRMS_data
        platform_kwargs = {}
    else:
        raise ValueError(f"Unsupported chromatography platform: {platform!r}")

    fig = plot_fn(
        query_data=query_files[0]['datapath'],
        reference_data=reference_files[0]['datapath'],
        # An empty selection is passed on as None.
        spectrum_ID1=input.spectrum_ID1() or None,
        spectrum_ID2=input.spectrum_ID2() or None,
        similarity_measure=input.similarity_measure(),
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=input.high_quality_reference_library(),
        mz_min=input.mz_min(),
        mz_max=input.mz_max(),
        int_min=input.int_min(),
        int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(),
        wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(),
        entropy_dimension=input.entropy_dimension(),
        y_axis_transformation=input.y_axis_transformation(),
        return_plot=True,
        **platform_kwargs,
    )

    # NOTE(review): the figure is not closed after saving; if the plotting
    # functions return a matplotlib Figure, consider closing it - confirm.
    with io.BytesIO() as buf:
        fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
        yield buf.getvalue()
|
|
354
702
|
|
|
355
703
|
|
|
356
704
|
@render.text
def status_output():
    """Render the run status: the plotting status if set, else the matching status.

    The previous body had two consecutive ``return`` statements, so the
    matching status could never be reached.
    """
    return run_status_plot_spectra.get() or run_status_spec_lib_matching.get()
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
class ReactiveWriter(io.TextIOBase):
    """Text stream that forwards written text to a reactive value via an event loop.

    Each non-empty write schedules a callback on ``loop`` that appends the
    text to ``rv`` and creates a task for ``reactive.flush()``.
    """

    def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
        self.rv = rv
        self.loop = loop

    def _append_and_flush(self, chunk):
        """Runs on the loop: extend the stored text and schedule a flush."""
        self.rv.set(self.rv.get() + chunk)
        self.loop.create_task(reactive.flush())

    def write(self, s: str):
        """Queue ``s`` for appending and return the number of characters accepted."""
        if s:
            self.loop.call_soon_threadsafe(self._append_and_flush, s)
            return len(s)
        return 0

    def flush(self):
        """No-op; nothing is buffered in the writer itself."""
        pass
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
@render.download(filename="identification_output.csv")
async def run_btn_spec_lib_matching():
    """Run spectral library matching and stream the identification table as CSV.

    The matching function runs in a worker thread while stdout/stderr are
    redirected into match_log_rv, so printed results appear in the log
    panel. The table it returns is yielded as CSV text, including its index.

    Raises:
        ValueError: If either dataset has not been uploaded.
        Exception: Any error raised by the matching function is written to
            the log and then re-raised.
    """
    # 1) quick first paint
    match_log_rv.set("Starting identification...\n")
    await reactive.flush()

    query_files = input.query_data()
    reference_files = input.reference_data()
    if not query_files or not reference_files:
        message = "Upload both a query and a reference dataset before running."
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {message}\n")
        await reactive.flush()
        raise ValueError(message)

    # 2) normalize inputs: the select input may deliver the flag as text or a number
    hq = input.high_quality_reference_library()
    if isinstance(hq, str):
        hq = hq.lower() == "true"
    elif isinstance(hq, (int, float)):
        hq = bool(hq)

    common_kwargs = dict(
        query_data=query_files[0]["datapath"],
        reference_data=reference_files[0]["datapath"],
        likely_reference_ids=None,
        similarity_measure=input.similarity_measure(),
        spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
        high_quality_reference_library=hq,
        mz_min=input.mz_min(), mz_max=input.mz_max(),
        int_min=input.int_min(), int_max=input.int_max(),
        noise_threshold=input.noise_threshold(),
        wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
        LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
        n_top_matches_to_save=input.n_top_matches_to_save(),
        print_id_results=True,  # ensure the library actually prints progress
        output_identification=str(Path.cwd() / "identification_output.csv"),
        output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
        return_ID_output=True,
    )

    loop = asyncio.get_running_loop()
    rw = ReactiveWriter(match_log_rv, loop)

    # 3) run the heavy function in a thread so the event loop can repaint.
    # redirect_stdout/redirect_stderr swap sys.stdout/sys.stderr for the
    # whole process while the block is active, which is how output printed
    # in the worker thread reaches the writer.
    try:
        with redirect_stdout(rw), redirect_stderr(rw):
            if input.chromatography_platform() == "HRMS":
                df_out = await asyncio.to_thread(
                    run_spec_lib_matching_on_HRMS_data,
                    window_size_centroiding=input.window_size_centroiding(),
                    window_size_matching=input.window_size_matching(),
                    **common_kwargs
                )
            else:
                df_out = await asyncio.to_thread(
                    run_spec_lib_matching_on_NRMS_data, **common_kwargs
                )
        match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
        await reactive.flush()
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        await reactive.flush()
        raise

    # 4) stream CSV back to the browser. Keep the index: the returned table
    # is indexed by query spectrum ID, and dropping it would leave the rows
    # with no indication of which query each match belongs to.
    yield df_out.to_csv(index=True)
|
|
360
787
|
|
|
361
788
|
|
|
362
789
|
app = App(app_ui, server)
|
|
@@ -45,7 +45,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
45
45
|
extension = extension[(len(extension)-1)]
|
|
46
46
|
if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
|
|
47
47
|
output_path_tmp = query_data[:-3] + 'csv'
|
|
48
|
-
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=
|
|
48
|
+
build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
|
|
49
49
|
df_query = pd.read_csv(output_path_tmp)
|
|
50
50
|
if extension == 'csv' or extension == 'CSV':
|
|
51
51
|
df_query = pd.read_csv(query_data)
|
|
@@ -177,6 +177,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
177
177
|
spec_tmp = spectrum_ID1
|
|
178
178
|
spectrum_ID1 = spectrum_ID2
|
|
179
179
|
spectrum_ID2 = spec_tmp
|
|
180
|
+
print(unique_query_ids)
|
|
181
|
+
print(spectrum_ID1)
|
|
180
182
|
query_idx = unique_query_ids.index(spectrum_ID1)
|
|
181
183
|
reference_idx = unique_reference_ids.index(spectrum_ID2)
|
|
182
184
|
q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
|
-
from
|
|
3
|
-
from
|
|
2
|
+
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
3
|
+
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import argparse
|
|
6
6
|
import json
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
# This script contains the functions used to transform spectra prior to computing similarity scores
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from pycompound.build_library import build_library_from_raw_data
|
|
5
5
|
import scipy.stats
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
# this script's function runs spectral library matching to identify unknown query compound(s)
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from pycompound.build_library import build_library_from_raw_data
|
|
5
5
|
from .processing import *
|
|
6
6
|
from .similarity_measures import *
|
|
7
7
|
import pandas as pd
|
|
@@ -389,7 +389,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
389
389
|
|
|
390
390
|
|
|
391
391
|
|
|
392
|
-
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
|
|
392
|
+
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
|
|
393
393
|
'''
|
|
394
394
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data
|
|
395
395
|
|
|
@@ -636,22 +636,26 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
|
|
|
636
636
|
df_top_ref_specs.index = unique_query_ids
|
|
637
637
|
df_top_ref_specs.index.names = ['Query Spectrum ID']
|
|
638
638
|
|
|
639
|
+
df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
|
|
640
|
+
|
|
639
641
|
# print the identification results if the user desires
|
|
640
642
|
if print_id_results == True:
|
|
641
643
|
print(df_top_ref_specs.to_string())
|
|
642
644
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
+
if return_ID_output is False:
|
|
646
|
+
# write spectral library matching results to disk
|
|
647
|
+
df_top_ref_specs.to_csv(output_identification)
|
|
645
648
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
+
# write all similarity scores to disk
|
|
650
|
+
df_scores.to_csv(output_similarity_scores)
|
|
651
|
+
else:
|
|
652
|
+
return df_top_ref_specs
|
|
649
653
|
|
|
650
654
|
|
|
651
655
|
|
|
652
656
|
|
|
653
657
|
|
|
654
|
-
def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
|
|
658
|
+
def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
|
|
655
659
|
'''
|
|
656
660
|
runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data
|
|
657
661
|
|
|
@@ -886,11 +890,15 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
886
890
|
if print_id_results == True:
|
|
887
891
|
print(df_top_ref_specs.to_string())
|
|
888
892
|
|
|
889
|
-
# write spectral library matching results to disk
|
|
890
|
-
df_top_ref_specs.to_csv(output_identification)
|
|
891
|
-
|
|
892
|
-
# write all similarity scores to disk
|
|
893
893
|
df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
|
|
894
|
-
df_scores.to_csv(output_similarity_scores)
|
|
895
894
|
|
|
895
|
+
if return_ID_output is False:
|
|
896
|
+
# write spectral library matching results to disk
|
|
897
|
+
df_top_ref_specs.to_csv(output_identification)
|
|
898
|
+
|
|
899
|
+
# write all similarity scores to disk
|
|
900
|
+
df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
|
|
901
|
+
df_scores.to_csv(output_similarity_scores)
|
|
902
|
+
else:
|
|
903
|
+
return df_top_ref_specs
|
|
896
904
|
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
# this script performs spectral library matching to identify unknown query compound(s) from GC-MS data
|
|
3
3
|
|
|
4
4
|
# load libraries
|
|
5
|
-
from
|
|
6
|
-
from
|
|
5
|
+
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
6
|
+
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import argparse
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
|
-
from
|
|
3
|
-
from
|
|
2
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
3
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
4
4
|
import argparse
|
|
5
5
|
import json
|
|
6
6
|
from pathlib import Path
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pycompound
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
|
|
5
5
|
Author-email: Hunter Dlugas <fy7392@wayne.edu>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -19,6 +19,7 @@ Requires-Dist: pyteomics==4.7.2
|
|
|
19
19
|
Requires-Dist: netCDF4==1.6.5
|
|
20
20
|
Requires-Dist: lxml>=5.1.0
|
|
21
21
|
Requires-Dist: orjson==3.11.0
|
|
22
|
+
Requires-Dist: shiny==1.4.0
|
|
22
23
|
Requires-Dist: joblib==1.5.2
|
|
23
24
|
Dynamic: license-file
|
|
24
25
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
|
|
2
|
+
pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
|
|
3
|
+
pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
|
|
4
|
+
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
5
|
+
pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
|
|
6
|
+
pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
|
|
7
|
+
pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
|
|
8
|
+
pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
|
|
9
|
+
pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
|
|
10
|
+
pycompound-0.1.0.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
11
|
+
pycompound-0.1.0.dist-info/METADATA,sha256=qfM4rP0BeGThYpxvGa7vOseRsUgtJ4aH8hgUtio0Ugw,1732
|
|
12
|
+
pycompound-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
pycompound-0.1.0.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
|
|
14
|
+
pycompound-0.1.0.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
app.py,sha256=PKiCJe_18EJIHvs0R7pl_Yf-XakZn5J0AAfI-AnGsX0,21535
|
|
2
|
-
pycompound-0.0.55.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
3
|
-
pycompound_fy7392/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
|
|
4
|
-
pycompound_fy7392/plot_spectra.py,sha256=wOnf2oOAfifj7FYkTZAcIeD7dHW1aRHzmsspPpySDcY,42023
|
|
5
|
-
pycompound_fy7392/plot_spectra_CLI.py,sha256=fo0nUmbuy2qE6d9HgVdASn2CNUG8seg2mUCPrUU-rao,8409
|
|
6
|
-
pycompound_fy7392/processing.py,sha256=7cKMX7PQ4Q-I4c8lRo5qXbOVGr8CeRdgNPURJx8DBV0,11075
|
|
7
|
-
pycompound_fy7392/pycompound_shiny.py,sha256=uYfeIuR5j1UK_KE8RbDPaQxqMIU1qykVJ2L-zgaSkY0,30154
|
|
8
|
-
pycompound_fy7392/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
|
|
9
|
-
pycompound_fy7392/spec_lib_matching.py,sha256=jtUpG5OBDtIaHIpCNc62a3y-wQ_SmIgXZ9Q_p8xKZu4,59969
|
|
10
|
-
pycompound_fy7392/spec_lib_matching_CLI.py,sha256=TAafJ3DGPorBTDzmXLQaaSH3giKn6q3GrRJPWh03yyo,9929
|
|
11
|
-
pycompound_fy7392/tuning_CLI.py,sha256=qLglxqq-y6EXCDk0P3CkWn6cTFCmWDeKz0-SZBXcwCA,8553
|
|
12
|
-
pycompound-0.0.55.dist-info/METADATA,sha256=3i67ba8TVHHSK-toc2-OI9XJYdQRkrCKGXOrqHyV5e4,1705
|
|
13
|
-
pycompound-0.0.55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
-
pycompound-0.0.55.dist-info/top_level.txt,sha256=h_c9lBkHcABTURy4sDAmgRzZdFHYWX9MDdsaiftT-Yw,22
|
|
15
|
-
pycompound-0.0.55.dist-info/RECORD,,
|
|
@@ -1,299 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
from shiny import App, ui, render, reactive
|
|
3
|
-
from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
|
-
from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
|
-
from pycompound_fy7392.plot_spectra import generate_plots_on_HRMS_data
|
|
6
|
-
from pycompound_fy7392.plot_spectra import generate_plots_on_NRMS_data
|
|
7
|
-
from pycompound_fy7392.spec_lib_matching import tune_params_on_HRMS_data
|
|
8
|
-
from pycompound_fy7392.spec_lib_matching import tune_params_on_NRMS_data
|
|
9
|
-
import subprocess
|
|
10
|
-
import traceback
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
import pandas as pd
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def split_or_wrap(s):
|
|
16
|
-
s = str(s)
|
|
17
|
-
def parse(x):
|
|
18
|
-
x = x.strip()
|
|
19
|
-
if x.lower() == 'true':
|
|
20
|
-
return True
|
|
21
|
-
elif x.lower() == 'false':
|
|
22
|
-
return False
|
|
23
|
-
try:
|
|
24
|
-
return int(x)
|
|
25
|
-
except ValueError:
|
|
26
|
-
try:
|
|
27
|
-
return float(x)
|
|
28
|
-
except ValueError:
|
|
29
|
-
return x
|
|
30
|
-
|
|
31
|
-
if ',' not in s:
|
|
32
|
-
return [parse(s)]
|
|
33
|
-
else:
|
|
34
|
-
return [parse(item) for item in s.split(',')]
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def custom_on_off_to_bool(lst):
|
|
38
|
-
if lst == ['no']:
|
|
39
|
-
return [False]
|
|
40
|
-
elif lst == ['yes']:
|
|
41
|
-
return [True]
|
|
42
|
-
elif lst == ['no','yes']:
|
|
43
|
-
return [False,True]
|
|
44
|
-
elif lst == ['yes','no']:
|
|
45
|
-
return [False,True]
|
|
46
|
-
elif not lst:
|
|
47
|
-
return [False]
|
|
48
|
-
else:
|
|
49
|
-
raise ValueError(f"Unhandled input: {lst}")
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
app_ui = ui.page_fluid(
|
|
54
|
-
ui.div(
|
|
55
|
-
ui.input_select("choice", "Choose an option:", ["Run spectral library matching to identify unknown compounds", "Tune parameters with a query library with known compound IDs", "Plot two spectra"]),
|
|
56
|
-
ui.input_radio_buttons("chromatography_platform", "Choose chromatography platform:", ["HRMS","NRMS"]),
|
|
57
|
-
style="width: 2000px; max-width: none;"),
|
|
58
|
-
ui.output_ui("dynamic_inputs"),
|
|
59
|
-
ui.output_text("status_output")
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
def server(input, output, session):
|
|
63
|
-
run_status = reactive.Value("Waiting for input...")
|
|
64
|
-
|
|
65
|
-
@output
|
|
66
|
-
@render.ui
|
|
67
|
-
def dynamic_inputs():
|
|
68
|
-
if input.choice() == "Run spectral library matching to identify unknown compounds":
|
|
69
|
-
if input.chromatography_platform() == "HRMS":
|
|
70
|
-
return ui.TagList(
|
|
71
|
-
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
72
|
-
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
73
|
-
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine", "shannon", "renyi", "tsallis"]),
|
|
74
|
-
ui.input_select("high_quality_reference_library", "Indicate whether the reference library is considered to be of high quality. If true, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra.", [False,True]),
|
|
75
|
-
ui.input_text("spectrum_preprocessing_order", "Input a sequence of characters denoting the order in which spectrum preprocessing transformations should be applied. Available characters/transformations are C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), and W (weight factor transformation. M must be in sequence, and if C is performed, then C must be performed before M.", "FCNMWL"),
|
|
76
|
-
ui.input_numeric("mz_min", "Enter numeric value for minimum mass/charge ratio for filtering:", 0),
|
|
77
|
-
ui.input_numeric("mz_max", "Enter numeric value for minimum mass/charge ratio for filtering:", 99999999),
|
|
78
|
-
ui.input_numeric("int_min", "Enter numeric value for minimum intensity for filtering:", 0),
|
|
79
|
-
ui.input_numeric("int_max", "Enter numeric value for maximum intensity for filtering:", 999999999),
|
|
80
|
-
ui.input_numeric("window_size_centroiding", "Enter numeric value for the centroiding window-size:", 0.5),
|
|
81
|
-
ui.input_numeric("window_size_matching", "Enter numeric value for the matching window-size:", 0.5),
|
|
82
|
-
ui.input_numeric("noise_threshold", "Enter numeric value for the noise removal threshold:", 0.0),
|
|
83
|
-
ui.input_numeric("wf_mz", "Enter numeric value for the mass/charge weight factor:", 0.0),
|
|
84
|
-
ui.input_numeric("wf_int", "Enter numeric value for the intensity weight factor:", 1.0),
|
|
85
|
-
ui.input_numeric("LET_threshold", "Enter non-negative numeric value for the low-entropy threshold:", 0.0),
|
|
86
|
-
ui.input_numeric("entropy_dimension", "Enter non-negative, non-unity numeric value for the entropy dimension (only applicable to Renyi and Tsallis):", 1.1),
|
|
87
|
-
ui.input_numeric("n_top_matches_to_save", "Enter positive integer for the number of top matches to save:", 1),
|
|
88
|
-
ui.input_text("output_identification", "Path to identification output:", f'{Path.cwd()}/output_identification.csv'),
|
|
89
|
-
ui.input_text("output_similarity_scores", "Path to output file of similarity scores:", f'{Path.cwd()}/output_similarity_scores.csv'),
|
|
90
|
-
ui.input_action_button("run_btn", "Run"))
|
|
91
|
-
else:
|
|
92
|
-
return ui.TagList(
|
|
93
|
-
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
94
|
-
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
95
|
-
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine", "shannon", "renyi", "tsallis"]),
|
|
96
|
-
ui.input_select("high_quality_reference_library", "Indicate whether the reference library is considered to be of high quality. If true, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra.", [False,True]),
|
|
97
|
-
ui.input_text("spectrum_preprocessing_order", "Input a sequence of characters denoting the order in which spectrum preprocessing transformations should be applied. Available characters/transformations are F (filtering), N (noise removal), L (low-entropy transformation), and W (weight factor transformation).", "FNLW"),
|
|
98
|
-
ui.input_numeric("mz_min", "Enter numeric value for minimum mass/charge ratio for filtering:", 0),
|
|
99
|
-
ui.input_numeric("mz_max", "Enter numeric value for minimum mass/charge ratio for filtering:", 99999999),
|
|
100
|
-
ui.input_numeric("int_min", "Enter numeric value for minimum intensity for filtering:", 0),
|
|
101
|
-
ui.input_numeric("int_max", "Enter numeric value for maximum intensity for filtering:", 999999999),
|
|
102
|
-
ui.input_numeric("noise_threshold", "Enter numeric value for the noise removal threshold:", 0.0),
|
|
103
|
-
ui.input_numeric("wf_mz", "Enter numeric value for the mass/charge weight factor:", 0.0),
|
|
104
|
-
ui.input_numeric("wf_int", "Enter numeric value for the intensity weight factor:", 1.0),
|
|
105
|
-
ui.input_numeric("LET_threshold", "Enter non-negative numeric value for the low-entropy threshold:", 0.0),
|
|
106
|
-
ui.input_numeric("entropy_dimension", "Enter non-negative, non-unity numeric value for the entropy dimension (only applicable to Renyi and Tsallis):", 1.1),
|
|
107
|
-
ui.input_numeric("n_top_matches_to_save", "Enter positive integer for the number of top matches to save:", 1),
|
|
108
|
-
ui.input_text("output_identification", "Path to identification output:", f'{Path.cwd()}/output_identification.csv'),
|
|
109
|
-
ui.input_text("output_similarity_scores", "Path to output file of similarity scores:", f'{Path.cwd()}/output_similarity_scores.csv'),
|
|
110
|
-
ui.input_action_button("run_btn", "Run"))
|
|
111
|
-
|
|
112
|
-
elif input.choice() == "Tune parameters with a query library with known compound IDs":
|
|
113
|
-
if input.chromatography_platform() == "HRMS":
|
|
114
|
-
return ui.TagList(
|
|
115
|
-
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
116
|
-
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
117
|
-
ui.input_checkbox_group("similarity_measure", "Select similarity measure(s):", ["cosine", "shannon", "renyi", "tsallis"]),
|
|
118
|
-
ui.input_checkbox_group("high_quality_reference_library", "Indicate whether the reference library is considered to be of high quality. If True, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra.", ["no","yes"]),
|
|
119
|
-
ui.input_text("spectrum_preprocessing_order", "Input a sequence of characters denoting the order in which spectrum preprocessing transformations should be applied. Available characters/transformations are C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), and W (weight factor transformation. M must be in sequence, and if C is performed, then C must be performed before M. If multiple spectrum preprocessing orders are to be tried, separate by comma.", "FCNMWL"),
|
|
120
|
-
ui.input_text("mz_min", "Enter numeric value(s) for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 0),
|
|
121
|
-
ui.input_text("mz_max", "Enter numeric value(s) for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 99999999),
|
|
122
|
-
ui.input_text("int_min", "Enter numeric value(s) for minimum intensity for filtering. Separate multiple entries with comma.", 0),
|
|
123
|
-
ui.input_text("int_max", "Enter numeric value(s) for maximum intensity for filtering. Separate multiple entries with comma.", 999999999),
|
|
124
|
-
ui.input_text("window_size_centroiding", "Enter numeric value(s) for the centroiding window-size. Separate multiple entries with comma.", 0.5),
|
|
125
|
-
ui.input_text("window_size_matching", "Enter numeric value(s) for the matching window-size. Separate multiple entries with comma.", 0.5),
|
|
126
|
-
ui.input_text("noise_threshold", "Enter numeric value(s) for the noise removal threshold. Separate multiple entries with comma.", 0.0),
|
|
127
|
-
ui.input_text("wf_mz", "Enter numeric value(s) for the mass/charge weight factor. Separate multiple entries with comma.", 0.0),
|
|
128
|
-
ui.input_text("wf_int", "Enter numeric value(s) for the intensity weight factor. Separate multiple entries with comma.", 1.0),
|
|
129
|
-
ui.input_text("LET_threshold", "Enter non-negative numeric value(s) for the low-entropy threshold. Separate multiple entries with comma.", 0.0),
|
|
130
|
-
ui.input_text("entropy_dimension", "Enter non-negative, non-unity numeric value(s) for the entropy dimension (only applicable to Renyi and Tsallis). Separate multiple entries with comma.", 1.1),
|
|
131
|
-
ui.input_text("output_path", "Path to parameter tuning output:", f'{Path.cwd()}/output_parameter_tuning.csv'),
|
|
132
|
-
ui.input_action_button("run_btn", "Run"))
|
|
133
|
-
else:
|
|
134
|
-
return ui.TagList(
|
|
135
|
-
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
136
|
-
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
137
|
-
ui.input_checkbox_group("similarity_measure", "Select similarity measure(s):", ["cosine", "shannon", "renyi", "tsallis"]),
|
|
138
|
-
ui.input_checkbox_group("high_quality_reference_library", "Indicate whether the reference library is considered to be of high quality. If True, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra.", ["no","yes"]),
|
|
139
|
-
ui.input_text("spectrum_preprocessing_order", "Input a sequence of characters denoting the order in which spectrum preprocessing transformations should be applied. Available characters/transformations are F (filtering), N (noise removal), L (low-entropy transformation), and W (weight factor transformation).", "FNLW"),
|
|
140
|
-
ui.input_text("mz_min", "Enter numeric value(s) for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 0),
|
|
141
|
-
ui.input_text("mz_max", "Enter numeric value(s) for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 99999999),
|
|
142
|
-
ui.input_text("int_min", "Enter numeric value(s) for minimum intensity for filtering. Separate multiple entries with comma.", 0),
|
|
143
|
-
ui.input_text("int_max", "Enter numeric value(s) for maximum intensity for filtering. Separate multiple entries with comma.", 999999999),
|
|
144
|
-
ui.input_text("noise_threshold", "Enter numeric value(s) for the noise removal threshold. Separate multiple entries with comma.", 0.0),
|
|
145
|
-
ui.input_text("wf_mz", "Enter numeric value(s) for the mass/charge weight factor. Separate multiple entries with comma.", 0.0),
|
|
146
|
-
ui.input_text("wf_int", "Enter numeric value(s) for the intensity weight factor. Separate multiple entries with comma.", 1.0),
|
|
147
|
-
ui.input_text("LET_threshold", "Enter non-negative numeric value(s) for the low-entropy threshold. Separate multiple entries with comma.", 0.0),
|
|
148
|
-
ui.input_text("entropy_dimension", "Enter non-negative, non-unity numeric value(s) for the entropy dimension (only applicable to Renyi and Tsallis). Separate multiple entries with comma.", 1.1),
|
|
149
|
-
ui.input_text("output_path", "Path to parameter tuning output:", f'{Path.cwd()}/output_parameter_tuning.csv'),
|
|
150
|
-
ui.input_action_button("run_btn", "Run"))
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
elif input.choice() == "Plot two spectra":
|
|
154
|
-
if input.chromatography_platform() == "HRMS":
|
|
155
|
-
return ui.TagList(
|
|
156
|
-
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
157
|
-
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
158
|
-
ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None),
|
|
159
|
-
ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None),
|
|
160
|
-
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine", "shannon", "renyi", "tsallis"]),
|
|
161
|
-
ui.input_select("high_quality_reference_library", "Indicate whether the reference library is considered to be of high quality. If True, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra.", [False,True]),
|
|
162
|
-
ui.input_text("spectrum_preprocessing_order", "Input a sequence of characters denoting the order in which spectrum preprocessing transformations should be applied. Available characters/transformations are C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), and W (weight factor transformation. M must be in sequence, and if C is performed, then C must be performed before M. If multiple spectrum preprocessing orders are to be tried, separate by comma.", "FCNMWL"),
|
|
163
|
-
ui.input_numeric("mz_min", "Enter numeric value for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 0),
|
|
164
|
-
ui.input_numeric("mz_max", "Enter numeric value for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 99999999),
|
|
165
|
-
ui.input_numeric("int_min", "Enter numeric value for minimum intensity for filtering. Separate multiple entries with comma.", 0),
|
|
166
|
-
ui.input_numeric("int_max", "Enter numeric value for maximum intensity for filtering. Separate multiple entries with comma.", 999999999),
|
|
167
|
-
ui.input_numeric("window_size_centroiding", "Enter numeric value for the centroiding window-size. Separate multiple entries with comma.", 0.5),
|
|
168
|
-
ui.input_numeric("window_size_matching", "Enter numeric value for the matching window-size. Separate multiple entries with comma.", 0.5),
|
|
169
|
-
ui.input_numeric("noise_threshold", "Enter numeric value for the noise removal threshold. Separate multiple entries with comma.", 0.0),
|
|
170
|
-
ui.input_numeric("wf_mz", "Enter numeric value for the mass/charge weight factor. Separate multiple entries with comma.", 0.0),
|
|
171
|
-
ui.input_numeric("wf_int", "Enter numeric value for the intensity weight factor. Separate multiple entries with comma.", 1.0),
|
|
172
|
-
ui.input_numeric("LET_threshold", "Enter non-negative numeric value for the low-entropy threshold. Separate multiple entries with comma.", 0.0),
|
|
173
|
-
ui.input_numeric("entropy_dimension", "Enter non-negative, non-unity numeric value for the entropy dimension (only applicable to Renyi and Tsallis). Separate multiple entries with comma.", 1.1),
|
|
174
|
-
ui.input_select("y_axis_transformation", "Select the transformation to apply to the intensity axis of the generated plots:", ["normalized", "none", "log10", "sqrt"]),
|
|
175
|
-
ui.input_text("output_path", "Path to parameter tuning output:", f'{Path.cwd()}/output_plots.pdf'),
|
|
176
|
-
ui.input_action_button("run_btn", "Run"))
|
|
177
|
-
else:
|
|
178
|
-
return ui.TagList(
|
|
179
|
-
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
180
|
-
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
181
|
-
ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None),
|
|
182
|
-
ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None),
|
|
183
|
-
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine", "shannon", "renyi", "tsallis"]),
|
|
184
|
-
ui.input_select("high_quality_reference_library", "Indicate whether the reference library is considered to be of high quality. If True, then the spectrum preprocessing transformations of filtering and noise removal are performed only on the query spectrum/spectra.", [False,True]),
|
|
185
|
-
ui.input_text("spectrum_preprocessing_order", "Input a sequence of characters denoting the order in which spectrum preprocessing transformations should be applied. Available characters/transformations are F (filtering), N (noise removal), L (low-entropy transformation), and W (weight factor transformation).", "FNLW"),
|
|
186
|
-
ui.input_numeric("mz_min", "Enter numeric value for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 0),
|
|
187
|
-
ui.input_numeric("mz_max", "Enter numeric value for minimum mass/charge ratio for filtering. Separate multiple entries with comma.", 99999999),
|
|
188
|
-
ui.input_numeric("int_min", "Enter numeric value for minimum intensity for filtering. Separate multiple entries with comma.", 0),
|
|
189
|
-
ui.input_numeric("int_max", "Enter numeric value for maximum intensity for filtering. Separate multiple entries with comma.", 999999999),
|
|
190
|
-
ui.input_numeric("noise_threshold", "Enter numeric value for the noise removal threshold. Separate multiple entries with comma.", 0.0),
|
|
191
|
-
ui.input_numeric("wf_mz", "Enter numeric value for the mass/charge weight factor. Separate multiple entries with comma.", 0.0),
|
|
192
|
-
ui.input_numeric("wf_int", "Enter numeric value for the intensity weight factor. Separate multiple entries with comma.", 1.0),
|
|
193
|
-
ui.input_numeric("LET_threshold", "Enter non-negative numeric value for the low-entropy threshold. Separate multiple entries with comma.", 0.0),
|
|
194
|
-
ui.input_numeric("entropy_dimension", "Enter non-negative, non-unity numeric value for the entropy dimension (only applicable to Renyi and Tsallis). Separate multiple entries with comma.", 1.1),
|
|
195
|
-
ui.input_select("y_axis_transformation", "Select the transformation to apply to the intensity axis of the generated plots:", ["normalized", "none", "log10", "sqrt"]),
|
|
196
|
-
ui.input_text("output_path", "Path to parameter tuning output:", f'{Path.cwd()}/output_plot.pdf'),
|
|
197
|
-
ui.input_action_button("run_btn", "Run"))
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
@reactive.effect
|
|
201
|
-
@reactive.event(input.run_btn)
|
|
202
|
-
def _():
|
|
203
|
-
choice = input.choice()
|
|
204
|
-
|
|
205
|
-
if choice == "Run spectral library matching to identify unknown compounds":
|
|
206
|
-
if input.chromatography_platform() == "HRMS":
|
|
207
|
-
try:
|
|
208
|
-
run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv')
|
|
209
|
-
df_identification_tmp = pd.read_csv(f'{Path.cwd()}/output_identification.csv')
|
|
210
|
-
df_similarity_scores_tmp = pd.read_csv(f'{Path.cwd()}/output_similarity_scores.csv')
|
|
211
|
-
df_identification_tmp.to_csv(input.output_identification(), index=False)
|
|
212
|
-
df_similarity_scores_tmp.to_csv(input.output_similarity_scores(), index=False)
|
|
213
|
-
run_status.set(f"✅ Spectral library matching has finished.")
|
|
214
|
-
except Exception as e:
|
|
215
|
-
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
216
|
-
elif input.chromatography_platform == "NRMS":
|
|
217
|
-
try:
|
|
218
|
-
run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv')
|
|
219
|
-
df_identification_tmp = pd.read_csv(f'{Path.cwd()}/output_identification.csv')
|
|
220
|
-
df_similarity_scores_tmp = pd.read_csv(f'{Path.cwd()}/output_similarity_scores.csv')
|
|
221
|
-
df_identification_tmp.to_csv(input.output_identification(), index=False)
|
|
222
|
-
df_similarity_scores_tmp.to_csv(input.output_similarity_scores(), index=False)
|
|
223
|
-
run_status.set(f"✅ Spectral library matching has finished.")
|
|
224
|
-
except Exception as e:
|
|
225
|
-
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
elif choice == "Tune parameters with a query library with known compound IDs":
|
|
230
|
-
high_quality_reference_library_tmp = custom_on_off_to_bool(list(input.high_quality_reference_library()))
|
|
231
|
-
if input.chromatography_platform() == "HRMS":
|
|
232
|
-
try:
|
|
233
|
-
grid = {'similarity_measure':list(input.similarity_measure()),
|
|
234
|
-
'high_quality_reference_library':high_quality_reference_library_tmp,
|
|
235
|
-
'spectrum_preprocessing_order':split_or_wrap(input.spectrum_preprocessing_order()),
|
|
236
|
-
'mz_min':split_or_wrap(input.mz_min()),
|
|
237
|
-
'mz_max':split_or_wrap(input.mz_max()),
|
|
238
|
-
'int_min':split_or_wrap(input.int_min()),
|
|
239
|
-
'int_max':split_or_wrap(input.int_max()),
|
|
240
|
-
'window_size_centroiding':split_or_wrap(input.window_size_centroiding()),
|
|
241
|
-
'window_size_matching':split_or_wrap(input.window_size_matching()),
|
|
242
|
-
'noise_threshold':split_or_wrap(input.noise_threshold()),
|
|
243
|
-
'wf_mz':split_or_wrap(input.wf_mz()),
|
|
244
|
-
'wf_int':split_or_wrap(input.wf_int()),
|
|
245
|
-
'LET_threshold':split_or_wrap(input.LET_threshold()),
|
|
246
|
-
'entropy_dimension':split_or_wrap(input.entropy_dimension())}
|
|
247
|
-
tune_params_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], grid=grid, output_path=input.output_path())
|
|
248
|
-
run_status.set(f"✅ Parameter tuning has finished.")
|
|
249
|
-
except Exception as e:
|
|
250
|
-
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
251
|
-
elif input.chromatography_platform() == "NRMS":
|
|
252
|
-
try:
|
|
253
|
-
grid = {'similarity_measure':list(input.similarity_measure()),
|
|
254
|
-
'high_quality_reference_library':high_quality_reference_library_tmp,
|
|
255
|
-
'spectrum_preprocessing_order':split_or_wrap(input.spectrum_preprocessing_order()),
|
|
256
|
-
'mz_min':split_or_wrap(input.mz_min()),
|
|
257
|
-
'mz_max':split_or_wrap(input.mz_max()),
|
|
258
|
-
'int_min':split_or_wrap(input.int_min()),
|
|
259
|
-
'int_max':split_or_wrap(input.int_max()),
|
|
260
|
-
'noise_threshold':split_or_wrap(input.noise_threshold()),
|
|
261
|
-
'wf_mz':split_or_wrap(input.wf_mz()),
|
|
262
|
-
'wf_int':split_or_wrap(input.wf_int()),
|
|
263
|
-
'LET_threshold':split_or_wrap(input.LET_threshold()),
|
|
264
|
-
'entropy_dimension':split_or_wrap(input.entropy_dimension())}
|
|
265
|
-
tune_params_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], grid=grid, output_path=input.output_path())
|
|
266
|
-
run_status.set(f"✅ Parameter tuning has finished.")
|
|
267
|
-
except Exception as e:
|
|
268
|
-
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
elif choice == "Plot two spectra":
|
|
274
|
-
if len(input.spectrum_ID1())==0:
|
|
275
|
-
spectrum_ID1 = None
|
|
276
|
-
if len(input.spectrum_ID2())==0:
|
|
277
|
-
spectrum_ID2 = None
|
|
278
|
-
|
|
279
|
-
if input.chromatography_platform() == "HRMS":
|
|
280
|
-
try:
|
|
281
|
-
generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), output_path=input.output_path())
|
|
282
|
-
run_status.set(f"✅ Plotting has finished.")
|
|
283
|
-
except Exception as e:
|
|
284
|
-
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
285
|
-
elif input.chromatography_platform == "NRMS":
|
|
286
|
-
try:
|
|
287
|
-
generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=input.spectrum_ID1(), spectrum_ID2=input.spectrum_ID2(), similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), output_path=input.output_path())
|
|
288
|
-
run_status.set(f"✅ Plotting has finished.")
|
|
289
|
-
except Exception as e:
|
|
290
|
-
run_status.set(f"❌ Error: {traceback.format_exc()}")
|
|
291
|
-
|
|
292
|
-
@output
|
|
293
|
-
@render.text
|
|
294
|
-
def status_output():
|
|
295
|
-
return run_status.get()
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
app = App(app_ui, server)
|
|
299
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|