pycompound 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +164 -196
- pycompound/build_library.py +2 -9
- pycompound/plot_spectra.py +25 -48
- pycompound/processing.py +0 -9
- pycompound/similarity_measures.py +0 -3
- pycompound/spec_lib_matching.py +246 -81
- pycompound/spec_lib_matching_CLI.py +2 -7
- pycompound/tuning_CLI.py +1 -1
- {pycompound-0.1.1.dist-info → pycompound-0.1.3.dist-info}/METADATA +1 -1
- pycompound-0.1.3.dist-info/RECORD +14 -0
- pycompound-0.1.1.dist-info/RECORD +0 -14
- {pycompound-0.1.1.dist-info → pycompound-0.1.3.dist-info}/WHEEL +0 -0
- {pycompound-0.1.1.dist-info → pycompound-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {pycompound-0.1.1.dist-info → pycompound-0.1.3.dist-info}/top_level.txt +0 -0
app.py
CHANGED
|
@@ -4,6 +4,8 @@ from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
|
4
4
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
5
|
from pycompound.spec_lib_matching import tune_params_on_HRMS_data
|
|
6
6
|
from pycompound.spec_lib_matching import tune_params_on_NRMS_data
|
|
7
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_shiny
|
|
8
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_shiny
|
|
7
9
|
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
8
10
|
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
9
11
|
from pathlib import Path
|
|
@@ -20,10 +22,16 @@ import numpy as np
|
|
|
20
22
|
import netCDF4 as nc
|
|
21
23
|
from pyteomics import mgf, mzml
|
|
22
24
|
import ast
|
|
25
|
+
from numbers import Real
|
|
26
|
+
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
26
30
|
|
|
31
|
+
def _run_with_redirects(fn, writer, *args, **kwargs):
|
|
32
|
+
with redirect_stdout(writer), redirect_stderr(writer):
|
|
33
|
+
return fn(*args, **kwargs)
|
|
34
|
+
|
|
27
35
|
|
|
28
36
|
def strip_text(s):
|
|
29
37
|
return [x.strip() for x in s.strip('[]').split(',') if x.strip()]
|
|
@@ -34,9 +42,23 @@ def strip_numeric(s):
|
|
|
34
42
|
|
|
35
43
|
|
|
36
44
|
def strip_weights(s):
|
|
37
|
-
|
|
45
|
+
obj = ast.literal_eval(s) if isinstance(s, (str, bytes)) else s
|
|
38
46
|
keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
|
|
39
|
-
|
|
47
|
+
|
|
48
|
+
if isinstance(obj, (list, tuple)):
|
|
49
|
+
if len(obj) == 4 and all(isinstance(x, Real) for x in obj):
|
|
50
|
+
tuples = [obj]
|
|
51
|
+
else:
|
|
52
|
+
tuples = list(obj)
|
|
53
|
+
else:
|
|
54
|
+
raise ValueError(f"Expected a 4-tuple or a sequence of 4-tuples, got {type(obj).__name__}")
|
|
55
|
+
|
|
56
|
+
out = []
|
|
57
|
+
for t in tuples:
|
|
58
|
+
if not (isinstance(t, (list, tuple)) and len(t) == 4):
|
|
59
|
+
raise ValueError(f"Each item must be a 4-tuple, got: {t!r}")
|
|
60
|
+
out.append(dict(zip(keys, t)))
|
|
61
|
+
return out
|
|
40
62
|
|
|
41
63
|
|
|
42
64
|
def build_library(input_path=None, output_path=None):
|
|
@@ -177,7 +199,6 @@ def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
|
|
|
177
199
|
|
|
178
200
|
|
|
179
201
|
def plot_spectra_ui(platform: str):
|
|
180
|
-
# Base inputs common to all platforms
|
|
181
202
|
base_inputs = [
|
|
182
203
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
183
204
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
@@ -196,7 +217,7 @@ def plot_spectra_ui(platform: str):
|
|
|
196
217
|
options={"placeholder": "Upload a library..."},
|
|
197
218
|
),
|
|
198
219
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
199
|
-
ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
220
|
+
ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
200
221
|
ui.input_select(
|
|
201
222
|
"high_quality_reference_library",
|
|
202
223
|
"Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
|
|
@@ -204,7 +225,6 @@ def plot_spectra_ui(platform: str):
|
|
|
204
225
|
),
|
|
205
226
|
]
|
|
206
227
|
|
|
207
|
-
# Extra inputs depending on platform
|
|
208
228
|
if platform == "HRMS":
|
|
209
229
|
extra_inputs = [
|
|
210
230
|
ui.input_text(
|
|
@@ -224,7 +244,6 @@ def plot_spectra_ui(platform: str):
|
|
|
224
244
|
)
|
|
225
245
|
]
|
|
226
246
|
|
|
227
|
-
# Numeric inputs
|
|
228
247
|
numeric_inputs = [
|
|
229
248
|
ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
|
|
230
249
|
ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
|
|
@@ -237,18 +256,15 @@ def plot_spectra_ui(platform: str):
|
|
|
237
256
|
ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
|
|
238
257
|
]
|
|
239
258
|
|
|
240
|
-
# Y-axis transformation select input
|
|
241
259
|
select_input = ui.input_select(
|
|
242
260
|
"y_axis_transformation",
|
|
243
261
|
"Transformation to apply to intensity axis:",
|
|
244
262
|
["normalized", "none", "log10", "sqrt"],
|
|
245
263
|
)
|
|
246
264
|
|
|
247
|
-
# Run and Back buttons
|
|
248
265
|
run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
249
266
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
250
267
|
|
|
251
|
-
# Layout base_inputs and extra_inputs in columns
|
|
252
268
|
if platform == "HRMS":
|
|
253
269
|
inputs_columns = ui.layout_columns(
|
|
254
270
|
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
@@ -266,7 +282,6 @@ def plot_spectra_ui(platform: str):
|
|
|
266
282
|
col_widths=(3,3,3,3),
|
|
267
283
|
)
|
|
268
284
|
|
|
269
|
-
# Combine everything
|
|
270
285
|
return ui.div(
|
|
271
286
|
ui.TagList(
|
|
272
287
|
ui.h2("Plot Spectra"),
|
|
@@ -281,12 +296,11 @@ def plot_spectra_ui(platform: str):
|
|
|
281
296
|
|
|
282
297
|
|
|
283
298
|
def run_spec_lib_matching_ui(platform: str):
|
|
284
|
-
# Base inputs common to all platforms
|
|
285
299
|
base_inputs = [
|
|
286
300
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
287
301
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
288
302
|
ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
|
|
289
|
-
ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
303
|
+
ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
|
|
290
304
|
ui.input_selectize(
|
|
291
305
|
"spectrum_ID1",
|
|
292
306
|
"Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
|
|
@@ -308,7 +322,6 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
308
322
|
)
|
|
309
323
|
]
|
|
310
324
|
|
|
311
|
-
# Extra inputs depending on platform
|
|
312
325
|
if platform == "HRMS":
|
|
313
326
|
extra_inputs = [
|
|
314
327
|
ui.input_text(
|
|
@@ -328,7 +341,6 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
328
341
|
)
|
|
329
342
|
]
|
|
330
343
|
|
|
331
|
-
# Numeric inputs
|
|
332
344
|
numeric_inputs = [
|
|
333
345
|
ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
|
|
334
346
|
ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
|
|
@@ -343,12 +355,10 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
343
355
|
]
|
|
344
356
|
|
|
345
357
|
|
|
346
|
-
# Run and Back buttons
|
|
347
358
|
run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
348
359
|
run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
349
360
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
350
361
|
|
|
351
|
-
# Layout base_inputs and extra_inputs in columns
|
|
352
362
|
if platform == "HRMS":
|
|
353
363
|
inputs_columns = ui.layout_columns(
|
|
354
364
|
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
@@ -372,7 +382,6 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
372
382
|
style="max-height:300px; overflow:auto"
|
|
373
383
|
)
|
|
374
384
|
|
|
375
|
-
# Combine everything
|
|
376
385
|
return ui.div(
|
|
377
386
|
ui.TagList(
|
|
378
387
|
ui.h2("Run Spectral Library Matching"),
|
|
@@ -387,16 +396,14 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
387
396
|
|
|
388
397
|
|
|
389
398
|
def run_parameter_tuning_ui(platform: str):
|
|
390
|
-
# Base inputs common to all platforms
|
|
391
399
|
base_inputs = [
|
|
392
400
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
393
401
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
394
402
|
ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
|
|
395
|
-
ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25)
|
|
403
|
+
ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25))'),
|
|
396
404
|
ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]')
|
|
397
405
|
]
|
|
398
406
|
|
|
399
|
-
# Extra inputs depending on platform
|
|
400
407
|
if platform == "HRMS":
|
|
401
408
|
extra_inputs = [
|
|
402
409
|
ui.input_text(
|
|
@@ -416,7 +423,6 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
416
423
|
)
|
|
417
424
|
]
|
|
418
425
|
|
|
419
|
-
# Numeric inputs
|
|
420
426
|
numeric_inputs = [
|
|
421
427
|
ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
|
|
422
428
|
ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
|
|
@@ -430,11 +436,9 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
430
436
|
]
|
|
431
437
|
|
|
432
438
|
|
|
433
|
-
# Run and Back buttons
|
|
434
439
|
run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
435
440
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
436
441
|
|
|
437
|
-
# Layout base_inputs and extra_inputs in columns
|
|
438
442
|
if platform == "HRMS":
|
|
439
443
|
inputs_columns = ui.layout_columns(
|
|
440
444
|
ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
|
|
@@ -458,7 +462,6 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
458
462
|
style="max-height:300px; overflow:auto"
|
|
459
463
|
)
|
|
460
464
|
|
|
461
|
-
# Combine everything
|
|
462
465
|
return ui.div(
|
|
463
466
|
ui.TagList(
|
|
464
467
|
ui.h2("Tune parameters"),
|
|
@@ -494,6 +497,9 @@ def server(input, output, session):
|
|
|
494
497
|
match_log_rv = reactive.Value("")
|
|
495
498
|
is_matching_rv = reactive.Value(False)
|
|
496
499
|
is_any_job_running = reactive.Value(False)
|
|
500
|
+
latest_csv_path_rv = reactive.Value("")
|
|
501
|
+
latest_df_rv = reactive.Value(None)
|
|
502
|
+
is_running_rv = reactive.Value(False)
|
|
497
503
|
|
|
498
504
|
query_ids_rv = reactive.Value([])
|
|
499
505
|
query_file_path_rv = reactive.Value(None)
|
|
@@ -508,7 +514,63 @@ def server(input, output, session):
|
|
|
508
514
|
converted_reference_path_rv = reactive.Value(None)
|
|
509
515
|
|
|
510
516
|
|
|
511
|
-
|
|
517
|
+
def _reset_plot_spectra_state():
|
|
518
|
+
query_status_rv.set("")
|
|
519
|
+
reference_status_rv.set("")
|
|
520
|
+
query_ids_rv.set([])
|
|
521
|
+
reference_ids_rv.set([])
|
|
522
|
+
query_file_path_rv.set(None)
|
|
523
|
+
reference_file_path_rv.set(None)
|
|
524
|
+
query_result_rv.set(None)
|
|
525
|
+
reference_result_rv.set(None)
|
|
526
|
+
converted_query_path_rv.set(None)
|
|
527
|
+
converted_reference_path_rv.set(None)
|
|
528
|
+
try:
|
|
529
|
+
ui.update_selectize("spectrum_ID1", choices=[], selected=None)
|
|
530
|
+
ui.update_selectize("spectrum_ID2", choices=[], selected=None)
|
|
531
|
+
except Exception:
|
|
532
|
+
pass
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _reset_spec_lib_matching_state():
|
|
536
|
+
match_log_rv.set("")
|
|
537
|
+
is_matching_rv.set(False)
|
|
538
|
+
is_any_job_running.set(False)
|
|
539
|
+
try:
|
|
540
|
+
ui.update_selectize("spectrum_ID1", choices=[], selected=None)
|
|
541
|
+
ui.update_selectize("spectrum_ID2", choices=[], selected=None)
|
|
542
|
+
except Exception:
|
|
543
|
+
pass
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _reset_parameter_tuning_state():
|
|
547
|
+
match_log_rv.set("")
|
|
548
|
+
is_tuning_running.set(False)
|
|
549
|
+
is_any_job_running.set(False)
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
@reactive.effect
|
|
553
|
+
@reactive.event(input.back)
|
|
554
|
+
def _clear_on_back_from_pages():
|
|
555
|
+
page = current_page()
|
|
556
|
+
if page == "plot_spectra":
|
|
557
|
+
_reset_plot_spectra_state()
|
|
558
|
+
elif page == "run_spec_lib_matching":
|
|
559
|
+
_reset_spec_lib_matching_state()
|
|
560
|
+
elif page == "run_parameter_tuning":
|
|
561
|
+
_reset_parameter_tuning_state()
|
|
562
|
+
|
|
563
|
+
@reactive.effect
|
|
564
|
+
def _clear_on_enter_pages():
|
|
565
|
+
page = current_page()
|
|
566
|
+
if page == "plot_spectra":
|
|
567
|
+
_reset_plot_spectra_state()
|
|
568
|
+
elif page == "run_spec_lib_matching":
|
|
569
|
+
_reset_spec_lib_matching_state()
|
|
570
|
+
elif page == "run_parameter_tuning":
|
|
571
|
+
_reset_parameter_tuning_state()
|
|
572
|
+
|
|
573
|
+
|
|
512
574
|
def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
|
|
513
575
|
out = []
|
|
514
576
|
try:
|
|
@@ -519,12 +581,23 @@ def server(input, output, session):
|
|
|
519
581
|
return out
|
|
520
582
|
|
|
521
583
|
|
|
584
|
+
class ReactiveWriter(io.TextIOBase):
|
|
585
|
+
def __init__(self, loop: asyncio.AbstractEventLoop):
|
|
586
|
+
self._loop = loop
|
|
587
|
+
def write(self, s: str):
|
|
588
|
+
if not s:
|
|
589
|
+
return 0
|
|
590
|
+
self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
|
|
591
|
+
return len(s)
|
|
592
|
+
def flush(self):
|
|
593
|
+
pass
|
|
594
|
+
|
|
595
|
+
|
|
522
596
|
@reactive.effect
|
|
523
597
|
async def _pump_logs():
|
|
524
|
-
if not is_any_job_running.get():
|
|
598
|
+
if not (is_any_job_running.get() or is_tuning_running.get() or is_matching_rv.get()):
|
|
525
599
|
return
|
|
526
|
-
|
|
527
|
-
reactive.invalidate_later(0.1)
|
|
600
|
+
reactive.invalidate_later(0.05)
|
|
528
601
|
msgs = _drain_queue_nowait(_LOG_QUEUE)
|
|
529
602
|
if msgs:
|
|
530
603
|
match_log_rv.set(match_log_rv.get() + "".join(msgs))
|
|
@@ -593,24 +666,6 @@ def server(input, output, session):
|
|
|
593
666
|
return match_log_rv.get()
|
|
594
667
|
|
|
595
668
|
|
|
596
|
-
class ReactiveWriter(io.TextIOBase):
|
|
597
|
-
def __init__(self, rv):
|
|
598
|
-
self.rv = rv
|
|
599
|
-
def write(self, s: str):
|
|
600
|
-
if not s:
|
|
601
|
-
return 0
|
|
602
|
-
self.rv.set(self.rv.get() + s)
|
|
603
|
-
try:
|
|
604
|
-
loop = asyncio.get_running_loop()
|
|
605
|
-
loop.create_task(reactive.flush())
|
|
606
|
-
except RuntimeError:
|
|
607
|
-
pass
|
|
608
|
-
return len(s)
|
|
609
|
-
def flush(self):
|
|
610
|
-
pass
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
669
|
@reactive.Effect
|
|
615
670
|
def _():
|
|
616
671
|
if input.plot_spectra() > plot_clicks.get():
|
|
@@ -629,8 +684,6 @@ def server(input, output, session):
|
|
|
629
684
|
|
|
630
685
|
@render.image
|
|
631
686
|
def image():
|
|
632
|
-
from pathlib import Path
|
|
633
|
-
|
|
634
687
|
dir = Path(__file__).resolve().parent
|
|
635
688
|
img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
|
|
636
689
|
return img
|
|
@@ -726,9 +779,6 @@ def server(input, output, session):
|
|
|
726
779
|
@reactive.effect
|
|
727
780
|
@reactive.event(input.query_data)
|
|
728
781
|
async def _populate_ids_from_query_upload():
|
|
729
|
-
#if current_page() != "plot_spectra":
|
|
730
|
-
# return
|
|
731
|
-
|
|
732
782
|
files = input.query_data()
|
|
733
783
|
if not files:
|
|
734
784
|
return
|
|
@@ -736,7 +786,6 @@ def server(input, output, session):
|
|
|
736
786
|
in_path = Path(files[0]["datapath"])
|
|
737
787
|
suffix = in_path.suffix.lower()
|
|
738
788
|
|
|
739
|
-
# Decide what CSV to read IDs from
|
|
740
789
|
try:
|
|
741
790
|
if suffix == ".csv":
|
|
742
791
|
csv_path = in_path
|
|
@@ -745,17 +794,14 @@ def server(input, output, session):
|
|
|
745
794
|
query_status_rv.set(f"Converting {in_path.name} → CSV …")
|
|
746
795
|
await reactive.flush()
|
|
747
796
|
|
|
748
|
-
# Choose an output temp path next to the upload
|
|
749
797
|
tmp_csv_path = in_path.with_suffix(".converted.csv")
|
|
750
798
|
|
|
751
799
|
out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
|
|
752
800
|
|
|
753
|
-
# out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
|
|
754
801
|
if isinstance(out_obj, (str, os.PathLike, Path)):
|
|
755
802
|
csv_path = Path(out_obj)
|
|
756
803
|
elif isinstance(out_obj, pd.DataFrame):
|
|
757
|
-
|
|
758
|
-
out_obj.to_csv(tmp_csv_path, index=False)
|
|
804
|
+
out_obj.to_csv(tmp_csv_path, index=False, sep='\t')
|
|
759
805
|
csv_path = tmp_csv_path
|
|
760
806
|
else:
|
|
761
807
|
raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
|
|
@@ -765,16 +811,12 @@ def server(input, output, session):
|
|
|
765
811
|
query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
|
|
766
812
|
await reactive.flush()
|
|
767
813
|
|
|
768
|
-
# Extract IDs from the CSV’s first column
|
|
769
814
|
ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
|
|
770
815
|
query_ids_rv.set(ids)
|
|
771
816
|
|
|
772
|
-
# Update dropdowns
|
|
773
817
|
ui.update_selectize("spectrum_ID1", choices=ids, selected=(ids[0] if ids else None))
|
|
774
818
|
|
|
775
|
-
query_status_rv.set(
|
|
776
|
-
f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
|
|
777
|
-
)
|
|
819
|
+
query_status_rv.set(f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}")
|
|
778
820
|
await reactive.flush()
|
|
779
821
|
|
|
780
822
|
except Exception as e:
|
|
@@ -786,9 +828,6 @@ def server(input, output, session):
|
|
|
786
828
|
@reactive.effect
|
|
787
829
|
@reactive.event(input.reference_data)
|
|
788
830
|
async def _populate_ids_from_reference_upload():
|
|
789
|
-
#if current_page() != "plot_spectra":
|
|
790
|
-
# return
|
|
791
|
-
|
|
792
831
|
files = input.reference_data()
|
|
793
832
|
if not files:
|
|
794
833
|
return
|
|
@@ -796,7 +835,6 @@ def server(input, output, session):
|
|
|
796
835
|
in_path = Path(files[0]["datapath"])
|
|
797
836
|
suffix = in_path.suffix.lower()
|
|
798
837
|
|
|
799
|
-
# Decide what CSV to read IDs from
|
|
800
838
|
try:
|
|
801
839
|
if suffix == ".csv":
|
|
802
840
|
csv_path = in_path
|
|
@@ -805,17 +843,14 @@ def server(input, output, session):
|
|
|
805
843
|
reference_status_rv.set(f"Converting {in_path.name} → CSV …")
|
|
806
844
|
await reactive.flush()
|
|
807
845
|
|
|
808
|
-
# Choose an output temp path next to the upload
|
|
809
846
|
tmp_csv_path = in_path.with_suffix(".converted.csv")
|
|
810
847
|
|
|
811
848
|
out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
|
|
812
849
|
|
|
813
|
-
# out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
|
|
814
850
|
if isinstance(out_obj, (str, os.PathLike, Path)):
|
|
815
851
|
csv_path = Path(out_obj)
|
|
816
852
|
elif isinstance(out_obj, pd.DataFrame):
|
|
817
|
-
|
|
818
|
-
out_obj.to_csv(tmp_csv_path, index=False)
|
|
853
|
+
out_obj.to_csv(tmp_csv_path, index=False, sep='\t')
|
|
819
854
|
csv_path = tmp_csv_path
|
|
820
855
|
else:
|
|
821
856
|
raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
|
|
@@ -825,11 +860,9 @@ def server(input, output, session):
|
|
|
825
860
|
reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
|
|
826
861
|
await reactive.flush()
|
|
827
862
|
|
|
828
|
-
# Extract IDs from the CSV’s first column
|
|
829
863
|
ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
|
|
830
864
|
reference_ids_rv.set(ids)
|
|
831
865
|
|
|
832
|
-
# Update dropdowns
|
|
833
866
|
ui.update_selectize("spectrum_ID2", choices=ids, selected=(ids[0] if ids else None))
|
|
834
867
|
|
|
835
868
|
reference_status_rv.set(
|
|
@@ -851,11 +884,18 @@ def server(input, output, session):
|
|
|
851
884
|
weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
|
|
852
885
|
weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
|
|
853
886
|
|
|
887
|
+
high_quality_reference_library_tmp2 = False
|
|
888
|
+
if input.high_quality_reference_library() != 'False':
|
|
889
|
+
high_quality_reference_library_tmp2 = True
|
|
890
|
+
|
|
891
|
+
print(input.high_quality_reference_library())
|
|
892
|
+
print(high_quality_reference_library_tmp2)
|
|
893
|
+
|
|
854
894
|
if input.chromatography_platform() == "HRMS":
|
|
855
|
-
fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=
|
|
895
|
+
fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
|
|
856
896
|
plt.show()
|
|
857
897
|
elif input.chromatography_platform() == "NRMS":
|
|
858
|
-
fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=
|
|
898
|
+
fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
|
|
859
899
|
plt.show()
|
|
860
900
|
with io.BytesIO() as buf:
|
|
861
901
|
fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
|
|
@@ -863,34 +903,10 @@ def server(input, output, session):
|
|
|
863
903
|
yield buf.getvalue()
|
|
864
904
|
|
|
865
905
|
|
|
866
|
-
@render.text
|
|
867
|
-
def status_output():
|
|
868
|
-
return run_status_plot_spectra.get()
|
|
869
|
-
return run_status_spec_lib_matching.get()
|
|
870
|
-
return run_status_parameter_tuning.get()
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
class ReactiveWriter(io.TextIOBase):
|
|
874
|
-
def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
|
|
875
|
-
self.rv = rv
|
|
876
|
-
self.loop = loop
|
|
877
|
-
|
|
878
|
-
def write(self, s: str):
|
|
879
|
-
if not s:
|
|
880
|
-
return 0
|
|
881
|
-
def _apply():
|
|
882
|
-
self.rv.set(self.rv.get() + s)
|
|
883
|
-
self.loop.create_task(reactive.flush())
|
|
884
|
-
self.loop.call_soon_threadsafe(_apply)
|
|
885
|
-
return len(s)
|
|
886
906
|
|
|
887
|
-
|
|
888
|
-
pass
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
@render.download(filename="identification_output.csv")
|
|
907
|
+
@render.download(filename="identification_output.txt")
|
|
892
908
|
async def run_btn_spec_lib_matching():
|
|
893
|
-
match_log_rv.set("
|
|
909
|
+
match_log_rv.set("Running identification...\n")
|
|
894
910
|
await reactive.flush()
|
|
895
911
|
|
|
896
912
|
hq = input.high_quality_reference_library()
|
|
@@ -916,14 +932,14 @@ def server(input, output, session):
|
|
|
916
932
|
wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
|
|
917
933
|
LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
|
|
918
934
|
n_top_matches_to_save=input.n_top_matches_to_save(),
|
|
919
|
-
print_id_results=True,
|
|
920
|
-
output_identification=str(Path.cwd() / "identification_output.
|
|
921
|
-
output_similarity_scores=str(Path.cwd() / "similarity_scores.
|
|
935
|
+
print_id_results=True,
|
|
936
|
+
output_identification=str(Path.cwd() / "identification_output.txt"),
|
|
937
|
+
output_similarity_scores=str(Path.cwd() / "similarity_scores.txt"),
|
|
922
938
|
return_ID_output=True,
|
|
923
939
|
)
|
|
924
940
|
|
|
925
941
|
loop = asyncio.get_running_loop()
|
|
926
|
-
rw = ReactiveWriter(
|
|
942
|
+
rw = ReactiveWriter(loop)
|
|
927
943
|
|
|
928
944
|
try:
|
|
929
945
|
with redirect_stdout(rw), redirect_stderr(rw):
|
|
@@ -943,7 +959,7 @@ def server(input, output, session):
|
|
|
943
959
|
await reactive.flush()
|
|
944
960
|
raise
|
|
945
961
|
|
|
946
|
-
yield df_out.to_csv(index=True)
|
|
962
|
+
yield df_out.to_csv(index=True, sep='\t')
|
|
947
963
|
|
|
948
964
|
|
|
949
965
|
|
|
@@ -998,62 +1014,12 @@ def server(input, output, session):
|
|
|
998
1014
|
yield buf.getvalue()
|
|
999
1015
|
|
|
1000
1016
|
|
|
1001
|
-
|
|
1002
|
-
@render.download(filename="parameter_tuning_output.csv")
|
|
1003
|
-
async def run_btn_parameter_tuning():
|
|
1004
|
-
match_log_rv.set("Running grid search of all parameters specified...\n")
|
|
1005
|
-
|
|
1006
|
-
similarity_measure_tmp = list(input.similarity_measure())
|
|
1007
|
-
high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
|
|
1008
|
-
spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
|
|
1009
|
-
mz_min_tmp = strip_numeric(input.mz_min())
|
|
1010
|
-
mz_max_tmp = strip_numeric(input.mz_max())
|
|
1011
|
-
int_min_tmp = strip_numeric(input.int_min())
|
|
1012
|
-
int_max_tmp = strip_numeric(input.int_max())
|
|
1013
|
-
noise_threshold_tmp = strip_numeric(input.noise_threshold())
|
|
1014
|
-
wf_mz_tmp = strip_numeric(input.wf_mz())
|
|
1015
|
-
wf_int_tmp = strip_numeric(input.wf_int())
|
|
1016
|
-
LET_threshold_tmp = strip_numeric(input.LET_threshold())
|
|
1017
|
-
entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
|
|
1018
|
-
weights_tmp = strip_weights(input.weights())
|
|
1019
|
-
|
|
1020
|
-
common_kwargs = dict(
|
|
1021
|
-
query_data=input.query_data()[0]["datapath"],
|
|
1022
|
-
reference_data=input.reference_data()[0]["datapath"],
|
|
1023
|
-
output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
|
|
1024
|
-
return_output=True
|
|
1025
|
-
)
|
|
1026
|
-
|
|
1027
|
-
loop = asyncio.get_running_loop()
|
|
1028
|
-
rw = ReactiveWriter(match_log_rv, loop)
|
|
1029
|
-
|
|
1030
|
-
try:
|
|
1031
|
-
with redirect_stdout(rw), redirect_stderr(rw):
|
|
1032
|
-
if input.chromatography_platform() == "HRMS":
|
|
1033
|
-
window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
|
|
1034
|
-
window_size_matching_tmp = strip_numeric(input.window_size_matching())
|
|
1035
|
-
grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp, 'window_size_centroiding':window_size_centroiding_tmp, 'window_size_matching':window_size_matching_tmp}
|
|
1036
|
-
df_out = await asyncio.to_thread(tune_params_on_HRMS_data, **common_kwargs, grid=grid)
|
|
1037
|
-
else:
|
|
1038
|
-
grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp}
|
|
1039
|
-
df_out = await asyncio.to_thread(tune_params_on_NRMS_data, **common_kwargs, grid=grid)
|
|
1040
|
-
match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
|
|
1041
|
-
#await reactive.flush()
|
|
1042
|
-
except Exception as e:
|
|
1043
|
-
match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
|
|
1044
|
-
#await reactive.flush()
|
|
1045
|
-
raise
|
|
1046
|
-
|
|
1047
|
-
yield df_out.to_csv(index=False)
|
|
1048
|
-
'''
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
@render.download(filename="parameter_tuning_output.csv")
|
|
1017
|
+
@render.download(filename="parameter_tuning_output.txt")
|
|
1052
1018
|
async def run_btn_parameter_tuning():
|
|
1053
1019
|
is_any_job_running.set(True)
|
|
1054
1020
|
is_tuning_running.set(True)
|
|
1055
1021
|
match_log_rv.set("Running grid search of all parameters specified...\n")
|
|
1056
|
-
|
|
1022
|
+
await reactive.flush()
|
|
1057
1023
|
|
|
1058
1024
|
similarity_measure_tmp = list(input.similarity_measure())
|
|
1059
1025
|
high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
|
|
@@ -1072,23 +1038,52 @@ def server(input, output, session):
|
|
|
1072
1038
|
common_kwargs = dict(
|
|
1073
1039
|
query_data=input.query_data()[0]["datapath"],
|
|
1074
1040
|
reference_data=input.reference_data()[0]["datapath"],
|
|
1075
|
-
output_path=str(Path.cwd() / "parameter_tuning_output.
|
|
1076
|
-
return_output=True
|
|
1041
|
+
output_path=str(Path.cwd() / "parameter_tuning_output.txt"),
|
|
1042
|
+
return_output=True,
|
|
1077
1043
|
)
|
|
1078
1044
|
|
|
1079
1045
|
loop = asyncio.get_running_loop()
|
|
1080
|
-
rw = ReactiveWriter(
|
|
1046
|
+
rw = ReactiveWriter(loop)
|
|
1081
1047
|
|
|
1082
1048
|
try:
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1049
|
+
if input.chromatography_platform() == "HRMS":
|
|
1050
|
+
window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
|
|
1051
|
+
window_size_matching_tmp = strip_numeric(input.window_size_matching())
|
|
1052
|
+
grid = {
|
|
1053
|
+
'similarity_measure': similarity_measure_tmp,
|
|
1054
|
+
'weight': weights_tmp,
|
|
1055
|
+
'spectrum_preprocessing_order': spectrum_preprocessing_order_tmp,
|
|
1056
|
+
'mz_min': mz_min_tmp,
|
|
1057
|
+
'mz_max': mz_max_tmp,
|
|
1058
|
+
'int_min': int_min_tmp,
|
|
1059
|
+
'int_max': int_max_tmp,
|
|
1060
|
+
'noise_threshold': noise_threshold_tmp,
|
|
1061
|
+
'wf_mz': wf_mz_tmp,
|
|
1062
|
+
'wf_int': wf_int_tmp,
|
|
1063
|
+
'LET_threshold': LET_threshold_tmp,
|
|
1064
|
+
'entropy_dimension': entropy_dimension_tmp,
|
|
1065
|
+
'high_quality_reference_library': high_quality_reference_library_tmp,
|
|
1066
|
+
'window_size_centroiding': window_size_centroiding_tmp,
|
|
1067
|
+
'window_size_matching': window_size_matching_tmp,
|
|
1068
|
+
}
|
|
1069
|
+
df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_HRMS_data_shiny, rw, **common_kwargs, grid=grid)
|
|
1070
|
+
else:
|
|
1071
|
+
grid = {
|
|
1072
|
+
'similarity_measure': similarity_measure_tmp,
|
|
1073
|
+
'weight': weights_tmp,
|
|
1074
|
+
'spectrum_preprocessing_order': spectrum_preprocessing_order_tmp,
|
|
1075
|
+
'mz_min': mz_min_tmp,
|
|
1076
|
+
'mz_max': mz_max_tmp,
|
|
1077
|
+
'int_min': int_min_tmp,
|
|
1078
|
+
'int_max': int_max_tmp,
|
|
1079
|
+
'noise_threshold': noise_threshold_tmp,
|
|
1080
|
+
'wf_mz': wf_mz_tmp,
|
|
1081
|
+
'wf_int': wf_int_tmp,
|
|
1082
|
+
'LET_threshold': LET_threshold_tmp,
|
|
1083
|
+
'entropy_dimension': entropy_dimension_tmp,
|
|
1084
|
+
'high_quality_reference_library': high_quality_reference_library_tmp,
|
|
1085
|
+
}
|
|
1086
|
+
df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_NRMS_data_shiny, rw, **common_kwargs, grid=grid)
|
|
1092
1087
|
|
|
1093
1088
|
match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
|
|
1094
1089
|
except Exception as e:
|
|
@@ -1097,37 +1092,12 @@ def server(input, output, session):
|
|
|
1097
1092
|
finally:
|
|
1098
1093
|
is_tuning_running.set(False)
|
|
1099
1094
|
is_any_job_running.set(False)
|
|
1100
|
-
trailing = _drain_queue_nowait(_LOG_QUEUE)
|
|
1101
|
-
if trailing:
|
|
1102
|
-
match_log_rv.set(match_log_rv.get() + "".join(trailing))
|
|
1103
1095
|
await reactive.flush()
|
|
1104
1096
|
|
|
1105
|
-
|
|
1106
|
-
csv_bytes = df_out.to_csv(index=False).encode('utf-8')
|
|
1107
|
-
yield csv_bytes
|
|
1097
|
+
yield df_out.to_csv(index=False).encode("utf-8", sep='\t')
|
|
1108
1098
|
|
|
1109
1099
|
|
|
1110
1100
|
|
|
1111
|
-
@render.text
|
|
1112
|
-
def status_output():
|
|
1113
|
-
return run_status_plot_spectra.get()
|
|
1114
|
-
return run_status_spec_lib_matching.get()
|
|
1115
|
-
return run_status_parameter_tuning.get()
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
class ReactiveWriter(io.TextIOBase):
|
|
1119
|
-
def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
|
|
1120
|
-
self._rv = rv
|
|
1121
|
-
self._loop = loop
|
|
1122
|
-
|
|
1123
|
-
def write(self, s: str):
|
|
1124
|
-
if not s:
|
|
1125
|
-
return 0
|
|
1126
|
-
self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
|
|
1127
|
-
return len(s)
|
|
1128
|
-
|
|
1129
|
-
def flush(self):
|
|
1130
|
-
pass
|
|
1131
1101
|
|
|
1132
1102
|
|
|
1133
1103
|
@reactive.effect
|
|
@@ -1142,7 +1112,6 @@ def server(input, output, session):
|
|
|
1142
1112
|
await reactive.flush()
|
|
1143
1113
|
|
|
1144
1114
|
|
|
1145
|
-
|
|
1146
1115
|
@render.text
|
|
1147
1116
|
def status_output():
|
|
1148
1117
|
return run_status_plot_spectra.get()
|
|
@@ -1150,7 +1119,6 @@ def server(input, output, session):
|
|
|
1150
1119
|
return run_status_parameter_tuning.get()
|
|
1151
1120
|
|
|
1152
1121
|
|
|
1153
|
-
|
|
1154
1122
|
app = App(app_ui, server)
|
|
1155
1123
|
|
|
1156
1124
|
|