pycompound 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +422 -56
- pycompound/spec_lib_matching.py +127 -4
- {pycompound-0.1.4.dist-info → pycompound-0.1.6.dist-info}/METADATA +1 -1
- {pycompound-0.1.4.dist-info → pycompound-0.1.6.dist-info}/RECORD +7 -7
- {pycompound-0.1.4.dist-info → pycompound-0.1.6.dist-info}/WHEEL +0 -0
- {pycompound-0.1.4.dist-info → pycompound-0.1.6.dist-info}/licenses/LICENSE +0 -0
- {pycompound-0.1.4.dist-info → pycompound-0.1.6.dist-info}/top_level.txt +0 -0
app.py
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
|
|
2
2
|
from shiny import App, ui, reactive, render, req
|
|
3
|
+
from shiny.types import SilentException
|
|
3
4
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
5
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
|
-
from pycompound.spec_lib_matching import
|
|
6
|
-
from pycompound.spec_lib_matching import
|
|
7
|
-
from pycompound.spec_lib_matching import
|
|
8
|
-
from pycompound.spec_lib_matching import
|
|
6
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
7
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
8
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid_shiny
|
|
9
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid_shiny
|
|
10
|
+
from pycompound.spec_lib_matching import tune_params_DE
|
|
9
11
|
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
10
12
|
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
11
13
|
from pathlib import Path
|
|
12
14
|
from contextlib import redirect_stdout, redirect_stderr
|
|
15
|
+
import contextlib
|
|
13
16
|
import subprocess
|
|
14
17
|
import traceback
|
|
15
18
|
import asyncio
|
|
@@ -23,10 +26,34 @@ import netCDF4 as nc
|
|
|
23
26
|
from pyteomics import mgf, mzml
|
|
24
27
|
import ast
|
|
25
28
|
from numbers import Real
|
|
29
|
+
import logging
|
|
30
|
+
from scipy.optimize import differential_evolution
|
|
26
31
|
|
|
27
32
|
|
|
28
33
|
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
29
34
|
|
|
35
|
+
class _UIWriter:
    """Minimal file-like sink that forwards written text to an asyncio queue.

    Each non-empty chunk is handed to ``q.put_nowait`` through
    ``loop.call_soon_threadsafe``, so ``write`` may be called from a thread
    other than the one running ``loop``.
    """

    def __init__(self, loop, q: asyncio.Queue[str]):
        self._loop, self._q = loop, q

    def write(self, s: str):
        """Schedule *s* for enqueueing (empty chunks are dropped); return ``len(s)``."""
        written = len(s)
        if written:
            self._loop.call_soon_threadsafe(self._q.put_nowait, s)
        return written

    def flush(self):
        """Do nothing: there is no local buffer to flush."""
        return None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def attach_logging_to_writer(writer):
    """Send root-logger records at INFO and above to *writer*.

    A ``logging.StreamHandler`` wrapping *writer* is added to the root logger
    and both the handler and the root logger are set to ``logging.INFO``.

    Returns:
        ``(handler, root)`` - the handler that was added and the root logger
        (presumably so the caller can remove the handler again later).
    """
    root_logger = logging.getLogger()
    stream_handler = logging.StreamHandler(writer)
    stream_handler.setLevel(logging.INFO)

    root_logger.addHandler(stream_handler)
    root_logger.setLevel(logging.INFO)
    return stream_handler, root_logger
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
30
57
|
def _run_with_redirects(fn, writer, *args, **kwargs):
    """Call ``fn(*args, **kwargs)`` with stdout and stderr redirected to *writer*.

    Returns whatever *fn* returns; the redirections are undone on exit,
    including when *fn* raises.
    """
    with redirect_stdout(writer):
        with redirect_stderr(writer):
            result = fn(*args, **kwargs)
    return result
|
|
@@ -394,7 +421,7 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
394
421
|
|
|
395
422
|
|
|
396
423
|
|
|
397
|
-
def
|
|
424
|
+
def run_parameter_tuning_grid_ui(platform: str):
|
|
398
425
|
base_inputs = [
|
|
399
426
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
400
427
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
@@ -435,7 +462,7 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
435
462
|
]
|
|
436
463
|
|
|
437
464
|
|
|
438
|
-
|
|
465
|
+
run_button_parameter_tuning_grid = ui.download_button("run_btn_parameter_tuning_grid", "Tune parameters (grid search)", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
439
466
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
440
467
|
|
|
441
468
|
if platform == "HRMS":
|
|
@@ -465,7 +492,7 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
465
492
|
ui.TagList(
|
|
466
493
|
ui.h2("Tune parameters"),
|
|
467
494
|
inputs_columns,
|
|
468
|
-
|
|
495
|
+
run_button_parameter_tuning_grid,
|
|
469
496
|
back_button,
|
|
470
497
|
log_panel
|
|
471
498
|
),
|
|
@@ -473,12 +500,152 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
473
500
|
|
|
474
501
|
|
|
475
502
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
)
|
|
481
|
-
|
|
503
|
+
# Default (lower, upper) bounds for each continuous parameter that can be
# tuned on the HRMS platform; the keys double as the checkbox choices.
PARAMS_HRMS = dict(
    window_size_centroiding=(0.0, 0.5),
    window_size_matching=(0.0, 0.5),
    noise_threshold=(0.0, 0.25),
    wf_mz=(0.0, 5.0),
    wf_int=(0.0, 5.0),
    LET_threshold=(0.0, 5.0),
    entropy_dimension=(1.0, 3.0),
)

# Same table for the NRMS platform, which has no window-size parameters.
PARAMS_NRMS = dict(
    noise_threshold=(0.0, 0.25),
    wf_mz=(0.0, 5.0),
    wf_int=(0.0, 5.0),
    LET_threshold=(0.0, 5.0),
    entropy_dimension=(1.0, 3.0),
)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def run_parameter_tuning_DE_ui(platform: str):
    """Build the differential-evolution parameter-tuning page for *platform*.

    ``platform == "HRMS"`` selects ``PARAMS_HRMS`` and adds the centroiding and
    matching window-size inputs; any other value is treated as NRMS. The page
    consists of a sidebar (checkbox group of tunable parameters plus the
    ``bounds_inputs`` output) and a body (four input columns, the run and back
    buttons, and a card showing the ``run_log`` output).
    """
    is_hrms = platform == "HRMS"
    tunable_params = PARAMS_HRMS if is_hrms else PARAMS_NRMS

    column_style = "display:flex; flex-direction:column; gap:10px;"
    button_style = "font-size:16px; padding:15px 30px; width:300px; height:100px"

    def _column(widgets):
        # One vertical stack of widgets inside the four-column layout.
        return ui.div(*widgets, style=column_style)

    similarity_choices = [
        "cosine", "shannon", "renyi", "tsallis", "mixture", "jaccard", "dice",
        "3w_jaccard", "sokal_sneath", "binary_cosine", "mountford",
        "mcconnaughey", "driver_kroeber", "simpson", "braun_banquet",
        "fager_mcgowan", "kulczynski", "intersection", "hamming", "hellinger",
    ]
    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_select("similarity_measure", "Select similarity measure:", similarity_choices),
        ui.input_text(
            "weights",
            "Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):",
            "0.25, 0.25, 0.25, 0.25",
        ),
        ui.input_select(
            "high_quality_reference_library",
            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
            [False, True],
        ),
    ]

    # Platform-specific column: preprocessing order, plus window sizes on HRMS.
    if is_hrms:
        order_label = "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used."
        order_default = "FCNMWL"
        window_inputs = [
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        order_label = "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation))."
        order_default = "FNLW"
        window_inputs = []
    extra_inputs = [
        ui.input_text("spectrum_preprocessing_order", order_label, order_default),
        *window_inputs,
    ]

    numeric_specs = [
        ("mz_min", "Minimum m/z for filtering:", 0),
        ("mz_max", "Maximum m/z for filtering:", 99_999_999),
        ("int_min", "Minimum intensity for filtering:", 0),
        ("int_max", "Maximum intensity for filtering:", 999_999_999),
        ("noise_threshold", "Noise removal threshold:", 0.0),
        ("wf_mz", "Mass/charge weight factor:", 0.0),
        ("wf_int", "Intensity weight factor:", 1.0),
        ("LET_threshold", "Low-entropy threshold:", 0.0),
        ("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
        ("max_iterations", "Maximum number of iterations:", 5),
    ]
    numeric_inputs = [
        ui.input_numeric(input_id, label, default)
        for input_id, label, default in numeric_specs
    ]

    run_button_parameter_tuning_DE = ui.input_action_button(
        "run_btn_parameter_tuning_DE",
        "Tune parameters (differential evolution optimization)",
        style=button_style,
    )
    back_button = ui.input_action_button("back", "Back to main menu", style=button_style)

    # The four-column layout is the same for both platforms; only the
    # contents of the second column differ.
    inputs_columns = ui.layout_columns(
        _column(base_inputs),
        _column(extra_inputs),
        _column(numeric_inputs[:5]),
        _column(numeric_inputs[5:]),
        col_widths=(3, 3, 3, 3),
    )

    sidebar = ui.sidebar(
        ui.h3("Select continuous parameters to optimize"),
        ui.input_checkbox_group(
            "params",
            None,
            choices=list(tunable_params),
            selected=["noise_threshold", "LET_threshold"],
        ),
        ui.hr(),
        ui.h4("Bounds for selected parameters"),
        ui.output_ui("bounds_inputs"),
        width=360,
    )
    body = ui.div(
        ui.h2("Tune parameters (differential evolution optimization)"),
        inputs_columns,
        run_button_parameter_tuning_DE,
        back_button,
        ui.br(),
        ui.card(
            ui.card_header("Live log"),
            ui.output_text_verbatim("run_log"),  # rendered by the server's run_log output
        ),
        style="display:flex; flex-direction:column; gap:16px;",
    )
    return ui.page_fillable(ui.layout_sidebar(sidebar, body))
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
|
|
482
649
|
|
|
483
650
|
app_ui = ui.page_fluid(
|
|
484
651
|
ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
|
|
@@ -498,8 +665,10 @@ def server(input, output, session):
|
|
|
498
665
|
run_status_plot_spectra = reactive.Value("")
|
|
499
666
|
run_status_spec_lib_matching = reactive.Value("")
|
|
500
667
|
run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
|
|
501
|
-
|
|
502
|
-
|
|
668
|
+
run_status_parameter_tuning_grid = reactive.Value("")
|
|
669
|
+
run_status_parameter_tuning_DE = reactive.Value("")
|
|
670
|
+
is_tuning_grid_running = reactive.Value(False)
|
|
671
|
+
is_tuning_DE_running = reactive.Value(False)
|
|
503
672
|
match_log_rv = reactive.Value("")
|
|
504
673
|
is_matching_rv = reactive.Value(False)
|
|
505
674
|
is_any_job_running = reactive.Value(False)
|
|
@@ -519,6 +688,64 @@ def server(input, output, session):
|
|
|
519
688
|
converted_query_path_rv = reactive.Value(None)
|
|
520
689
|
converted_reference_path_rv = reactive.Value(None)
|
|
521
690
|
|
|
691
|
+
@output
@render.ui
def bounds_inputs():
    """Render one card of Lower/Upper numeric inputs per selected parameter.

    The parameters come from ``input.params``; when none are selected a short
    hint is shown instead. Initial values are the platform's default bounds,
    or ``(0.0, 1.0)`` for a name missing from that table.
    """
    chosen = input.params()
    if not chosen:
        return ui.div(ui.em("Select one or more parameters above."))

    default_bounds = PARAMS_HRMS if input.chromatography_platform() == 'HRMS' else PARAMS_NRMS

    def _bounds_card(param_name):
        # Input ids follow the min_<name> / max_<name> convention.
        lower, upper = default_bounds.get(param_name, (0.0, 1.0))
        return ui.card(
            ui.card_header(param_name),
            ui.layout_columns(
                ui.input_numeric(f"min_{param_name}", "Lower", lower, step=0.001),
                ui.input_numeric(f"max_{param_name}", "Upper", upper, step=0.001),
            ),
        )

    return ui.div(*(_bounds_card(param_name) for param_name in chosen))
|
|
715
|
+
|
|
716
|
+
def _read_bounds_dict():
    """Return ``{param: (lower, upper)}`` for every parameter ticked in ``input.params``.

    Bounds are read from the ``min_<param>`` / ``max_<param>`` inputs when
    those exist; otherwise the platform's default bounds are used, falling
    back to ``(0.0, 1.0)`` for a name missing from that table.
    """
    selected = input.params()

    # Resolve the defaults table from the current platform, the same way
    # _read_bounds does; a bare ``PARAMS`` name is not bound in this scope.
    if input.chromatography_platform() == 'HRMS':
        default_bounds = PARAMS_HRMS
    else:
        default_bounds = PARAMS_NRMS

    out = {}
    for name in selected:
        lo_default, hi_default = default_bounds.get(name, (0.0, 1.0))
        lo_id = f"min_{name}"
        hi_id = f"max_{name}"

        lo_val = input[lo_id]() if lo_id in input else lo_default
        hi_val = input[hi_id]() if hi_id in input else hi_default

        # A numeric input may report None (e.g. an emptied box), which
        # float() cannot convert; use the default bound in that case.
        if lo_val is None:
            lo_val = lo_default
        if hi_val is None:
            hi_val = hi_default

        out[name] = (float(lo_val), float(hi_val))
    return out
|
|
729
|
+
|
|
730
|
+
def _read_bounds():
    """Collect optimisation bounds for the parameters ticked in ``input.params``.

    Returns:
        ``(opt_params, bounds_dict, bounds_list)`` where ``bounds_dict`` maps
        each parameter to ``(lower, upper)`` as floats (swapped if entered the
        wrong way round) and ``bounds_list`` holds the same pairs in
        ``opt_params`` order.
    """
    opt_params = input.params()
    fallback_bounds = PARAMS_HRMS if input.chromatography_platform() == 'HRMS' else PARAMS_NRMS

    def _current(input_id, fallback):
        # Use the widget's value when the input exists, else the default.
        return input[input_id]() if input_id in input else fallback

    bounds_dict = {}
    for param in opt_params:
        default_lo, default_hi = fallback_bounds.get(param, (0.0, 1.0))
        lower = _current(f"min_{param}", default_lo)
        upper = _current(f"max_{param}", default_hi)
        if lower > upper:
            lower, upper = upper, lower
        bounds_dict[param] = (float(lower), float(upper))

    return opt_params, bounds_dict, [bounds_dict[param] for param in opt_params]
|
|
522
749
|
|
|
523
750
|
def _reset_plot_spectra_state():
|
|
524
751
|
query_status_rv.set("")
|
|
@@ -551,7 +778,8 @@ def server(input, output, session):
|
|
|
551
778
|
|
|
552
779
|
def _reset_parameter_tuning_state():
    """Clear the shared log text and mark the grid, DE and global job flags as idle."""
    match_log_rv.set("")
    for running_flag in (is_tuning_grid_running, is_tuning_DE_running, is_any_job_running):
        running_flag.set(False)
|
|
556
784
|
|
|
557
785
|
|
|
@@ -563,7 +791,9 @@ def server(input, output, session):
|
|
|
563
791
|
_reset_plot_spectra_state()
|
|
564
792
|
elif page == "run_spec_lib_matching":
|
|
565
793
|
_reset_spec_lib_matching_state()
|
|
566
|
-
elif page == "
|
|
794
|
+
elif page == "run_parameter_tuning_grid":
|
|
795
|
+
_reset_parameter_tuning_state()
|
|
796
|
+
elif page == "run_parameter_tuning_DE":
|
|
567
797
|
_reset_parameter_tuning_state()
|
|
568
798
|
|
|
569
799
|
@reactive.effect
|
|
@@ -573,7 +803,9 @@ def server(input, output, session):
|
|
|
573
803
|
_reset_plot_spectra_state()
|
|
574
804
|
elif page == "run_spec_lib_matching":
|
|
575
805
|
_reset_spec_lib_matching_state()
|
|
576
|
-
elif page == "
|
|
806
|
+
elif page == "run_parameter_tuning_grid":
|
|
807
|
+
_reset_parameter_tuning_state()
|
|
808
|
+
elif page == "run_parameter_tuning_DE":
|
|
577
809
|
_reset_parameter_tuning_state()
|
|
578
810
|
|
|
579
811
|
|
|
@@ -601,7 +833,7 @@ def server(input, output, session):
|
|
|
601
833
|
|
|
602
834
|
@reactive.effect
|
|
603
835
|
async def _pump_logs():
|
|
604
|
-
if not (is_any_job_running.get() or
|
|
836
|
+
if not (is_any_job_running.get() or is_tuning_grid_running.get() or is_tuning_DE_running.get() or is_matching_rv.get()):
|
|
605
837
|
return
|
|
606
838
|
reactive.invalidate_later(0.05)
|
|
607
839
|
msgs = _drain_queue_nowait(_LOG_QUEUE)
|
|
@@ -680,9 +912,12 @@ def server(input, output, session):
|
|
|
680
912
|
elif input.run_spec_lib_matching() > match_clicks.get():
|
|
681
913
|
current_page.set("run_spec_lib_matching")
|
|
682
914
|
match_clicks.set(input.run_spec_lib_matching())
|
|
683
|
-
elif input.
|
|
684
|
-
current_page.set("
|
|
685
|
-
match_clicks.set(input.
|
|
915
|
+
elif input.run_parameter_tuning_grid() > match_clicks.get():
|
|
916
|
+
current_page.set("run_parameter_tuning_grid")
|
|
917
|
+
match_clicks.set(input.run_parameter_tuning_grid())
|
|
918
|
+
elif input.run_parameter_tuning_DE() > match_clicks.get():
|
|
919
|
+
current_page.set("run_parameter_tuning_DE")
|
|
920
|
+
match_clicks.set(input.run_parameter_tuning_DE())
|
|
686
921
|
elif hasattr(input, "back") and input.back() > back_clicks.get():
|
|
687
922
|
current_page.set("main_menu")
|
|
688
923
|
back_clicks.set(input.back())
|
|
@@ -726,7 +961,8 @@ def server(input, output, session):
|
|
|
726
961
|
),
|
|
727
962
|
ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
728
963
|
ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
729
|
-
ui.input_action_button("
|
|
964
|
+
ui.input_action_button("run_parameter_tuning_grid", "Grid search: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
|
|
965
|
+
ui.input_action_button("run_parameter_tuning_DE", "Differential evolution optimization: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:500px; height:150px; margin-top:10px; margin-right:50px"),
|
|
730
966
|
ui.div(
|
|
731
967
|
"References:",
|
|
732
968
|
style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
|
|
@@ -777,8 +1013,10 @@ def server(input, output, session):
|
|
|
777
1013
|
return plot_spectra_ui(input.chromatography_platform())
|
|
778
1014
|
elif current_page() == "run_spec_lib_matching":
|
|
779
1015
|
return run_spec_lib_matching_ui(input.chromatography_platform())
|
|
780
|
-
elif current_page() == "
|
|
781
|
-
return
|
|
1016
|
+
elif current_page() == "run_parameter_tuning_grid":
|
|
1017
|
+
return run_parameter_tuning_grid_ui(input.chromatography_platform())
|
|
1018
|
+
elif current_page() == "run_parameter_tuning_DE":
|
|
1019
|
+
return run_parameter_tuning_DE_ui(input.chromatography_platform())
|
|
782
1020
|
|
|
783
1021
|
|
|
784
1022
|
|
|
@@ -1020,10 +1258,10 @@ def server(input, output, session):
|
|
|
1020
1258
|
yield buf.getvalue()
|
|
1021
1259
|
|
|
1022
1260
|
|
|
1023
|
-
@render.download(filename="
|
|
1024
|
-
async def
|
|
1261
|
+
@render.download(filename="parameter_tuning_grid_output.txt")
|
|
1262
|
+
async def run_btn_parameter_tuning_grid():
|
|
1025
1263
|
is_any_job_running.set(True)
|
|
1026
|
-
|
|
1264
|
+
is_tuning_grid_running.set(True)
|
|
1027
1265
|
match_log_rv.set("Running grid search of all parameters specified...\n")
|
|
1028
1266
|
await reactive.flush()
|
|
1029
1267
|
|
|
@@ -1044,7 +1282,7 @@ def server(input, output, session):
|
|
|
1044
1282
|
common_kwargs = dict(
|
|
1045
1283
|
query_data=input.query_data()[0]["datapath"],
|
|
1046
1284
|
reference_data=input.reference_data()[0]["datapath"],
|
|
1047
|
-
output_path=str(Path.cwd() / "
|
|
1285
|
+
output_path=str(Path.cwd() / "parameter_tuning_grid_output.txt"),
|
|
1048
1286
|
return_output=True,
|
|
1049
1287
|
)
|
|
1050
1288
|
|
|
@@ -1072,7 +1310,7 @@ def server(input, output, session):
|
|
|
1072
1310
|
'window_size_centroiding': window_size_centroiding_tmp,
|
|
1073
1311
|
'window_size_matching': window_size_matching_tmp,
|
|
1074
1312
|
}
|
|
1075
|
-
df_out = await asyncio.to_thread(_run_with_redirects,
|
|
1313
|
+
df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_HRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
|
|
1076
1314
|
else:
|
|
1077
1315
|
grid = {
|
|
1078
1316
|
'similarity_measure': similarity_measure_tmp,
|
|
@@ -1089,14 +1327,14 @@ def server(input, output, session):
|
|
|
1089
1327
|
'entropy_dimension': entropy_dimension_tmp,
|
|
1090
1328
|
'high_quality_reference_library': high_quality_reference_library_tmp,
|
|
1091
1329
|
}
|
|
1092
|
-
df_out = await asyncio.to_thread(_run_with_redirects,
|
|
1330
|
+
df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_NRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
|
|
1093
1331
|
|
|
1094
1332
|
match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
|
|
1095
1333
|
except Exception as e:
|
|
1096
1334
|
match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
|
|
1097
1335
|
raise
|
|
1098
1336
|
finally:
|
|
1099
|
-
|
|
1337
|
+
is_tuning_grid_running.set(False)
|
|
1100
1338
|
is_any_job_running.set(False)
|
|
1101
1339
|
await reactive.flush()
|
|
1102
1340
|
|
|
@@ -1104,11 +1342,155 @@ def server(input, output, session):
|
|
|
1104
1342
|
|
|
1105
1343
|
|
|
1106
1344
|
|
|
1345
|
+
@reactive.effect
@reactive.event(input.run_btn_parameter_tuning_DE)
async def run_btn_parameter_tuning_DE():
    """Run differential-evolution tuning when the DE run button is clicked.

    All inputs are snapshotted up front; ``tune_params_DE`` then runs in a
    worker thread with stdout/stderr redirected to a writer whose output is
    appended to ``match_log_rv``. Errors are reported in the log with a
    traceback rather than re-raised. The running flags are always cleared
    before returning.
    """
    match_log_rv.set("Tuning specified continuous parameters using differential evolution...\n")
    is_any_job_running.set(True)
    is_tuning_DE_running.set(True)
    await reactive.flush()

    # --- helpers ---
    def _safe_float(v, default):
        """Coerce *v* to float; return *default* for None or unconvertible values."""
        if v is None:
            return default
        try:
            return float(v)
        except (TypeError, ValueError, OverflowError):
            return default

    def _iget(input_id, default=None):
        """Read a Shiny input by id, returning *default* if it is absent or silent."""
        if input_id not in input:
            return default
        try:
            return input[input_id]()
        except SilentException:
            return default

    # ---- log plumbing (stdout/stderr -> UI) ----
    loop = asyncio.get_running_loop()
    q: asyncio.Queue[str | None] = asyncio.Queue()

    class UIWriter(io.TextIOBase):
        def write(self, s: str):
            if s:
                # Called from the worker thread: hand the text to the loop.
                loop.call_soon_threadsafe(q.put_nowait, s)
            return len(s)

        def flush(self):
            pass

    async def _drain():
        # Append queued text to the log until the None sentinel arrives.
        while True:
            msg = await q.get()
            if msg is None:
                break
            match_log_rv.set(match_log_rv.get() + msg)
            await reactive.flush()

    async def _finish():
        # Stop the drain task, clear the running flags and push the update.
        await q.put(None)
        await drain_task
        is_tuning_DE_running.set(False)
        is_any_job_running.set(False)
        await reactive.flush()

    drain_task = asyncio.create_task(_drain())
    writer = UIWriter()

    # ---------- SNAPSHOT INPUTS SAFELY ----------
    try:
        qfile = _iget("query_data")[0]["datapath"]
        rfile = _iget("reference_data")[0]["datapath"]

        platform = _iget("chromatography_platform", "HRMS")
        sim = _iget("similarity_measure", "cosine")
        spro = _iget("spectrum_preprocessing_order", "FCNMWL")

        # The select widget may deliver its choice as a string.
        hq_raw = _iget("high_quality_reference_library", False)
        hq = hq_raw.lower() == "true" if isinstance(hq_raw, str) else bool(hq_raw)

        mz_min = _safe_float(_iget("mz_min", 0.0), 0.0)
        mz_max = _safe_float(_iget("mz_max", 99_999_999.0), 99_999_999.0)
        int_min = _safe_float(_iget("int_min", 0.0), 0.0)
        int_max = _safe_float(_iget("int_max", 999_999_999.0), 999_999_999.0)

        # weights "a,b,c,d": pad with zeros / truncate to exactly four values
        w_text = _iget("weights", "") or ""
        w_list = [float(w.strip()) for w in w_text.split(",") if w.strip()]
        w_list = (w_list + [0.0, 0.0, 0.0, 0.0])[:4]
        weights = {"Cosine": w_list[0], "Shannon": w_list[1], "Renyi": w_list[2], "Tsallis": w_list[3]}

        # selected params + bounds (min_/max_ inputs if present, else defaults)
        opt_params = tuple(_iget("params", ()) or ())
        param_defaults = PARAMS_HRMS if platform == "HRMS" else PARAMS_NRMS
        bounds_dict = {}
        for p in opt_params:
            lo_default, hi_default = param_defaults.get(p, (0.0, 1.0))
            lo = _safe_float(_iget(f"min_{p}", lo_default), lo_default)
            hi = _safe_float(_iget(f"max_{p}", hi_default), hi_default)
            if lo > hi:
                lo, hi = hi, lo
            bounds_dict[p] = (lo, hi)

        # values used for every parameter that is not being optimised
        defaults = {
            "window_size_centroiding": _safe_float(_iget("window_size_centroiding", 0.5), 0.5),
            "window_size_matching": _safe_float(_iget("window_size_matching", 0.5), 0.5),
            "noise_threshold": _safe_float(_iget("noise_threshold", 0.0), 0.0),
            "wf_mz": _safe_float(_iget("wf_mz", 0.0), 0.0),
            "wf_int": _safe_float(_iget("wf_int", 1.0), 1.0),
            "LET_threshold": _safe_float(_iget("LET_threshold", 0.0), 0.0),
            "entropy_dimension": _safe_float(_iget("entropy_dimension", 1.1), 1.1),
        }
        if platform == "NRMS":
            defaults.pop("window_size_centroiding", None)
            defaults.pop("window_size_matching", None)

        # Snapshot here as well, so the worker thread never reads reactive
        # inputs; a blank box falls back to the widget's initial value of 5.
        max_iters = _iget("max_iterations", 5)
        if max_iters is None:
            max_iters = 5

    except Exception:
        tb = traceback.format_exc()
        match_log_rv.set(match_log_rv.get() + f"\n❌ Input snapshot failed:\n{tb}\n")
        await _finish()
        return

    de_kwargs = dict(
        query_data=qfile,
        reference_data=rfile,
        chromatography_platform=platform,  # snapshot, consistent with param_defaults above
        similarity_measure=sim,
        weights=weights,
        spectrum_preprocessing_order=spro,
        mz_min=mz_min, mz_max=mz_max,
        int_min=int_min, int_max=int_max,
        high_quality_reference_library=hq,
        optimize_params=list(opt_params),
        param_bounds=bounds_dict,
        default_params=defaults,
        de_workers=1,
        maxiters=max_iters,
    )

    try:
        await asyncio.to_thread(_run_with_redirects, tune_params_DE, writer, **de_kwargs)
        match_log_rv.set(match_log_rv.get() + "\n✅ Differential evolution finished.\n")
    except Exception as e:
        tb = traceback.format_exc()
        match_log_rv.set(match_log_rv.get() + f"\n❌ {type(e).__name__}: {e}\n{tb}\n")
    finally:
        await _finish()
|
|
1107
1489
|
|
|
1108
1490
|
|
|
1109
1491
|
@reactive.effect
|
|
1110
1492
|
async def _pump_reactive_writer_logs():
|
|
1111
|
-
if not
|
|
1493
|
+
if not is_tuning_grid_running.get():
|
|
1112
1494
|
return
|
|
1113
1495
|
|
|
1114
1496
|
reactive.invalidate_later(0.1)
|
|
@@ -1122,32 +1504,16 @@ def server(input, output, session):
|
|
|
1122
1504
|
def status_output():
|
|
1123
1505
|
return run_status_plot_spectra.get()
|
|
1124
1506
|
return run_status_spec_lib_matching.get()
|
|
1125
|
-
return
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
app = App(app_ui, server)
|
|
1129
|
-
|
|
1507
|
+
return run_status_parameter_tuning_grid.get()
|
|
1508
|
+
return run_status_parameter_tuning_DE.get()
|
|
1130
1509
|
|
|
1510
|
+
@output
@render.text
def run_log():
    """Expose the current contents of ``match_log_rv`` as the ``run_log`` text output."""
    log_text = match_log_rv.get()
    return log_text
|
|
1131
1514
|
|
|
1132
|
-
'''
|
|
1133
|
-
from starlette.middleware.base import BaseHTTPMiddleware
|
|
1134
|
-
from starlette.requests import Request
|
|
1135
1515
|
|
|
1136
|
-
|
|
1137
|
-
async def dispatch(self, request: Request, call_next):
|
|
1138
|
-
hdrs = dict(request.scope.get("headers", []))
|
|
1139
|
-
if b"rstudio-connect-app-base-url" not in hdrs:
|
|
1140
|
-
host = request.headers.get("x-forwarded-host") or request.headers.get("host") or ""
|
|
1141
|
-
proto = request.headers.get("x-forwarded-proto") or "https"
|
|
1142
|
-
root_path = (request.scope.get("root_path") or "").rstrip("/")
|
|
1143
|
-
base = f"{proto}://{host}{root_path}"
|
|
1144
|
-
new_headers = list(request.scope.get("headers", [])) + [
|
|
1145
|
-
(b"rstudio-connect-app-base-url", base.encode("utf-8"))
|
|
1146
|
-
]
|
|
1147
|
-
request.scope["headers"] = new_headers
|
|
1148
|
-
return await call_next(request)
|
|
1516
|
+
app = App(app_ui, server)
|
|
1149
1517
|
|
|
1150
|
-
app.starlette_app.add_middleware(_InjectBaseURLMiddleware)
|
|
1151
|
-
'''
|
|
1152
1518
|
|
|
1153
1519
|
|
pycompound/spec_lib_matching.py
CHANGED
|
@@ -9,6 +9,129 @@ from itertools import product
|
|
|
9
9
|
from joblib import Parallel, delayed
|
|
10
10
|
import csv
|
|
11
11
|
import sys, csv
|
|
12
|
+
from scipy.optimize import differential_evolution
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _vector_to_full_params(X, default_params, optimize_params):
    """Merge an optimiser vector into a copy of the default parameters.

    ``X[i]`` (converted to ``float``) overrides the entry named
    ``optimize_params[i]``; pairing stops at the shorter of the two
    sequences. ``default_params`` itself is left unmodified.
    """
    merged = default_params.copy()
    merged.update({key: float(value) for key, value in zip(optimize_params, X)})
    return merged
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def objective_function_HRMS(X, ctx):
    """Score an HRMS parameter vector as ``1 - accuracy``.

    *X* is expanded into a full parameter set with ``_vector_to_full_params``
    and passed, together with the fixed settings held in *ctx*, to
    ``get_acc_HRMS``. The vector and resulting accuracy are printed.
    Lower return values correspond to higher accuracy.
    """
    p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])

    # Positional arguments of get_acc_HRMS, in call order.
    fixed_keys = (
        "df_query", "df_reference",
        "unique_query_ids", "unique_reference_ids",
        "similarity_measure", "weights", "spectrum_preprocessing_order",
        "mz_min", "mz_max", "int_min", "int_max",
    )
    tuned_keys = (
        "window_size_centroiding", "window_size_matching", "noise_threshold",
        "wf_mz", "wf_int", "LET_threshold",
        "entropy_dimension",
    )
    fixed_args = [ctx[key] for key in fixed_keys]
    tuned_args = [p[key] for key in tuned_keys]

    acc = get_acc_HRMS(
        *fixed_args,
        *tuned_args,
        ctx["high_quality_reference_library"],
        verbose=False,
    )
    print(f"\nparams({ctx['optimize_params']}) = {np.array(X)}\naccuracy: {acc*100}%")
    return 1.0 - acc
|
|
37
|
+
|
|
38
|
+
def objective_function_NRMS(X, ctx):
    """Score an NRMS parameter vector as ``1 - accuracy``.

    *X* is expanded into a full parameter set with ``_vector_to_full_params``
    and passed, together with the fixed settings held in *ctx*, to
    ``get_acc_NRMS``. The vector and resulting accuracy are printed.
    Lower return values correspond to higher accuracy.
    """
    p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])

    # Positional arguments of get_acc_NRMS, in call order.
    fixed_keys = (
        "df_query", "df_reference",
        "unique_query_ids", "unique_reference_ids",
        "similarity_measure", "weights", "spectrum_preprocessing_order",
        "mz_min", "mz_max", "int_min", "int_max",
    )
    tuned_keys = ("noise_threshold", "wf_mz", "wf_int", "LET_threshold", "entropy_dimension")
    fixed_args = [ctx[key] for key in fixed_keys]
    tuned_args = [p[key] for key in tuned_keys]

    acc = get_acc_NRMS(
        *fixed_args,
        *tuned_args,
        ctx["high_quality_reference_library"],
        verbose=False,
    )
    print(f"\nparams({ctx['optimize_params']}) = {np.array(X)}\naccuracy: {acc*100}%")
    return 1.0 - acc
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def tune_params_DE(query_data=None, reference_data=None, chromatography_platform='HRMS', similarity_measure='cosine', weights=None, spectrum_preprocessing_order='CNMWL', mz_min=0, mz_max=999999999, int_min=0, int_max=999999999, high_quality_reference_library=False, optimize_params=["window_size_centroiding","window_size_matching","noise_threshold","wf_mz","wf_int","LET_threshold","entropy_dimension"], param_bounds={"window_size_centroiding":(0.0,0.5),"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0.0,5.0),"entropy_dimension":(1.0,3.0)}, default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1}, maxiters=3, de_workers=1, de_updating='immediate', log_hook=None):
    """
    tunes spectral library matching parameters with SciPy's differential evolution and prints the best parameter set found

    --query_data: path to the query data (mgf, mzML, cdf, or csv; extension is matched case-insensitively). Raw formats are first converted to a CSV written next to the input file. Mandatory.
    --reference_data: path to the reference library, or an iterable of such paths whose libraries are concatenated. Mandatory.
    --chromatography_platform: 'HRMS' selects objective_function_HRMS; any other value selects objective_function_NRMS.
    --similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, high_quality_reference_library: fixed settings forwarded unchanged to the objective function.
    --optimize_params: names of the parameters to optimise; each must have an entry in param_bounds.
    --param_bounds: dict mapping parameter name to a (lower, upper) search interval.
    --default_params: values used for every parameter that is not being optimised.
    --maxiters: forwarded to differential_evolution as maxiter.
    --de_workers: forwarded to differential_evolution as workers.
    --de_updating: forwarded to differential_evolution as updating ('immediate' or 'deferred').
    --log_hook: optional callable receiving newline-terminated progress messages; exceptions raised by the hook are ignored.

    The mutable default arguments are only read, never modified, by this function.
    Prints a summary to stdout; does not return a value.
    Exits via sys.exit() when query_data or reference_data is missing, and raises
    ValueError when the query file extension is not supported.
    """

    def _log(msg):
        # Logging is best-effort: a broken hook must not abort a long tuning run,
        # but KeyboardInterrupt/SystemExit are still allowed to propagate.
        if log_hook:
            try:
                log_hook(msg if msg.endswith("\n") else msg + "\n")
            except Exception:
                pass

    def callback(xk, convergence):
        # The second parameter must be named 'convergence': older SciPy releases
        # pass it by keyword, newer ones positionally.
        _log(f"iter callback: conv={convergence:.4g}, x={xk}")
        return False  # never request early termination

    if query_data is None:
        print('\nError: No argument passed to the mandatory query_data. Please pass the path to the TXT file of the query data.')
        sys.exit()

    # Split on the last dot so the derived CSV path is right for 4-letter extensions such as mzML.
    stem, _, extension = query_data.rpartition('.')
    extension_lower = extension.lower()
    if extension_lower in ('mgf', 'mzml', 'cdf'):
        output_path_tmp = stem + '.csv'
        build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
        df_query = pd.read_csv(output_path_tmp)
    elif extension_lower == 'csv':
        df_query = pd.read_csv(query_data)
    else:
        raise ValueError(f"Unsupported query_data file extension '{extension}'. Expected one of: mgf, mzML, cdf, csv.")

    if reference_data is None:
        print('\nError: No argument passed to the mandatory reference_data. Please pass the path to the CSV file of the reference data.')
        sys.exit()

    if isinstance(reference_data, str):
        df_reference = get_reference_df(reference_data=reference_data)
    else:
        # Several reference libraries: load each one and stack them into a single frame.
        dfs = [get_reference_df(reference_data=f) for f in reference_data]
        df_reference = pd.concat(dfs, axis=0, ignore_index=True)

    unique_query_ids = df_query['id'].unique().tolist()
    unique_reference_ids = df_reference['id'].unique().tolist()

    # Everything the objective function needs besides the candidate vector.
    ctx = dict(
        df_query=df_query,
        df_reference=df_reference,
        unique_query_ids=unique_query_ids,
        unique_reference_ids=unique_reference_ids,
        similarity_measure=similarity_measure,
        weights=weights,
        spectrum_preprocessing_order=spectrum_preprocessing_order,
        mz_min=mz_min, mz_max=mz_max, int_min=int_min, int_max=int_max,
        high_quality_reference_library=high_quality_reference_library,
        default_params=default_params,
        optimize_params=optimize_params,
    )

    bounds = [param_bounds[p] for p in optimize_params]

    if chromatography_platform == 'HRMS':
        objective = objective_function_HRMS
    else:
        objective = objective_function_NRMS

    # tol=0.0 as in the original call; seed=1 keeps runs reproducible.
    result = differential_evolution(
        objective,
        bounds=bounds,
        args=(ctx,),
        maxiter=maxiters,
        tol=0.0,
        workers=de_workers,
        updating=de_updating,
        callback=callback,
        seed=1,
    )

    best_full_params = _vector_to_full_params(result.x, default_params, optimize_params)
    best_acc = 100.0 - (result.fun * 100.0)

    print("\n=== Differential Evolution Result ===")
    print(f"Optimized over: {optimize_params}")
    print("Best values (selected params):")
    for name in optimize_params:
        print(f"  {name}: {best_full_params[name]}")
    print("\nFull parameter set used in final evaluation:")
    for k, v in best_full_params.items():
        print(f"  {k}: {v}")
    print(f"\nBest accuracy: {best_acc:.3f}%")
    _log(f"best = {result.x}, acc={100*(1-result.fun):.3f}%")
|
|
12
135
|
|
|
13
136
|
|
|
14
137
|
default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
|
|
@@ -78,7 +201,7 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
|
|
|
78
201
|
|
|
79
202
|
|
|
80
203
|
|
|
81
|
-
def
|
|
204
|
+
def tune_params_on_HRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
82
205
|
"""
|
|
83
206
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
|
|
84
207
|
|
|
@@ -154,7 +277,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, ou
|
|
|
154
277
|
|
|
155
278
|
|
|
156
279
|
|
|
157
|
-
def
|
|
280
|
+
def tune_params_on_HRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
158
281
|
"""
|
|
159
282
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible
|
|
160
283
|
combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
|
|
@@ -262,7 +385,7 @@ def tune_params_on_HRMS_data_shiny(query_data=None, reference_data=None, grid=No
|
|
|
262
385
|
print(f'Wrote results to {output_path}')
|
|
263
386
|
|
|
264
387
|
|
|
265
|
-
def
|
|
388
|
+
def tune_params_on_NRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
266
389
|
"""
|
|
267
390
|
runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
|
|
268
391
|
|
|
@@ -336,7 +459,7 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, ou
|
|
|
336
459
|
|
|
337
460
|
|
|
338
461
|
|
|
339
|
-
def
|
|
462
|
+
def tune_params_on_NRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
340
463
|
"""
|
|
341
464
|
runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible
|
|
342
465
|
combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
app.py,sha256=
|
|
1
|
+
app.py,sha256=lc54MkUqNTpGDBaF-3sDkSKDBSem0lDzZXo875d4W0c,67545
|
|
2
2
|
pycompound/build_library.py,sha256=sXG5MTJnPE7Gr8YMlLWjfMS7JQrW32lCeCGDw-DFe38,4826
|
|
3
3
|
pycompound/plot_spectra.py,sha256=_yeHooNoJHYlTajaZ9hgUudisdWVlw1Zw1wJfV3tpqc,40632
|
|
4
4
|
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
5
5
|
pycompound/processing.py,sha256=q629rcDaMQMgef-4SbeV9cJnuiLXg97VT2F5AIsyqgI,10654
|
|
6
6
|
pycompound/similarity_measures.py,sha256=NbeVIy9DE_KWlDMXXylekjKuYVrtzbeEXbTutKFxmfU,10460
|
|
7
|
-
pycompound/spec_lib_matching.py,sha256=
|
|
7
|
+
pycompound/spec_lib_matching.py,sha256=Dqz8yU1W7aqqPLLiZefLVXU3V0ojEnXLbKoyZJFTbAA,73386
|
|
8
8
|
pycompound/spec_lib_matching_CLI.py,sha256=qiekC52FP6ET_3NYvxUDN7km7y1OLUsd9FB4SHfne_Y,9690
|
|
9
9
|
pycompound/tuning_CLI_DE.py,sha256=PXy95LD_jmVeWdgiMlMwEZU_KqPGqDao1skwe5U4Sfc,9147
|
|
10
10
|
pycompound/tuning_CLI_grid.py,sha256=0XU-4ShZiZ2MQy5d0zydH0hphqXvqGtf4etl-ePNarU,8560
|
|
11
|
-
pycompound-0.1.
|
|
12
|
-
pycompound-0.1.
|
|
13
|
-
pycompound-0.1.
|
|
14
|
-
pycompound-0.1.
|
|
15
|
-
pycompound-0.1.
|
|
11
|
+
pycompound-0.1.6.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
12
|
+
pycompound-0.1.6.dist-info/METADATA,sha256=Rw4Z889RSMLStLvxDy8mVA-b_AKNzryMtSQHH_X0m2Q,1732
|
|
13
|
+
pycompound-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
+
pycompound-0.1.6.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
|
|
15
|
+
pycompound-0.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|