pycompound 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +248 -30
- app2.py +101 -0
- pycompound/spec_lib_matching.py +152 -14
- pycompound/tuning_CLI_DE.py +233 -0
- pycompound/{tuning_CLI.py → tuning_CLI_grid.py} +4 -4
- {pycompound-0.1.3.dist-info → pycompound-0.1.5.dist-info}/METADATA +1 -1
- pycompound-0.1.5.dist-info/RECORD +16 -0
- {pycompound-0.1.3.dist-info → pycompound-0.1.5.dist-info}/top_level.txt +1 -0
- pycompound-0.1.3.dist-info/RECORD +0 -14
- {pycompound-0.1.3.dist-info → pycompound-0.1.5.dist-info}/WHEEL +0 -0
- {pycompound-0.1.3.dist-info → pycompound-0.1.5.dist-info}/licenses/LICENSE +0 -0
app.py
CHANGED
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
from shiny import App, ui, reactive, render, req
|
|
3
3
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
|
|
4
4
|
from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
|
|
5
|
-
from pycompound.spec_lib_matching import
|
|
6
|
-
from pycompound.spec_lib_matching import
|
|
7
|
-
from pycompound.spec_lib_matching import
|
|
8
|
-
from pycompound.spec_lib_matching import
|
|
5
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
6
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
7
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid_shiny
|
|
8
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid_shiny
|
|
9
|
+
from pycompound.spec_lib_matching import tune_params_DE
|
|
9
10
|
from pycompound.plot_spectra import generate_plots_on_HRMS_data
|
|
10
11
|
from pycompound.plot_spectra import generate_plots_on_NRMS_data
|
|
11
12
|
from pathlib import Path
|
|
@@ -25,7 +26,6 @@ import ast
|
|
|
25
26
|
from numbers import Real
|
|
26
27
|
|
|
27
28
|
|
|
28
|
-
|
|
29
29
|
_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
|
|
30
30
|
|
|
31
31
|
def _run_with_redirects(fn, writer, *args, **kwargs):
|
|
@@ -395,7 +395,7 @@ def run_spec_lib_matching_ui(platform: str):
|
|
|
395
395
|
|
|
396
396
|
|
|
397
397
|
|
|
398
|
-
def
|
|
398
|
+
def run_parameter_tuning_grid_ui(platform: str):
|
|
399
399
|
base_inputs = [
|
|
400
400
|
ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
401
401
|
ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
|
|
@@ -436,7 +436,7 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
436
436
|
]
|
|
437
437
|
|
|
438
438
|
|
|
439
|
-
|
|
439
|
+
run_button_parameter_tuning_grid = ui.download_button("run_btn_parameter_tuning_grid", "Tune parameters (grid search)", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
440
440
|
back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
|
|
441
441
|
|
|
442
442
|
if platform == "HRMS":
|
|
@@ -466,7 +466,7 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
466
466
|
ui.TagList(
|
|
467
467
|
ui.h2("Tune parameters"),
|
|
468
468
|
inputs_columns,
|
|
469
|
-
|
|
469
|
+
run_button_parameter_tuning_grid,
|
|
470
470
|
back_button,
|
|
471
471
|
log_panel
|
|
472
472
|
),
|
|
@@ -474,8 +474,120 @@ def run_parameter_tuning_ui(platform: str):
|
|
|
474
474
|
|
|
475
475
|
|
|
476
476
|
|
|
477
|
+
# Default (lower, upper) bounds per tunable parameter name. These five
# entries are common to both platform tables below.
_SHARED_PARAM_BOUNDS = {
    "noise_threshold": (0.0, 0.25),
    "wf_mz": (0.0, 5.0),
    "wf_int": (0.0, 5.0),
    "LET_threshold": (0.0, 5.0),
    "entropy_dimension": (1.0, 3.0),
}

# HRMS table: the two window-size parameters come first, then the shared ones.
PARAMS_HRMS = {
    "window_size_centroiding": (0.0, 0.5),
    "window_size_matching": (0.0, 0.5),
    **_SHARED_PARAM_BOUNDS,
}

# NRMS table: only the shared parameters (an independent copy).
PARAMS_NRMS = dict(_SHARED_PARAM_BOUNDS)
|
|
494
|
+
|
|
495
|
+
def run_parameter_tuning_DE_ui(platform: str):
    """Build the page for differential-evolution parameter tuning.

    Args:
        platform: "HRMS" selects the HRMS parameter table and the extra
            window-size inputs; any other value is treated as NRMS.

    Returns:
        A ``ui.page_fillable`` holding a sidebar (parameter checkboxes plus the
        ``bounds_inputs`` output) and the main column of inputs and buttons.
    """
    is_hrms = platform == "HRMS"
    params = PARAMS_HRMS if is_hrms else PARAMS_NRMS
    column_style = "display:flex; flex-direction:column; gap:10px;"

    base_inputs = [
        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
        ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
        ui.input_select("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", [False, True],),
    ]

    if is_hrms:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                "FCNMWL",
            ),
            ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
            ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
        ]
    else:
        extra_inputs = [
            ui.input_text(
                "spectrum_preprocessing_order",
                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                "FNLW",
            )
        ]

    numeric_inputs = [
        ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
        ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
        ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
        ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
        ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
        ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
        ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
        ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
        ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
    ]

    run_button_parameter_tuning_DE = ui.input_action_button("run_btn_parameter_tuning_DE", "Tune parameters (differential evolution optimization)", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
    back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:300px; height:100px")

    # The four-column layout is the same for both platforms; only the contents
    # of extra_inputs differ. Building it unconditionally keeps inputs_columns
    # bound for every platform value, matching the NRMS fallback used above.
    inputs_columns = ui.layout_columns(
        ui.div(base_inputs, style=column_style),
        ui.div(extra_inputs, style=column_style),
        ui.div(numeric_inputs[0:5], style=column_style),
        ui.div(numeric_inputs[5:], style=column_style),
        col_widths=(3,3,3,3),
    )

    return ui.page_fillable(
        ui.layout_sidebar(
            ui.sidebar(
                ui.h3("Select parameters"),
                ui.input_checkbox_group(
                    "params",
                    None,
                    choices=list(params.keys()),
                    selected=["noise_threshold","LET_threshold"],
                ),
                ui.hr(),
                ui.h4("Bounds for selected parameters"),
                ui.output_ui("bounds_inputs"),
                width=360,
            ),
            ui.div(
                ui.h2("Tune parameters (differential evolution optimization)"),
                inputs_columns,
                run_button_parameter_tuning_DE,
                back_button,
            ),
        )
    )
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
|
|
477
588
|
|
|
478
589
|
app_ui = ui.page_fluid(
|
|
590
|
+
ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
|
|
479
591
|
ui.output_ui("main_ui"),
|
|
480
592
|
ui.output_text("status_output")
|
|
481
593
|
)
|
|
@@ -492,8 +604,10 @@ def server(input, output, session):
|
|
|
492
604
|
run_status_plot_spectra = reactive.Value("")
|
|
493
605
|
run_status_spec_lib_matching = reactive.Value("")
|
|
494
606
|
run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
|
|
495
|
-
|
|
496
|
-
|
|
607
|
+
run_status_parameter_tuning_grid = reactive.Value("")
|
|
608
|
+
run_status_parameter_tuning_DE = reactive.Value("")
|
|
609
|
+
is_tuning_grid_running = reactive.Value(False)
|
|
610
|
+
is_tuning_DE_running = reactive.Value(False)
|
|
497
611
|
match_log_rv = reactive.Value("")
|
|
498
612
|
is_matching_rv = reactive.Value(False)
|
|
499
613
|
is_any_job_running = reactive.Value(False)
|
|
@@ -513,6 +627,64 @@ def server(input, output, session):
|
|
|
513
627
|
converted_query_path_rv = reactive.Value(None)
|
|
514
628
|
converted_reference_path_rv = reactive.Value(None)
|
|
515
629
|
|
|
630
|
+
@output
@render.ui
def bounds_inputs():
    """Render one card with Lower/Upper numeric inputs per selected parameter."""
    chosen = input.params()
    if not chosen:
        return ui.div(ui.em("Select one or more parameters above."))

    # Default bounds come from the table matching the selected platform.
    defaults = PARAMS_HRMS if input.chromatography_platform() == 'HRMS' else PARAMS_NRMS

    def _bounds_card(name):
        # Parameters missing from the table fall back to (0.0, 1.0).
        lower, upper = defaults.get(name, (0.0, 1.0))
        return ui.card(
            ui.card_header(name),
            ui.layout_columns(
                ui.input_numeric(f"min_{name}", "Lower", lower, step=0.001),
                ui.input_numeric(f"max_{name}", "Upper", upper, step=0.001),
            )
        )

    return ui.div(*[_bounds_card(name) for name in chosen])
|
|
654
|
+
|
|
655
|
+
def _read_bounds_dict():
    """Return ``{name: (lower, upper)}`` for every currently selected parameter.

    Values are read from the ``min_<name>`` / ``max_<name>`` inputs. The
    platform's default bounds are used when an input does not exist yet or
    when its field is empty.
    """
    # Resolve the defaults table from the platform: no plain ``PARAMS`` name is
    # bound in this scope, only the two per-platform tables.
    if input.chromatography_platform() == 'HRMS':
        defaults = PARAMS_HRMS
    else:
        defaults = PARAMS_NRMS

    def _value_or(input_id, fallback):
        # Fall back when the input is absent or its value is None (empty field).
        if input_id not in input:
            return fallback
        value = input[input_id]()
        return fallback if value is None else value

    out = {}
    for name in input.params():
        lo_default, hi_default = defaults.get(name, (0.0, 1.0))
        lo_val = _value_or(f"min_{name}", lo_default)
        hi_val = _value_or(f"max_{name}", hi_default)
        out[name] = (float(lo_val), float(hi_val))
    return out
|
|
668
|
+
|
|
669
|
+
def _read_bounds():
    """Collect the bounds for the parameters selected for optimization.

    Returns:
        A 3-tuple ``(opt_params, bounds_dict, bounds_list)``: the selected
        parameter names as returned by ``input.params()``, a
        ``{name: (lower, upper)}`` dict, and the same bounds as a list in the
        order of ``opt_params``. Reversed bounds are swapped so that
        lower <= upper.
    """
    opt_params = input.params()
    if input.chromatography_platform() == 'HRMS':
        defaults = PARAMS_HRMS
    else:
        defaults = PARAMS_NRMS

    def _value_or(input_id, fallback):
        # An input may be absent, or hold None when its field is empty; both
        # cases would break the comparison and float() conversion below.
        if input_id not in input:
            return fallback
        value = input[input_id]()
        return fallback if value is None else value

    bounds_dict = {}
    for p in opt_params:
        lo_default, hi_default = defaults.get(p, (0.0, 1.0))
        lo = _value_or(f"min_{p}", lo_default)
        hi = _value_or(f"max_{p}", hi_default)
        if lo > hi:
            lo, hi = hi, lo
        bounds_dict[p] = (float(lo), float(hi))

    bounds_list = [bounds_dict[p] for p in opt_params]
    return opt_params, bounds_dict, bounds_list
|
|
516
688
|
|
|
517
689
|
def _reset_plot_spectra_state():
|
|
518
690
|
query_status_rv.set("")
|
|
@@ -545,7 +717,8 @@ def server(input, output, session):
|
|
|
545
717
|
|
|
546
718
|
def _reset_parameter_tuning_state():
|
|
547
719
|
match_log_rv.set("")
|
|
548
|
-
|
|
720
|
+
is_tuning_grid_running.set(False)
|
|
721
|
+
is_tuning_DE_running.set(False)
|
|
549
722
|
is_any_job_running.set(False)
|
|
550
723
|
|
|
551
724
|
|
|
@@ -557,7 +730,9 @@ def server(input, output, session):
|
|
|
557
730
|
_reset_plot_spectra_state()
|
|
558
731
|
elif page == "run_spec_lib_matching":
|
|
559
732
|
_reset_spec_lib_matching_state()
|
|
560
|
-
elif page == "
|
|
733
|
+
elif page == "run_parameter_tuning_grid":
|
|
734
|
+
_reset_parameter_tuning_state()
|
|
735
|
+
elif page == "run_parameter_tuning_DE":
|
|
561
736
|
_reset_parameter_tuning_state()
|
|
562
737
|
|
|
563
738
|
@reactive.effect
|
|
@@ -567,7 +742,9 @@ def server(input, output, session):
|
|
|
567
742
|
_reset_plot_spectra_state()
|
|
568
743
|
elif page == "run_spec_lib_matching":
|
|
569
744
|
_reset_spec_lib_matching_state()
|
|
570
|
-
elif page == "
|
|
745
|
+
elif page == "run_parameter_tuning_grid":
|
|
746
|
+
_reset_parameter_tuning_state()
|
|
747
|
+
elif page == "run_parameter_tuning_DE":
|
|
571
748
|
_reset_parameter_tuning_state()
|
|
572
749
|
|
|
573
750
|
|
|
@@ -595,7 +772,7 @@ def server(input, output, session):
|
|
|
595
772
|
|
|
596
773
|
@reactive.effect
|
|
597
774
|
async def _pump_logs():
|
|
598
|
-
if not (is_any_job_running.get() or
|
|
775
|
+
if not (is_any_job_running.get() or is_tuning_grid_running.get() or is_tuning_DE_running.get() or is_matching_rv.get()):
|
|
599
776
|
return
|
|
600
777
|
reactive.invalidate_later(0.05)
|
|
601
778
|
msgs = _drain_queue_nowait(_LOG_QUEUE)
|
|
@@ -674,9 +851,12 @@ def server(input, output, session):
|
|
|
674
851
|
elif input.run_spec_lib_matching() > match_clicks.get():
|
|
675
852
|
current_page.set("run_spec_lib_matching")
|
|
676
853
|
match_clicks.set(input.run_spec_lib_matching())
|
|
677
|
-
elif input.
|
|
678
|
-
current_page.set("
|
|
679
|
-
match_clicks.set(input.
|
|
854
|
+
elif input.run_parameter_tuning_grid() > match_clicks.get():
|
|
855
|
+
current_page.set("run_parameter_tuning_grid")
|
|
856
|
+
match_clicks.set(input.run_parameter_tuning_grid())
|
|
857
|
+
elif input.run_parameter_tuning_DE() > match_clicks.get():
|
|
858
|
+
current_page.set("run_parameter_tuning_DE")
|
|
859
|
+
match_clicks.set(input.run_parameter_tuning_DE())
|
|
680
860
|
elif hasattr(input, "back") and input.back() > back_clicks.get():
|
|
681
861
|
current_page.set("main_menu")
|
|
682
862
|
back_clicks.set(input.back())
|
|
@@ -688,7 +868,6 @@ def server(input, output, session):
|
|
|
688
868
|
img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
|
|
689
869
|
return img
|
|
690
870
|
|
|
691
|
-
|
|
692
871
|
@output
|
|
693
872
|
@render.ui
|
|
694
873
|
def main_ui():
|
|
@@ -697,6 +876,7 @@ def server(input, output, session):
|
|
|
697
876
|
ui.h2("Main Menu"),
|
|
698
877
|
ui.div(
|
|
699
878
|
ui.output_image("image"),
|
|
879
|
+
#ui.img(src="emblem.png", width="320px", height="250px"),
|
|
700
880
|
style=(
|
|
701
881
|
"position:fixed; top:0; left:50%; transform:translateX(-50%); "
|
|
702
882
|
"z-index:1000; text-align:center; padding:10px; background-color:white;"
|
|
@@ -720,7 +900,8 @@ def server(input, output, session):
|
|
|
720
900
|
),
|
|
721
901
|
ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
722
902
|
ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
|
|
723
|
-
ui.input_action_button("
|
|
903
|
+
ui.input_action_button("run_parameter_tuning_grid", "Grid search: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
|
|
904
|
+
ui.input_action_button("run_parameter_tuning_DE", "Differential evolution optimization: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:500px; height:150px; margin-top:10px; margin-right:50px"),
|
|
724
905
|
ui.div(
|
|
725
906
|
"References:",
|
|
726
907
|
style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
|
|
@@ -771,8 +952,10 @@ def server(input, output, session):
|
|
|
771
952
|
return plot_spectra_ui(input.chromatography_platform())
|
|
772
953
|
elif current_page() == "run_spec_lib_matching":
|
|
773
954
|
return run_spec_lib_matching_ui(input.chromatography_platform())
|
|
774
|
-
elif current_page() == "
|
|
775
|
-
return
|
|
955
|
+
elif current_page() == "run_parameter_tuning_grid":
|
|
956
|
+
return run_parameter_tuning_grid_ui(input.chromatography_platform())
|
|
957
|
+
elif current_page() == "run_parameter_tuning_DE":
|
|
958
|
+
return run_parameter_tuning_DE_ui(input.chromatography_platform())
|
|
776
959
|
|
|
777
960
|
|
|
778
961
|
|
|
@@ -1014,10 +1197,10 @@ def server(input, output, session):
|
|
|
1014
1197
|
yield buf.getvalue()
|
|
1015
1198
|
|
|
1016
1199
|
|
|
1017
|
-
@render.download(filename="
|
|
1018
|
-
async def
|
|
1200
|
+
@render.download(filename="parameter_tuning_grid_output.txt")
|
|
1201
|
+
async def run_btn_parameter_tuning_grid():
|
|
1019
1202
|
is_any_job_running.set(True)
|
|
1020
|
-
|
|
1203
|
+
is_tuning_grid_running.set(True)
|
|
1021
1204
|
match_log_rv.set("Running grid search of all parameters specified...\n")
|
|
1022
1205
|
await reactive.flush()
|
|
1023
1206
|
|
|
@@ -1038,7 +1221,7 @@ def server(input, output, session):
|
|
|
1038
1221
|
common_kwargs = dict(
|
|
1039
1222
|
query_data=input.query_data()[0]["datapath"],
|
|
1040
1223
|
reference_data=input.reference_data()[0]["datapath"],
|
|
1041
|
-
output_path=str(Path.cwd() / "
|
|
1224
|
+
output_path=str(Path.cwd() / "parameter_tuning_grid_output.txt"),
|
|
1042
1225
|
return_output=True,
|
|
1043
1226
|
)
|
|
1044
1227
|
|
|
@@ -1066,7 +1249,7 @@ def server(input, output, session):
|
|
|
1066
1249
|
'window_size_centroiding': window_size_centroiding_tmp,
|
|
1067
1250
|
'window_size_matching': window_size_matching_tmp,
|
|
1068
1251
|
}
|
|
1069
|
-
df_out = await asyncio.to_thread(_run_with_redirects,
|
|
1252
|
+
df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_HRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
|
|
1070
1253
|
else:
|
|
1071
1254
|
grid = {
|
|
1072
1255
|
'similarity_measure': similarity_measure_tmp,
|
|
@@ -1083,26 +1266,59 @@ def server(input, output, session):
|
|
|
1083
1266
|
'entropy_dimension': entropy_dimension_tmp,
|
|
1084
1267
|
'high_quality_reference_library': high_quality_reference_library_tmp,
|
|
1085
1268
|
}
|
|
1086
|
-
df_out = await asyncio.to_thread(_run_with_redirects,
|
|
1269
|
+
df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_NRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
|
|
1087
1270
|
|
|
1088
1271
|
match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
|
|
1089
1272
|
except Exception as e:
|
|
1090
1273
|
match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
|
|
1091
1274
|
raise
|
|
1092
1275
|
finally:
|
|
1093
|
-
|
|
1276
|
+
is_tuning_grid_running.set(False)
|
|
1094
1277
|
is_any_job_running.set(False)
|
|
1095
1278
|
await reactive.flush()
|
|
1096
1279
|
|
|
1097
1280
|
yield df_out.to_csv(index=False).encode("utf-8", sep='\t')
|
|
1098
1281
|
|
|
1099
1282
|
|
|
1283
|
+
@reactive.effect
@reactive.event(input.run_btn_parameter_tuning_DE)
def _run_btn_parameter_tuning_DE():
    """Run differential-evolution tuning when the DE run button is clicked.

    Reads the form inputs and the user-entered bounds, then calls
    ``tune_params_DE`` synchronously. The "running" flags are always cleared
    when the call ends, whether it succeeded or raised.
    """
    is_any_job_running.set(True)
    is_tuning_DE_running.set(True)
    match_log_rv.set("Tuning specified continuous parameters using differential evolution...\n")

    try:
        weight_values = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
        if len(weight_values) < 4:
            raise ValueError("Four comma-separated weights are required (cosine, shannon, renyi, tsallis).")
        # As before, only the first four values are used.
        weights = dict(zip(("Cosine", "Shannon", "Renyi", "Tsallis"), weight_values))

        # Use the bounds entered in the sidebar so every selected parameter
        # has an entry in param_bounds.
        opt_params, bounds_dict, _ = _read_bounds()

        default_params = {
            "noise_threshold": input.noise_threshold(),
            "wf_mz": input.wf_mz(),
            "wf_int": input.wf_int(),
            "LET_threshold": input.LET_threshold(),
            "entropy_dimension": input.entropy_dimension(),
        }
        if input.chromatography_platform() == 'HRMS':
            # The window-size inputs only exist on the HRMS page.
            default_params["window_size_centroiding"] = input.window_size_centroiding()
            default_params["window_size_matching"] = input.window_size_matching()

        # The select input delivers its value as text; pass a real bool so the
        # string "False" is not treated as truthy downstream.
        # NOTE(review): confirm tune_params_DE expects a bool here (its
        # default is False).
        high_quality = str(input.high_quality_reference_library()) == "True"

        tune_params_DE(
            query_data=input.query_data()[0]["datapath"],
            reference_data=input.reference_data()[0]["datapath"],
            similarity_measure=input.similarity_measure(),
            weights=weights,
            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
            mz_min=input.mz_min(),
            mz_max=input.mz_max(),
            int_min=input.int_min(),
            int_max=input.int_max(),
            high_quality_reference_library=high_quality,
            optimize_params=list(opt_params),
            param_bounds=bounds_dict,
            default_params=default_params,
        )
        match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
    except Exception as e:
        match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
        raise
    finally:
        is_tuning_DE_running.set(False)
        is_any_job_running.set(False)
|
|
1100
1316
|
|
|
1101
1317
|
|
|
1102
1318
|
|
|
1103
1319
|
@reactive.effect
|
|
1104
1320
|
async def _pump_reactive_writer_logs():
|
|
1105
|
-
if not
|
|
1321
|
+
if not is_tuning_grid_running.get():
|
|
1106
1322
|
return
|
|
1107
1323
|
|
|
1108
1324
|
reactive.invalidate_later(0.1)
|
|
@@ -1116,9 +1332,11 @@ def server(input, output, session):
|
|
|
1116
1332
|
def status_output():
|
|
1117
1333
|
return run_status_plot_spectra.get()
|
|
1118
1334
|
return run_status_spec_lib_matching.get()
|
|
1119
|
-
return
|
|
1335
|
+
return run_status_parameter_tuning_grid.get()
|
|
1336
|
+
return run_status_parameter_tuning_DE.get()
|
|
1120
1337
|
|
|
1121
1338
|
|
|
1122
1339
|
app = App(app_ui, server)
|
|
1123
1340
|
|
|
1124
1341
|
|
|
1342
|
+
|
app2.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
# app.py
|
|
4
|
+
from shiny import App, ui, render, reactive
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
# Parameters to choose from + suggested default ranges
|
|
8
|
+
# Tunable parameters mapped to their suggested default (lower, upper) bounds.
PARAMS = {
    "window_size_centroiding": (0.0, 0.5),
    "window_size_matching": (0.0, 0.5),
    "noise_threshold": (0.0, 0.25),
    "wf_mz": (0.0, 5.0),
    "wf_int": (0.0, 5.0),
    "LET_threshold": (0.0, 5.0),
    "entropy_dimension": (1.0, 3.0),
}

# Sidebar: parameter checkboxes plus the dynamically rendered bounds inputs.
# Main area: placeholders for the two outputs the server defines
# ("bounds_table" and "bounds_json"), which need a UI element to be rendered.
app_ui = ui.page_fillable(
    ui.layout_sidebar(
        ui.sidebar(
            ui.h3("Select parameters"),
            ui.input_checkbox_group(
                id="params",
                label=None,
                choices=list(PARAMS.keys()),
                selected=["window_size_centroiding", "noise_threshold"],
            ),
            ui.hr(),
            ui.h4("Bounds for selected parameters"),
            ui.output_ui("bounds_inputs"),
            width=360,
        ),
        ui.output_data_frame("bounds_table"),
        ui.output_text_verbatim("bounds_json"),
    )
)
|
|
35
|
+
|
|
36
|
+
def server(input, output, session):
    """Define the dynamic bounds inputs and two read-only views of them."""

    @output
    @render.ui
    def bounds_inputs():
        """Render one card with Lower/Upper inputs per selected parameter."""
        selected = input.params()
        if not selected:
            return ui.div(ui.em("Select one or more parameters above."))

        blocks = []
        for name in selected:
            lo, hi = PARAMS.get(name, (0.0, 1.0))
            blocks.append(
                ui.card(
                    ui.card_header(name),
                    ui.layout_columns(
                        ui.input_numeric(f"min_{name}", "Lower", lo, step=0.001),
                        ui.input_numeric(f"max_{name}", "Upper", hi, step=0.001),
                    )
                )
            )
        return ui.div(*blocks)

    def _value_or(input_id, fallback):
        # Fall back when the input is absent or its value is None (empty
        # field); float(None) would otherwise raise TypeError.
        if input_id not in input:
            return fallback
        value = input[input_id]()
        return fallback if value is None else value

    def _read_bounds_dict():
        """Return ``{name: (lower, upper)}`` for the selected parameters."""
        out = {}
        for name in input.params():
            lo_default, hi_default = PARAMS.get(name, (0.0, 1.0))
            lo_val = _value_or(f"min_{name}", lo_default)
            hi_val = _value_or(f"max_{name}", hi_default)
            out[name] = (float(lo_val), float(hi_val))
        return out

    # Table of current bounds
    @output
    @render.data_frame
    def bounds_table():
        b = _read_bounds_dict()
        if not b:
            return pd.DataFrame(columns=["parameter", "lower", "upper"])
        rows = [{"parameter": k, "lower": v[0], "upper": v[1]} for k, v in b.items()]
        return pd.DataFrame(rows)

    # Text view of the bounds, formatted as a Python dict literal
    @output
    @render.text
    def bounds_json():
        b = _read_bounds_dict()
        if not b:
            return "{}"
        lines = ["{"]
        for k, (lo, hi) in b.items():
            lines.append(f" '{k}': ({lo}, {hi}),")
        lines.append("}")
        return "\n".join(lines)
|
|
98
|
+
|
|
99
|
+
app = App(app_ui, server)
|
|
100
|
+
|
|
101
|
+
|
pycompound/spec_lib_matching.py
CHANGED
|
@@ -9,6 +9,139 @@ from itertools import product
|
|
|
9
9
|
from joblib import Parallel, delayed
|
|
10
10
|
import csv
|
|
11
11
|
import sys, csv
|
|
12
|
+
from scipy.optimize import differential_evolution
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _vector_to_full_params(X, default_params, optimize_params):
|
|
16
|
+
params = default_params.copy()
|
|
17
|
+
for name, val in zip(optimize_params, X):
|
|
18
|
+
params[name] = float(val)
|
|
19
|
+
return params
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def objective_function_HRMS(X, ctx):
    """Objective for differential evolution: ``1 - accuracy`` for vector ``X``.

    Args:
        X: candidate values, ordered like ``ctx["optimize_params"]``.
        ctx: dict with the data frames, ID lists, fixed settings,
            ``default_params`` and ``optimize_params`` used for evaluation.

    Returns:
        ``1.0 - acc``, where ``acc`` is the value returned by ``get_acc_HRMS``
        or ``get_acc_NRMS``.
    """
    p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])

    # The window-size parameter lives in the merged parameter set, not at the
    # top level of ctx, so the platform test must look at p. Testing ctx made
    # the HRMS branch unreachable for the ctx that tune_params_DE builds.
    if 'window_size_centroiding' in p:
        acc = get_acc_HRMS(
            ctx["df_query"], ctx["df_reference"],
            ctx["unique_query_ids"], ctx["unique_reference_ids"],
            ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
            ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
            p["window_size_centroiding"], p["window_size_matching"], p["noise_threshold"],
            p["wf_mz"], p["wf_int"], p["LET_threshold"],
            p["entropy_dimension"],
            ctx["high_quality_reference_library"],
            verbose=False
        )
    else:
        acc = get_acc_NRMS(
            ctx["df_query"], ctx["df_reference"],
            ctx["unique_query_ids"], ctx["unique_reference_ids"],
            ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
            ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
            p["noise_threshold"],
            p["wf_mz"], p["wf_int"], p["LET_threshold"],
            p["entropy_dimension"],
            ctx["high_quality_reference_library"],
            verbose=False
        )
    print(f"\nparams({ctx['optimize_params']}) = {np.array(X)}\naccuracy: {acc*100}%")
    return 1.0 - acc
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def tune_params_DE(
    query_data=None,
    reference_data=None,
    similarity_measure='cosine',
    weights=None,
    spectrum_preprocessing_order='CNMWL',
    mz_min=0,
    mz_max=999999999,
    int_min=0,
    int_max=999999999,
    high_quality_reference_library=False,
    optimize_params=["window_size_centroiding","window_size_matching","noise_threshold","wf_mz","wf_int","LET_threshold","entropy_dimension"],
    param_bounds={"window_size_centroiding":(0.0,0.5),"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0.0,5.0),"entropy_dimension":(1.0,3.0)},
    default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1},
):
    """Tune continuous parameters with differential evolution and print the result.

    Args:
        query_data: path to the query data (mgf, mzML, cdf or csv). Non-csv
            input is first converted with ``build_library_from_raw_data``.
        reference_data: a path, or an iterable of paths, passed to
            ``get_reference_df``.
        similarity_measure, weights, spectrum_preprocessing_order, mz_min,
            mz_max, int_min, int_max, high_quality_reference_library: fixed
            settings forwarded unchanged to the objective function.
        optimize_params: names of the parameters to optimize.
        param_bounds: ``{name: (lower, upper)}``; needs an entry for every
            name in ``optimize_params``.
        default_params: values for the parameters that are not optimized.

    Raises:
        ValueError: if ``query_data`` has an unsupported file extension.
        KeyError: if a name in ``optimize_params`` has no bounds.

    The mutable default arguments are only read, never modified. A missing
    ``query_data`` or ``reference_data`` prints an error and calls
    ``sys.exit()``.
    """
    if query_data is None:
        print('\nError: No argument passed to the mandatory query_data. Please pass the path to the TXT file of the query data.')
        sys.exit()

    extension = query_data.rsplit('.', 1)[-1].lower()
    if extension in ('mgf', 'mzml', 'cdf'):
        # Replace the whole extension, whatever its length. Cutting a fixed
        # three characters turned "x.mzML" into "x.mcsv".
        output_path_tmp = query_data[:-len(extension)] + 'csv'
        build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
        df_query = pd.read_csv(output_path_tmp)
    elif extension == 'csv':
        df_query = pd.read_csv(query_data)
    else:
        # Otherwise df_query would be unbound further down.
        raise ValueError(f"Unsupported query_data file extension: '{extension}'. Expected mgf, mzML, cdf or csv.")

    if reference_data is None:
        print('\nError: No argument passed to the mandatory reference_data. Please pass the path to the CSV file of the reference data.')
        sys.exit()

    if isinstance(reference_data, str):
        df_reference = get_reference_df(reference_data=reference_data)
    else:
        reference_frames = [get_reference_df(reference_data=f) for f in reference_data]
        df_reference = pd.concat(reference_frames, axis=0, ignore_index=True)

    unique_query_ids = df_query['id'].unique().tolist()
    unique_reference_ids = df_reference['id'].unique().tolist()

    missing_bounds = [p for p in optimize_params if p not in param_bounds]
    if missing_bounds:
        raise KeyError(f"param_bounds has no entry for: {missing_bounds}")
    bounds = [param_bounds[p] for p in optimize_params]

    ctx = dict(
        df_query=df_query,
        df_reference=df_reference,
        unique_query_ids=unique_query_ids,
        unique_reference_ids=unique_reference_ids,
        similarity_measure=similarity_measure,
        weights=weights,
        spectrum_preprocessing_order=spectrum_preprocessing_order,
        mz_min=mz_min, mz_max=mz_max, int_min=int_min, int_max=int_max,
        high_quality_reference_library=high_quality_reference_library,
        default_params=default_params,
        optimize_params=optimize_params,
    )

    result = differential_evolution(
        objective_function_HRMS,
        bounds=bounds,
        args=(ctx,),
        maxiter=3,
        tol=0.0,
        workers=-1,
        # SciPy switches to deferred updating whenever workers != 1; stating
        # it explicitly avoids the warning about the overridden default.
        updating='deferred',
        seed=1,
    )

    best_full_params = _vector_to_full_params(result.x, default_params, optimize_params)
    # result.fun is 1 - accuracy, so convert it back to a percentage.
    best_acc = 100.0 - (result.fun * 100.0)

    print("\n=== Differential Evolution Result ===")
    print(f"Optimized over: {optimize_params}")
    print("Best values (selected params):")
    for name in optimize_params:
        print(f" {name}: {best_full_params[name]}")
    print("\nFull parameter set used in final evaluation:")
    for k, v in best_full_params.items():
        print(f" {k}: {v}")
    print(f"\nBest accuracy: {best_acc:.3f}%")
|
|
143
|
+
|
|
144
|
+
|
|
12
145
|
|
|
13
146
|
|
|
14
147
|
default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
|
|
@@ -37,6 +170,7 @@ def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_id
|
|
|
37
170
|
LET_threshold=LET_threshold_tmp,
|
|
38
171
|
entropy_dimension=entropy_dimension_tmp,
|
|
39
172
|
high_quality_reference_library=high_quality_reference_library_tmp,
|
|
173
|
+
verbose=True
|
|
40
174
|
)
|
|
41
175
|
|
|
42
176
|
return (
|
|
@@ -77,7 +211,7 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
|
|
|
77
211
|
|
|
78
212
|
|
|
79
213
|
|
|
80
|
-
def
|
|
214
|
+
def tune_params_on_HRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
81
215
|
"""
|
|
82
216
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
|
|
83
217
|
|
|
@@ -153,7 +287,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, ou
|
|
|
153
287
|
|
|
154
288
|
|
|
155
289
|
|
|
156
|
-
def
|
|
290
|
+
def tune_params_on_HRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
157
291
|
"""
|
|
158
292
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible
|
|
159
293
|
combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
|
|
@@ -261,7 +395,7 @@ def tune_params_on_HRMS_data_shiny(query_data=None, reference_data=None, grid=No
|
|
|
261
395
|
print(f'Wrote results to {output_path}')
|
|
262
396
|
|
|
263
397
|
|
|
264
|
-
def
|
|
398
|
+
def tune_params_on_NRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
265
399
|
"""
|
|
266
400
|
runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
|
|
267
401
|
|
|
@@ -335,7 +469,7 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, ou
|
|
|
335
469
|
|
|
336
470
|
|
|
337
471
|
|
|
338
|
-
def
|
|
472
|
+
def tune_params_on_NRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
|
|
339
473
|
"""
|
|
340
474
|
runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible
|
|
341
475
|
combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
|
|
@@ -441,21 +575,26 @@ def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=No
|
|
|
441
575
|
|
|
442
576
|
|
|
443
577
|
|
|
444
|
-
def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
|
|
578
|
+
def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
|
|
445
579
|
|
|
580
|
+
#print('\n\n\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n\n\n')
|
|
446
581
|
n_top_matches_to_save = 1
|
|
447
582
|
|
|
448
583
|
all_similarity_scores = []
|
|
449
584
|
for query_idx in range(0,len(unique_query_ids)):
|
|
450
|
-
|
|
585
|
+
if verbose is True:
|
|
586
|
+
print(f'query spectrum #{query_idx} is being identified')
|
|
451
587
|
q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
|
|
452
588
|
q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
589
|
+
#q_spec_tmp = q_spec_tmp.astype(float)
|
|
453
590
|
|
|
454
591
|
similarity_scores = []
|
|
455
592
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
456
593
|
q_spec = q_spec_tmp
|
|
457
594
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
458
595
|
r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
596
|
+
#print(r_spec)
|
|
597
|
+
#r_spec = r_spec.astype(float)
|
|
459
598
|
|
|
460
599
|
is_matched = False
|
|
461
600
|
for transformation in spectrum_preprocessing_order:
|
|
@@ -529,7 +668,7 @@ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
529
668
|
|
|
530
669
|
|
|
531
670
|
|
|
532
|
-
def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
|
|
671
|
+
def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
|
|
533
672
|
|
|
534
673
|
n_top_matches_to_save = 1
|
|
535
674
|
|
|
@@ -546,7 +685,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
546
685
|
similarity_scores = []
|
|
547
686
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
548
687
|
q_spec = q_spec_tmp
|
|
549
|
-
if ref_idx % 1000 == 0:
|
|
688
|
+
if verbose is True and ref_idx % 1000 == 0:
|
|
550
689
|
print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
551
690
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
552
691
|
r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
@@ -615,7 +754,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
|
|
|
615
754
|
|
|
616
755
|
|
|
617
756
|
|
|
618
|
-
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
|
|
757
|
+
def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False, verbose=True):
|
|
619
758
|
'''
|
|
620
759
|
runs spectral library matching on high-resolution mass spectrometry (HRMS) data
|
|
621
760
|
|
|
@@ -762,14 +901,13 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
|
|
|
762
901
|
|
|
763
902
|
all_similarity_scores = []
|
|
764
903
|
for query_idx in range(0,len(unique_query_ids)):
|
|
765
|
-
|
|
904
|
+
if verbose is True:
|
|
905
|
+
print(f'query spectrum #{query_idx} is being identified')
|
|
766
906
|
q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
|
|
767
907
|
q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
768
908
|
|
|
769
909
|
similarity_scores = []
|
|
770
910
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
771
|
-
#if ref_idx % 100 == 0:
|
|
772
|
-
# print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
773
911
|
q_spec = q_spec_tmp
|
|
774
912
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
775
913
|
r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
@@ -1008,9 +1146,9 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
|
|
|
1008
1146
|
|
|
1009
1147
|
similarity_scores = []
|
|
1010
1148
|
for ref_idx in range(0,len(unique_reference_ids)):
|
|
1011
|
-
|
|
1012
|
-
if ref_idx % 1000 == 0:
|
|
1149
|
+
if verbose is True and ref_idx % 1000 == 0:
|
|
1013
1150
|
print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
|
|
1151
|
+
q_spec = q_spec_tmp
|
|
1014
1152
|
r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
|
|
1015
1153
|
r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
|
|
1016
1154
|
r_spec = convert_spec(r_spec_tmp,mzs)
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
|
|
2
|
+
#!/usr/bin/env python3
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Tuple
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy.optimize import differential_evolution
|
|
11
|
+
from pycompound.spec_lib_matching import get_acc_HRMS, get_acc_NRMS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Names that may be passed to --opt; main() rejects anything not listed here.
ALL_PARAMS = [
    "window_size_centroiding",
    "window_size_matching",
    "noise_threshold",
    "wf_mz",
    "wf_int",
    "LET_threshold",
    "entropy_dimension",
]

# (low, high) search interval per parameter, used unless replaced via --bound.
SUGGESTED_BOUNDS = {
    "window_size_centroiding": (0.0, 0.5),
    "window_size_matching": (0.0, 0.5),
    "noise_threshold": (0.0, 0.25),
    "wf_mz": (0.0, 5.0),
    "wf_int": (0.0, 5.0),
    "LET_threshold": (0.0, 5.0),
    "entropy_dimension": (1.0, 3.0),
}

# Values used for parameters that are not being optimized (overridable via
# --default).
DEFAULT_PARAMS = {
    "window_size_centroiding": 0.5,
    "window_size_matching": 0.5,
    "noise_threshold": 0.10,
    "wf_mz": 0.0,
    "wf_int": 1.0,
    "LET_threshold": 0.0,
    "entropy_dimension": 1.1,
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ---------- Utilities ----------
|
|
46
|
+
def parse_bound(s: str) -> Tuple[str, Tuple[float, float]]:
    """Parse one ``--bound`` argument of the form ``name=min:max``.

    Args:
        s: Raw command-line token, e.g. ``"wf_mz=0:5"``.

    Returns:
        ``(name, (min, max))`` where ``name`` has surrounding whitespace
        removed and both limits are floats.

    Raises:
        argparse.ArgumentTypeError: If the ``=`` or ``:`` separator is missing
            (or ``:`` only appears before ``=``), a limit is not numeric, a
            limit is NaN/infinite, or the lower limit exceeds the upper one.
    """
    # partition() never raises, so a malformed token such as "a:b=1" is
    # reported with the same clear message instead of an unpacking error.
    name, eq, rng = s.partition("=")
    lo, colon, hi = rng.partition(":")
    if not eq or not colon:
        raise argparse.ArgumentTypeError(f"Bad --bound format '{s}'. Use name=min:max")
    try:
        lo_f, hi_f = float(lo), float(hi)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric bound in '{s}': {e}") from e
    # float() accepts "nan"/"inf"; NaN also slips through the ordering check
    # below because every comparison with NaN is False. The optimizer needs
    # finite limits, so reject them here with a readable message.
    if not (np.isfinite(lo_f) and np.isfinite(hi_f)):
        raise argparse.ArgumentTypeError(f"Non-finite bound in '{s}'")
    if lo_f > hi_f:
        raise argparse.ArgumentTypeError(f"Lower bound > upper bound in '{s}'")
    return name.strip(), (lo_f, hi_f)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_default(s: str) -> Tuple[str, float]:
    """Parse one ``--default`` argument of the form ``name=value``.

    Args:
        s: Raw command-line token, e.g. ``"noise_threshold=0.05"``.

    Returns:
        ``(name, value)`` where ``name`` has surrounding whitespace removed
        and ``value`` is a float.

    Raises:
        argparse.ArgumentTypeError: If ``=`` is missing, or the value is not
            numeric or is NaN/infinite.
    """
    name, eq, val = s.partition("=")
    if not eq:
        raise argparse.ArgumentTypeError(f"Bad --default format '{s}'. Use name=value")
    try:
        v = float(val)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric default in '{s}': {e}") from e
    # float() happily parses "nan" and "inf"; neither is a usable parameter
    # value, so fail at the command line rather than deep inside the matching.
    if not np.isfinite(v):
        raise argparse.ArgumentTypeError(f"Non-finite default in '{s}'")
    return name.strip(), v
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _vector_to_full_params(X: np.ndarray, default_params: Dict[str, float], optimize_params: List[str]) -> Dict[str, float]:
    """Overlay an optimizer vector onto a copy of the default parameters.

    Args:
        X: Candidate values, one per entry of ``optimize_params`` and in the
            same order.
        default_params: Baseline value for every parameter; not modified.
        optimize_params: Names of the parameters that ``X`` supplies.

    Returns:
        A new dict equal to ``default_params`` except that each name in
        ``optimize_params`` maps to the matching element of ``X`` as a
        built-in ``float``.

    Raises:
        ValueError: If ``X`` and ``optimize_params`` differ in length.
    """
    # zip() would silently drop the surplus on a length mismatch, leaving some
    # parameters at their defaults without any indication.
    if len(X) != len(optimize_params):
        raise ValueError(
            f"Expected {len(optimize_params)} values for {list(optimize_params)}, got {len(X)}"
        )
    params = dict(default_params)
    params.update((name, float(val)) for name, val in zip(optimize_params, X))
    return params
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ---------- Objective wrappers (top-level, pickle-friendly) ----------
|
|
81
|
+
def objective_HRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective minimized by differential evolution for HRMS data.

    The candidate vector ``X`` is merged with ``ctx["default_params"]`` to
    obtain a full parameter set, ``get_acc_HRMS`` is evaluated with it (with
    progress output disabled), the candidate and its accuracy are printed,
    and ``1.0 - accuracy`` is returned so that minimizing the result
    maximizes the accuracy.

    Kept at module level so it can be pickled when the optimizer runs with
    multiple workers.
    """
    params = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
    acc = get_acc_HRMS(
        df_query=ctx["df_query"],
        df_reference=ctx["df_reference"],
        unique_query_ids=ctx["uq"],
        unique_reference_ids=ctx["ur"],
        similarity_measure=ctx["similarity_measure"],
        weights=ctx["weights"],
        spectrum_preprocessing_order=ctx["spectrum_preprocessing_order"],
        mz_min=ctx["mz_min"],
        mz_max=ctx["mz_max"],
        int_min=ctx["int_min"],
        int_max=ctx["int_max"],
        window_size_centroiding=params["window_size_centroiding"],
        window_size_matching=params["window_size_matching"],
        noise_threshold=params["noise_threshold"],
        wf_mz=params["wf_mz"],
        wf_int=params["wf_int"],
        LET_threshold=params["LET_threshold"],
        entropy_dimension=params["entropy_dimension"],
        high_quality_reference_library=ctx["high_quality_reference_library"],
        verbose=False,
    )
    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {acc*100}%")
    return 1.0 - acc
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def objective_NRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective minimized by differential evolution for NRMS data.

    The candidate vector ``X`` is merged with ``ctx["default_params"]`` to
    obtain a full parameter set, ``get_acc_NRMS`` is evaluated with it (with
    progress output disabled), the candidate and its accuracy are printed,
    and ``1.0 - accuracy`` is returned. The two window-size parameters are
    not forwarded because ``get_acc_NRMS`` is called without them.

    Kept at module level so it can be pickled when the optimizer runs with
    multiple workers.
    """
    params = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
    acc = get_acc_NRMS(
        df_query=ctx["df_query"],
        df_reference=ctx["df_reference"],
        unique_query_ids=ctx["uq"],
        unique_reference_ids=ctx["ur"],
        similarity_measure=ctx["similarity_measure"],
        weights=ctx["weights"],
        spectrum_preprocessing_order=ctx["spectrum_preprocessing_order"],
        mz_min=ctx["mz_min"],
        mz_max=ctx["mz_max"],
        int_min=ctx["int_min"],
        int_max=ctx["int_max"],
        noise_threshold=params["noise_threshold"],
        wf_mz=params["wf_mz"],
        wf_int=params["wf_int"],
        LET_threshold=params["LET_threshold"],
        entropy_dimension=params["entropy_dimension"],
        high_quality_reference_library=ctx["high_quality_reference_library"],
        verbose=False,
    )
    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {acc*100}%")
    return 1.0 - acc
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ---------- Main CLI ----------
|
|
114
|
+
def main():
    """Command-line entry point for differential-evolution parameter tuning.

    Steps:
      1. Parse the command line and validate ``--opt`` (known, non-repeated
         parameter names).
      2. Load the query CSV and the reference CSV(s); all reference files are
         concatenated into one frame. Both must contain an ``id`` column.
      3. Build the default-parameter and bound tables, applying any
         ``--default`` / ``--bound`` overrides.
      4. Run ``scipy.optimize.differential_evolution`` on ``objective_HRMS``
         or ``objective_NRMS`` depending on ``--chromatography_platform``.
      5. Print the best parameter values and the corresponding accuracy.

    Exits via ``sys.exit`` with a message on any invalid input.
    """
    p = argparse.ArgumentParser(
        description="Parameter tuning via Differential Evolution for HRMS/NRMS using pycompound."
    )
    p.add_argument("--chromatography_platform", choices=["HRMS", "NRMS"], default="HRMS", help="Chromatography Platform.")
    p.add_argument("--query_data", required=True, help="Path to query CSV (must contain 'id' column).")
    p.add_argument("--reference_data", required=True, nargs="+", help="Path(s) to reference CSV(s) (must contain 'id').")
    p.add_argument("--similarity_measure", default="cosine", choices=["cosine", "renyi", "tsallis"], help="Similarity measure.")
    p.add_argument("--weights", default="", help="Weights spec; empty means None.")
    p.add_argument("--spectrum-order", default="CNMWL", help="Spectrum preprocessing order string.")
    p.add_argument("--mz-min", type=float, default=0.0)
    p.add_argument("--mz-max", type=float, default=999_999_999.0)
    p.add_argument("--int-min", type=float, default=0.0)
    p.add_argument("--int-max", type=float, default=999_999_999.0)
    p.add_argument("--hq-ref-lib", action="store_true", help="Use high-quality reference library flag.")
    p.add_argument("--opt", nargs="+", default=["window_size_centroiding", "noise_threshold", "wf_mz", "wf_int"],
                   help=f"Parameters to optimize (subset of {ALL_PARAMS}).")
    p.add_argument("--bound", action="append", default=[], type=parse_bound,
                   help="Bound spec 'name=min:max'. Repeatable.")
    p.add_argument("--default", dest="defaults", action="append", default=[], type=parse_default,
                   help="Override a default 'name=value' for non-optimized params or initial values.")
    p.add_argument("--maxiter", type=int, default=15)
    p.add_argument("--seed", type=int, default=1)
    p.add_argument("--workers", type=int, default=-1, help="Use -1 for all cores; 1 to disable parallelism.")
    args = p.parse_args()

    unknown = [x for x in args.opt if x not in ALL_PARAMS]
    if unknown:
        sys.exit(f"Error: unknown --opt params: {unknown}")

    # A repeated name would become two search dimensions writing to the same
    # parameter (the later one wins), which only enlarges the search space.
    duplicated = sorted({x for x in args.opt if args.opt.count(x) > 1})
    if duplicated:
        sys.exit(f"Error: duplicate --opt params: {duplicated}")

    qpath = Path(args.query_data)
    if not qpath.exists():
        sys.exit(f"Query CSV not found: {qpath}")

    df_query = pd.read_csv(qpath)
    if "id" not in df_query.columns:
        sys.exit("Query CSV must contain an 'id' column.")

    ref_paths = [Path(pth) for pth in args.reference_data]
    for r in ref_paths:
        if not r.exists():
            sys.exit(f"Reference CSV not found: {r}")
    df_reference = pd.concat([pd.read_csv(r) for r in ref_paths], axis=0, ignore_index=True)
    if "id" not in df_reference.columns:
        sys.exit("Reference CSV must contain an 'id' column.")

    uq = df_query["id"].unique().tolist()
    ur = df_reference["id"].unique().tolist()

    # Start from the module-level tables and apply command-line overrides.
    default_params = dict(DEFAULT_PARAMS)
    for name, val in args.defaults:
        if name not in DEFAULT_PARAMS:
            sys.exit(f"--default refers to unknown parameter '{name}'. Allowed: {list(DEFAULT_PARAMS)}")
        default_params[name] = val

    param_bounds: Dict[str, Tuple[float, float]] = dict(SUGGESTED_BOUNDS)
    for name, (lo, hi) in args.bound:
        if name not in SUGGESTED_BOUNDS:
            sys.exit(f"--bound refers to unknown parameter '{name}'. Allowed: {list(SUGGESTED_BOUNDS)}")
        param_bounds[name] = (lo, hi)

    # One (low, high) pair per optimized parameter, in --opt order; the
    # optimizer's vector uses the same order.
    bounds = [param_bounds[name] for name in args.opt]

    # Everything the objective needs besides the candidate vector.
    # NOTE(review): a non-empty --weights value is forwarded as the raw
    # string; confirm that get_acc_HRMS/get_acc_NRMS accept that form.
    ctx = dict(
        df_query=df_query,
        df_reference=df_reference,
        uq=uq,
        ur=ur,
        similarity_measure=args.similarity_measure,
        weights=(None if args.weights.strip() == "" else args.weights),
        spectrum_preprocessing_order=args.spectrum_order,
        mz_min=float(args.mz_min),
        mz_max=float(args.mz_max),
        int_min=float(args.int_min),
        int_max=float(args.int_max),
        high_quality_reference_library=bool(args.hq_ref_lib),
        default_params=default_params,
        optimize_params=args.opt,
    )

    objective = objective_HRMS if args.chromatography_platform == "HRMS" else objective_NRMS

    # Accuracy (percent) of the best candidate after each generation.
    # NOTE(review): this list is filled but never read in this function.
    history_acc: List[float] = []

    def _cb(xk, convergence):
        # Re-evaluates the current best vector; the objective also prints it.
        acc_pct = (1.0 - objective(xk, ctx)) * 100.0
        history_acc.append(acc_pct)

    n_workers = int(args.workers)
    result = differential_evolution(
        objective,
        bounds=bounds,
        args=(ctx,),
        maxiter=int(args.maxiter),
        tol=0.0,
        seed=int(args.seed),
        workers=n_workers,
        # Parallel evaluation forces deferred updating inside scipy; request
        # it explicitly so scipy does not have to override the default and
        # warn about it.
        updating=("deferred" if n_workers != 1 else "immediate"),
        callback=_cb,
    )

    best_params = _vector_to_full_params(result.x, default_params, args.opt)
    best_acc_pct = (1.0 - result.fun) * 100.0

    print("\n=== Differential Evolution Result ===")
    print(f"Mode: {args.chromatography_platform}")
    print(f"Optimized over: {args.opt}")
    print("Best values (selected params):")
    for name in args.opt:
        print(f"  {name}: {best_params[name]}")
    print("\nFull parameter set used in final evaluation:")
    for k in ALL_PARAMS:
        print(f"  {k}: {best_params[k]}")
    print(f"\nBest accuracy: {best_acc_pct:.3f}%")
    print(f"DE raw: success={result.success}, nfev={result.nfev}, nit={result.nit}, message='{result.message}'")
|
|
230
|
+
|
|
231
|
+
if __name__ == "__main__":
|
|
232
|
+
main()
|
|
233
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
|
-
from pycompound.spec_lib_matching import
|
|
3
|
-
from pycompound.spec_lib_matching import
|
|
2
|
+
from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
|
|
3
|
+
from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
|
|
4
4
|
import argparse
|
|
5
5
|
import json
|
|
6
6
|
from pathlib import Path
|
|
@@ -61,9 +61,9 @@ grid['entropy_dimension'] = [float(x) for x in grid['entropy_dimension']]
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
if args.chromatography_platform == 'HRMS':
|
|
64
|
-
|
|
64
|
+
tune_params_on_HRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
|
|
65
65
|
|
|
66
66
|
if args.chromatography_platform == 'NRMS':
|
|
67
|
-
|
|
67
|
+
tune_params_on_NRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
|
|
68
68
|
|
|
69
69
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
app.py,sha256=GdKSi0aRstcIeQulLMxANTwhScVJDNR4HlHC4bkUhXg,62350
|
|
2
|
+
app2.py,sha256=ZTmShHRlv27_HhSzCj8JOVAPv5LLsjtgEkxt1c-7r6I,2950
|
|
3
|
+
pycompound/build_library.py,sha256=sXG5MTJnPE7Gr8YMlLWjfMS7JQrW32lCeCGDw-DFe38,4826
|
|
4
|
+
pycompound/plot_spectra.py,sha256=_yeHooNoJHYlTajaZ9hgUudisdWVlw1Zw1wJfV3tpqc,40632
|
|
5
|
+
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
6
|
+
pycompound/processing.py,sha256=q629rcDaMQMgef-4SbeV9cJnuiLXg97VT2F5AIsyqgI,10654
|
|
7
|
+
pycompound/similarity_measures.py,sha256=NbeVIy9DE_KWlDMXXylekjKuYVrtzbeEXbTutKFxmfU,10460
|
|
8
|
+
pycompound/spec_lib_matching.py,sha256=-4WNGUPqkAZwfrrmhdi7S3PcV-6d3O6nCe-efAtTzZA,73346
|
|
9
|
+
pycompound/spec_lib_matching_CLI.py,sha256=qiekC52FP6ET_3NYvxUDN7km7y1OLUsd9FB4SHfne_Y,9690
|
|
10
|
+
pycompound/tuning_CLI_DE.py,sha256=PXy95LD_jmVeWdgiMlMwEZU_KqPGqDao1skwe5U4Sfc,9147
|
|
11
|
+
pycompound/tuning_CLI_grid.py,sha256=0XU-4ShZiZ2MQy5d0zydH0hphqXvqGtf4etl-ePNarU,8560
|
|
12
|
+
pycompound-0.1.5.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
13
|
+
pycompound-0.1.5.dist-info/METADATA,sha256=DIz-j3Hh4tXJMjXS-Epg_fy0auOcwYx2UUUWk3MyGN4,1732
|
|
14
|
+
pycompound-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
pycompound-0.1.5.dist-info/top_level.txt,sha256=cd0cQe1lhTXW9uwTTM5kZgjscKvRQV8XQ41qOwedgHU,20
|
|
16
|
+
pycompound-0.1.5.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
app.py,sha256=JA2D6hMTqkvDWgpI7ZAhlyl7vhnSfZPQfI8HBgrJWW0,50781
|
|
2
|
-
pycompound/build_library.py,sha256=sXG5MTJnPE7Gr8YMlLWjfMS7JQrW32lCeCGDw-DFe38,4826
|
|
3
|
-
pycompound/plot_spectra.py,sha256=_yeHooNoJHYlTajaZ9hgUudisdWVlw1Zw1wJfV3tpqc,40632
|
|
4
|
-
pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
|
|
5
|
-
pycompound/processing.py,sha256=q629rcDaMQMgef-4SbeV9cJnuiLXg97VT2F5AIsyqgI,10654
|
|
6
|
-
pycompound/similarity_measures.py,sha256=NbeVIy9DE_KWlDMXXylekjKuYVrtzbeEXbTutKFxmfU,10460
|
|
7
|
-
pycompound/spec_lib_matching.py,sha256=hEMUD5rAuEfvX1PnDEwilXIWVubnvgDBp-EVwVHu9ro,67141
|
|
8
|
-
pycompound/spec_lib_matching_CLI.py,sha256=qiekC52FP6ET_3NYvxUDN7km7y1OLUsd9FB4SHfne_Y,9690
|
|
9
|
-
pycompound/tuning_CLI.py,sha256=8gdT4EhIpvLHG3PcYtQBmUiPE9fJybwr3LCCilX-EfE,8540
|
|
10
|
-
pycompound-0.1.3.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
|
|
11
|
-
pycompound-0.1.3.dist-info/METADATA,sha256=AzTZQSX5eQZ--_7Bh-UAgAV0mQ3IgQQPoW8aOhkIwNA,1732
|
|
12
|
-
pycompound-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
pycompound-0.1.3.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
|
|
14
|
-
pycompound-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|