pycompound 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {pycompound-0.1.3/src/pycompound.egg-info → pycompound-0.1.5}/PKG-INFO +1 -1
  2. {pycompound-0.1.3 → pycompound-0.1.5}/pyproject.toml +1 -1
  3. {pycompound-0.1.3 → pycompound-0.1.5}/src/app.py +248 -30
  4. pycompound-0.1.5/src/app2.py +101 -0
  5. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound/spec_lib_matching.py +152 -14
  6. pycompound-0.1.5/src/pycompound/tuning_CLI_DE.py +233 -0
  7. pycompound-0.1.3/src/pycompound/tuning_CLI.py → pycompound-0.1.5/src/pycompound/tuning_CLI_grid.py +4 -4
  8. {pycompound-0.1.3 → pycompound-0.1.5/src/pycompound.egg-info}/PKG-INFO +1 -1
  9. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound.egg-info/SOURCES.txt +3 -1
  10. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound.egg-info/top_level.txt +1 -0
  11. {pycompound-0.1.3 → pycompound-0.1.5}/tests/test_spec_lib_matching.py +2 -0
  12. pycompound-0.1.5/tests/test_tuning.py +60 -0
  13. pycompound-0.1.3/tests/test_tuning.py +0 -21
  14. {pycompound-0.1.3 → pycompound-0.1.5}/LICENSE +0 -0
  15. {pycompound-0.1.3 → pycompound-0.1.5}/README.md +0 -0
  16. {pycompound-0.1.3 → pycompound-0.1.5}/setup.cfg +0 -0
  17. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound/build_library.py +0 -0
  18. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound/plot_spectra.py +0 -0
  19. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound/plot_spectra_CLI.py +0 -0
  20. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound/processing.py +0 -0
  21. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound/similarity_measures.py +0 -0
  22. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound/spec_lib_matching_CLI.py +0 -0
  23. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound.egg-info/dependency_links.txt +0 -0
  24. {pycompound-0.1.3 → pycompound-0.1.5}/src/pycompound.egg-info/requires.txt +0 -0
  25. {pycompound-0.1.3 → pycompound-0.1.5}/tests/test_build_library.py +0 -0
  26. {pycompound-0.1.3 → pycompound-0.1.5}/tests/test_plot_spectra.py +0 -0
  27. {pycompound-0.1.3 → pycompound-0.1.5}/tests/test_similarity_measures.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pycompound"
7
- version = "0.1.3"
7
+ version = "0.1.5"
8
8
  authors = [
9
9
  { name="Hunter Dlugas", email="fy7392@wayne.edu" },
10
10
  ]
@@ -2,10 +2,11 @@
2
2
  from shiny import App, ui, reactive, render, req
3
3
  from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
4
4
  from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
5
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data
6
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data
7
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data_shiny
8
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data_shiny
5
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
6
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
7
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid_shiny
8
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid_shiny
9
+ from pycompound.spec_lib_matching import tune_params_DE
9
10
  from pycompound.plot_spectra import generate_plots_on_HRMS_data
10
11
  from pycompound.plot_spectra import generate_plots_on_NRMS_data
11
12
  from pathlib import Path
@@ -25,7 +26,6 @@ import ast
25
26
  from numbers import Real
26
27
 
27
28
 
28
-
29
29
  _LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
30
30
 
31
31
  def _run_with_redirects(fn, writer, *args, **kwargs):
@@ -395,7 +395,7 @@ def run_spec_lib_matching_ui(platform: str):
395
395
 
396
396
 
397
397
 
398
- def run_parameter_tuning_ui(platform: str):
398
+ def run_parameter_tuning_grid_ui(platform: str):
399
399
  base_inputs = [
400
400
  ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
401
401
  ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
@@ -436,7 +436,7 @@ def run_parameter_tuning_ui(platform: str):
436
436
  ]
437
437
 
438
438
 
439
- run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
439
+ run_button_parameter_tuning_grid = ui.download_button("run_btn_parameter_tuning_grid", "Tune parameters (grid search)", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
440
440
  back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
441
441
 
442
442
  if platform == "HRMS":
@@ -466,7 +466,7 @@ def run_parameter_tuning_ui(platform: str):
466
466
  ui.TagList(
467
467
  ui.h2("Tune parameters"),
468
468
  inputs_columns,
469
- run_button_parameter_tuning,
469
+ run_button_parameter_tuning_grid,
470
470
  back_button,
471
471
  log_panel
472
472
  ),
@@ -474,8 +474,120 @@ def run_parameter_tuning_ui(platform: str):
474
474
 
475
475
 
476
476
 
477
+ PARAMS_HRMS = {
478
+ "window_size_centroiding": (0.0, 0.5),
479
+ "window_size_matching": (0.0, 0.5),
480
+ "noise_threshold": (0.0, 0.25),
481
+ "wf_mz": (0.0, 5.0),
482
+ "wf_int": (0.0, 5.0),
483
+ "LET_threshold": (0.0, 5.0),
484
+ "entropy_dimension": (1.0, 3.0)
485
+ }
486
+
487
+ PARAMS_NRMS = {
488
+ "noise_threshold": (0.0, 0.25),
489
+ "wf_mz": (0.0, 5.0),
490
+ "wf_int": (0.0, 5.0),
491
+ "LET_threshold": (0.0, 5.0),
492
+ "entropy_dimension": (1.0, 3.0)
493
+ }
494
+
495
+ def run_parameter_tuning_DE_ui(platform: str):
496
+ if platform == 'HRMS':
497
+ PARAMS=PARAMS_HRMS
498
+ else:
499
+ PARAMS=PARAMS_NRMS
500
+
501
+ base_inputs = [
502
+ ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
503
+ ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
504
+ ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
505
+ ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
506
+ ui.input_select("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", [False, True],),
507
+ ]
508
+
509
+ if platform == "HRMS":
510
+ extra_inputs = [
511
+ ui.input_text(
512
+ "spectrum_preprocessing_order",
513
+ "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
514
+ "FCNMWL",
515
+ ),
516
+ ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
517
+ ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
518
+ ]
519
+ else:
520
+ extra_inputs = [
521
+ ui.input_text(
522
+ "spectrum_preprocessing_order",
523
+ "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
524
+ "FNLW",
525
+ )
526
+ ]
527
+
528
+ numeric_inputs = [
529
+ ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
530
+ ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
531
+ ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
532
+ ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
533
+ ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
534
+ ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
535
+ ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
536
+ ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
537
+ ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
538
+ ]
539
+
540
+
541
+ #run_button_parameter_tuning_DE = ui.download_button("run_btn_parameter_tuning_DE", "Tune parameters (differential evolution optimization)", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
542
+ run_button_parameter_tuning_DE = ui.input_action_button("run_btn_parameter_tuning_DE", "Tune parameters (differential evolution optimization)", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
543
+ back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
544
+
545
+ if platform == "HRMS":
546
+ inputs_columns = ui.layout_columns(
547
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
548
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
549
+ ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
550
+ ui.div([numeric_inputs[5:10]], style="display:flex; flex-direction:column; gap:10px;"),
551
+ col_widths=(3,3,3,3),
552
+ )
553
+ elif platform == "NRMS":
554
+ inputs_columns = ui.layout_columns(
555
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
556
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
557
+ ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
558
+ ui.div([numeric_inputs[5:10]], style="display:flex; flex-direction:column; gap:10px;"),
559
+ col_widths=(3,3,3,3),
560
+ )
561
+
562
+ return ui.page_fillable(
563
+ ui.layout_sidebar(
564
+ ui.sidebar(
565
+ ui.h3("Select parameters"),
566
+ ui.input_checkbox_group(
567
+ "params",
568
+ None,
569
+ choices=list(PARAMS.keys()),
570
+ selected=["noise_threshold","LET_threshold"],
571
+ ),
572
+ ui.hr(),
573
+ ui.h4("Bounds for selected parameters"),
574
+ ui.output_ui("bounds_inputs"),
575
+ width=360,
576
+ ),
577
+ ui.div(
578
+ ui.h2("Tune parameters (differential evolution optimization)"),
579
+ *(inputs_columns if isinstance(inputs_columns, (list, tuple)) else [inputs_columns]),
580
+ run_button_parameter_tuning_DE,
581
+ back_button,
582
+ ),
583
+ )
584
+ )
585
+
586
+
587
+
477
588
 
478
589
  app_ui = ui.page_fluid(
590
+ ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
479
591
  ui.output_ui("main_ui"),
480
592
  ui.output_text("status_output")
481
593
  )
@@ -492,8 +604,10 @@ def server(input, output, session):
492
604
  run_status_plot_spectra = reactive.Value("")
493
605
  run_status_spec_lib_matching = reactive.Value("")
494
606
  run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
495
- run_status_parameter_tuning = reactive.Value("")
496
- is_tuning_running = reactive.Value(False)
607
+ run_status_parameter_tuning_grid = reactive.Value("")
608
+ run_status_parameter_tuning_DE = reactive.Value("")
609
+ is_tuning_grid_running = reactive.Value(False)
610
+ is_tuning_DE_running = reactive.Value(False)
497
611
  match_log_rv = reactive.Value("")
498
612
  is_matching_rv = reactive.Value(False)
499
613
  is_any_job_running = reactive.Value(False)
@@ -513,6 +627,64 @@ def server(input, output, session):
513
627
  converted_query_path_rv = reactive.Value(None)
514
628
  converted_reference_path_rv = reactive.Value(None)
515
629
 
630
+ @output
631
+ @render.ui
632
+ def bounds_inputs():
633
+ selected = input.params()
634
+ if not selected:
635
+ return ui.div(ui.em("Select one or more parameters above."))
636
+
637
+ if input.chromatography_platform() == 'HRMS':
638
+ PARAMS = PARAMS_HRMS
639
+ else:
640
+ PARAMS = PARAMS_NRMS
641
+ blocks = []
642
+ for name in selected:
643
+ lo, hi = PARAMS.get(name, (0.0, 1.0))
644
+ blocks.append(
645
+ ui.card(
646
+ ui.card_header(name),
647
+ ui.layout_columns(
648
+ ui.input_numeric(f"min_{name}", "Lower", lo, step=0.001),
649
+ ui.input_numeric(f"max_{name}", "Upper", hi, step=0.001),
650
+ )
651
+ )
652
+ )
653
+ return ui.div(*blocks)
654
+
655
+ def _read_bounds_dict():
656
+ selected = input.params()
657
+ out = {}
658
+ for name in selected:
659
+ lo_default, hi_default = PARAMS.get(name, (0.0, 1.0))
660
+ lo_id = f"min_{name}"
661
+ hi_id = f"max_{name}"
662
+
663
+ lo_val = input[lo_id]() if lo_id in input else lo_default
664
+ hi_val = input[hi_id]() if hi_id in input else hi_default
665
+
666
+ out[name] = (float(lo_val), float(hi_val))
667
+ return out
668
+
669
+ def _read_bounds():
670
+ opt_params = input.params()
671
+ bounds_dict = {}
672
+ if input.chromatography_platform() == 'HRMS':
673
+ PARAMS = PARAMS_HRMS
674
+ else:
675
+ PARAMS = PARAMS_NRMS
676
+
677
+ for p in opt_params:
678
+ lo_id, hi_id = f"min_{p}", f"max_{p}"
679
+ lo_default, hi_default = PARAMS.get(p, (0.0, 1.0))
680
+ lo = input[lo_id]() if lo_id in input else lo_default
681
+ hi = input[hi_id]() if hi_id in input else hi_default
682
+ if lo > hi:
683
+ lo, hi = hi, lo
684
+ bounds_dict[p] = (float(lo), float(hi))
685
+
686
+ bounds_list = [bounds_dict[p] for p in opt_params]
687
+ return opt_params, bounds_dict, bounds_list
516
688
 
517
689
  def _reset_plot_spectra_state():
518
690
  query_status_rv.set("")
@@ -545,7 +717,8 @@ def server(input, output, session):
545
717
 
546
718
  def _reset_parameter_tuning_state():
547
719
  match_log_rv.set("")
548
- is_tuning_running.set(False)
720
+ is_tuning_grid_running.set(False)
721
+ is_tuning_DE_running.set(False)
549
722
  is_any_job_running.set(False)
550
723
 
551
724
 
@@ -557,7 +730,9 @@ def server(input, output, session):
557
730
  _reset_plot_spectra_state()
558
731
  elif page == "run_spec_lib_matching":
559
732
  _reset_spec_lib_matching_state()
560
- elif page == "run_parameter_tuning":
733
+ elif page == "run_parameter_tuning_grid":
734
+ _reset_parameter_tuning_state()
735
+ elif page == "run_parameter_tuning_DE":
561
736
  _reset_parameter_tuning_state()
562
737
 
563
738
  @reactive.effect
@@ -567,7 +742,9 @@ def server(input, output, session):
567
742
  _reset_plot_spectra_state()
568
743
  elif page == "run_spec_lib_matching":
569
744
  _reset_spec_lib_matching_state()
570
- elif page == "run_parameter_tuning":
745
+ elif page == "run_parameter_tuning_grid":
746
+ _reset_parameter_tuning_state()
747
+ elif page == "run_parameter_tuning_DE":
571
748
  _reset_parameter_tuning_state()
572
749
 
573
750
 
@@ -595,7 +772,7 @@ def server(input, output, session):
595
772
 
596
773
  @reactive.effect
597
774
  async def _pump_logs():
598
- if not (is_any_job_running.get() or is_tuning_running.get() or is_matching_rv.get()):
775
+ if not (is_any_job_running.get() or is_tuning_grid_running.get() or is_tuning_DE_running.get() or is_matching_rv.get()):
599
776
  return
600
777
  reactive.invalidate_later(0.05)
601
778
  msgs = _drain_queue_nowait(_LOG_QUEUE)
@@ -674,9 +851,12 @@ def server(input, output, session):
674
851
  elif input.run_spec_lib_matching() > match_clicks.get():
675
852
  current_page.set("run_spec_lib_matching")
676
853
  match_clicks.set(input.run_spec_lib_matching())
677
- elif input.run_parameter_tuning() > match_clicks.get():
678
- current_page.set("run_parameter_tuning")
679
- match_clicks.set(input.run_parameter_tuning())
854
+ elif input.run_parameter_tuning_grid() > match_clicks.get():
855
+ current_page.set("run_parameter_tuning_grid")
856
+ match_clicks.set(input.run_parameter_tuning_grid())
857
+ elif input.run_parameter_tuning_DE() > match_clicks.get():
858
+ current_page.set("run_parameter_tuning_DE")
859
+ match_clicks.set(input.run_parameter_tuning_DE())
680
860
  elif hasattr(input, "back") and input.back() > back_clicks.get():
681
861
  current_page.set("main_menu")
682
862
  back_clicks.set(input.back())
@@ -688,7 +868,6 @@ def server(input, output, session):
688
868
  img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
689
869
  return img
690
870
 
691
-
692
871
  @output
693
872
  @render.ui
694
873
  def main_ui():
@@ -697,6 +876,7 @@ def server(input, output, session):
697
876
  ui.h2("Main Menu"),
698
877
  ui.div(
699
878
  ui.output_image("image"),
879
+ #ui.img(src="emblem.png", width="320px", height="250px"),
700
880
  style=(
701
881
  "position:fixed; top:0; left:50%; transform:translateX(-50%); "
702
882
  "z-index:1000; text-align:center; padding:10px; background-color:white;"
@@ -720,7 +900,8 @@ def server(input, output, session):
720
900
  ),
721
901
  ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
722
902
  ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
723
- ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
903
+ ui.input_action_button("run_parameter_tuning_grid", "Grid search: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
904
+ ui.input_action_button("run_parameter_tuning_DE", "Differential evolution optimization: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:500px; height:150px; margin-top:10px; margin-right:50px"),
724
905
  ui.div(
725
906
  "References:",
726
907
  style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
@@ -771,8 +952,10 @@ def server(input, output, session):
771
952
  return plot_spectra_ui(input.chromatography_platform())
772
953
  elif current_page() == "run_spec_lib_matching":
773
954
  return run_spec_lib_matching_ui(input.chromatography_platform())
774
- elif current_page() == "run_parameter_tuning":
775
- return run_parameter_tuning_ui(input.chromatography_platform())
955
+ elif current_page() == "run_parameter_tuning_grid":
956
+ return run_parameter_tuning_grid_ui(input.chromatography_platform())
957
+ elif current_page() == "run_parameter_tuning_DE":
958
+ return run_parameter_tuning_DE_ui(input.chromatography_platform())
776
959
 
777
960
 
778
961
 
@@ -1014,10 +1197,10 @@ def server(input, output, session):
1014
1197
  yield buf.getvalue()
1015
1198
 
1016
1199
 
1017
- @render.download(filename="parameter_tuning_output.txt")
1018
- async def run_btn_parameter_tuning():
1200
+ @render.download(filename="parameter_tuning_grid_output.txt")
1201
+ async def run_btn_parameter_tuning_grid():
1019
1202
  is_any_job_running.set(True)
1020
- is_tuning_running.set(True)
1203
+ is_tuning_grid_running.set(True)
1021
1204
  match_log_rv.set("Running grid search of all parameters specified...\n")
1022
1205
  await reactive.flush()
1023
1206
 
@@ -1038,7 +1221,7 @@ def server(input, output, session):
1038
1221
  common_kwargs = dict(
1039
1222
  query_data=input.query_data()[0]["datapath"],
1040
1223
  reference_data=input.reference_data()[0]["datapath"],
1041
- output_path=str(Path.cwd() / "parameter_tuning_output.txt"),
1224
+ output_path=str(Path.cwd() / "parameter_tuning_grid_output.txt"),
1042
1225
  return_output=True,
1043
1226
  )
1044
1227
 
@@ -1066,7 +1249,7 @@ def server(input, output, session):
1066
1249
  'window_size_centroiding': window_size_centroiding_tmp,
1067
1250
  'window_size_matching': window_size_matching_tmp,
1068
1251
  }
1069
- df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_HRMS_data_shiny, rw, **common_kwargs, grid=grid)
1252
+ df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_HRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
1070
1253
  else:
1071
1254
  grid = {
1072
1255
  'similarity_measure': similarity_measure_tmp,
@@ -1083,26 +1266,59 @@ def server(input, output, session):
1083
1266
  'entropy_dimension': entropy_dimension_tmp,
1084
1267
  'high_quality_reference_library': high_quality_reference_library_tmp,
1085
1268
  }
1086
- df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_NRMS_data_shiny, rw, **common_kwargs, grid=grid)
1269
+ df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_NRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
1087
1270
 
1088
1271
  match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
1089
1272
  except Exception as e:
1090
1273
  match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
1091
1274
  raise
1092
1275
  finally:
1093
- is_tuning_running.set(False)
1276
+ is_tuning_grid_running.set(False)
1094
1277
  is_any_job_running.set(False)
1095
1278
  await reactive.flush()
1096
1279
 
1097
1280
  yield df_out.to_csv(index=False).encode("utf-8", sep='\t')
1098
1281
 
1099
1282
 
1283
+ @reactive.effect
1284
+ @reactive.event(input.run_btn_parameter_tuning_DE)
1285
+ def _run_btn_parameter_tuning_DE():
1286
+ is_any_job_running.set(True)
1287
+ is_tuning_DE_running.set(True)
1288
+ match_log_rv.set("Tuning specified continuous parameters using differential evolution...\n")
1289
+
1290
+ #print('\nhere!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
1291
+ weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
1292
+ weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
1293
+ opt_params, bounds_dict, bounds_list = _read_bounds()
1294
+ #print(input.params())
1295
+ #print("Optimizing over:", opt_params)
1296
+ #print("Bounds list:", bounds_list)
1297
+ #print("Bounds dict:", bounds_dict)
1298
+ #tmp = {"window_size_centroiding":input.window_size_centroiding(), "window_size_matching":input.window_size_matching(), "noise_threshold":input.noise_threshold(), "wf_mz":input.wf_mz(), "wf_int":input.wf_int(), "LET_threshold":input.LET_threshold(), "entropy_dimension":input.entropy_dimension()}
1299
+ #print(tmp)
1300
+ if input.chromatography_platform() == 'HRMS':
1301
+ tune_params_DE(query_data=input.query_data()[0]["datapath"],
1302
+ reference_data=input.reference_data()[0]["datapath"],
1303
+ similarity_measure=input.similarity_measure(),
1304
+ weights=weights,
1305
+ spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
1306
+ mz_min=input.mz_min(),
1307
+ mz_max=input.mz_max(),
1308
+ int_min=input.int_min(),
1309
+ int_max=input.int_max(),
1310
+ high_quality_reference_library=input.high_quality_reference_library(),
1311
+ optimize_params=list(input.params()),
1312
+ param_bounds={"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
1313
+ #param_bounds=bounds_dict,
1314
+ default_params={"window_size_centroiding":input.window_size_centroiding(), "window_size_matching":input.window_size_matching(), "noise_threshold":input.noise_threshold(), "wf_mz":input.wf_mz(), "wf_int":input.wf_int(), "LET_threshold":input.LET_threshold(), "entropy_dimension":input.entropy_dimension()})
1315
+ #print('here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n')
1100
1316
 
1101
1317
 
1102
1318
 
1103
1319
  @reactive.effect
1104
1320
  async def _pump_reactive_writer_logs():
1105
- if not is_tuning_running.get():
1321
+ if not is_tuning_grid_running.get():
1106
1322
  return
1107
1323
 
1108
1324
  reactive.invalidate_later(0.1)
@@ -1116,9 +1332,11 @@ def server(input, output, session):
1116
1332
  def status_output():
1117
1333
  return run_status_plot_spectra.get()
1118
1334
  return run_status_spec_lib_matching.get()
1119
- return run_status_parameter_tuning.get()
1335
+ return run_status_parameter_tuning_grid.get()
1336
+ return run_status_parameter_tuning_DE.get()
1120
1337
 
1121
1338
 
1122
1339
  app = App(app_ui, server)
1123
1340
 
1124
1341
 
1342
+
@@ -0,0 +1,101 @@
1
+
2
+
3
+ # app.py
4
+ from shiny import App, ui, render, reactive
5
+ import pandas as pd
6
+
7
+ # Parameters to choose from + suggested default ranges
8
+ PARAMS = {
9
+ "window_size_centroiding": (0.0, 0.5),
10
+ "window_size_matching": (0.0, 0.5),
11
+ "noise_threshold": (0.0, 0.25),
12
+ "wf_mz": (0.0, 5.0),
13
+ "wf_int": (0.0, 5.0),
14
+ "LET_threshold": (0.0, 5.0),
15
+ "entropy_dimension": (1.0, 3.0),
16
+ }
17
+
18
+ app_ui = ui.page_fillable(
19
+ ui.layout_sidebar(
20
+ ui.sidebar(
21
+ ui.h3("Select parameters"),
22
+ ui.input_checkbox_group(
23
+ id="params",
24
+ label=None,
25
+ choices=list(PARAMS.keys()),
26
+ selected=["window_size_centroiding", "noise_threshold"],
27
+ ),
28
+ ui.hr(),
29
+ ui.h4("Bounds for selected parameters"),
30
+ ui.output_ui("bounds_inputs"),
31
+ width=360,
32
+ ),
33
+ )
34
+ )
35
+
36
+ def server(input, output, session):
37
+ @output
38
+ @render.ui
39
+ def bounds_inputs():
40
+ selected = input.params()
41
+ if not selected:
42
+ return ui.div(ui.em("Select one or more parameters above."))
43
+
44
+ blocks = []
45
+ for name in selected:
46
+ lo, hi = PARAMS.get(name, (0.0, 1.0))
47
+ blocks.append(
48
+ ui.card(
49
+ ui.card_header(name),
50
+ ui.layout_columns(
51
+ ui.input_numeric(f"min_{name}", "Lower", lo, step=0.001),
52
+ ui.input_numeric(f"max_{name}", "Upper", hi, step=0.001),
53
+ )
54
+ )
55
+ )
56
+ return ui.div(*blocks)
57
+
58
+ def _read_bounds_dict():
59
+ selected = input.params()
60
+ out = {}
61
+ for name in selected:
62
+ lo_default, hi_default = PARAMS.get(name, (0.0, 1.0))
63
+ lo_id = f"min_{name}"
64
+ hi_id = f"max_{name}"
65
+
66
+ # Use input[...]() and guard with "in input"
67
+ lo_val = input[lo_id]() if lo_id in input else lo_default
68
+ hi_val = input[hi_id]() if hi_id in input else hi_default
69
+
70
+ out[name] = (float(lo_val), float(hi_val))
71
+ return out
72
+
73
+
74
+
75
+ # Table of current bounds
76
+ @output
77
+ @render.data_frame
78
+ def bounds_table():
79
+ b = _read_bounds_dict()
80
+ if not b:
81
+ return pd.DataFrame(columns=["parameter", "lower", "upper"])
82
+ rows = [{"parameter": k, "lower": v[0], "upper": v[1]} for k, v in b.items()]
83
+ return pd.DataFrame(rows)
84
+
85
+ # JSON-ish view (string) you can parse/use elsewhere
86
+ @output
87
+ @render.text
88
+ def bounds_json():
89
+ b = _read_bounds_dict()
90
+ if not b:
91
+ return "{}"
92
+ # Pretty-print as Python dict literal for quick copy/paste
93
+ lines = ["{"]
94
+ for k, (lo, hi) in b.items():
95
+ lines.append(f" '{k}': ({lo}, {hi}),")
96
+ lines.append("}")
97
+ return "\n".join(lines)
98
+
99
+ app = App(app_ui, server)
100
+
101
+
@@ -9,6 +9,139 @@ from itertools import product
9
9
  from joblib import Parallel, delayed
10
10
  import csv
11
11
  import sys, csv
12
+ from scipy.optimize import differential_evolution
13
+
14
+
15
+ def _vector_to_full_params(X, default_params, optimize_params):
16
+ params = default_params.copy()
17
+ for name, val in zip(optimize_params, X):
18
+ params[name] = float(val)
19
+ return params
20
+
21
+
22
+ def objective_function_HRMS(X, ctx):
23
+ p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
24
+ if 'window_size_centroiding' in ctx.keys():
25
+ acc = get_acc_HRMS(
26
+ ctx["df_query"], ctx["df_reference"],
27
+ ctx["unique_query_ids"], ctx["unique_reference_ids"],
28
+ ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
29
+ ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
30
+ p["window_size_centroiding"], p["window_size_matching"], p["noise_threshold"],
31
+ p["wf_mz"], p["wf_int"], p["LET_threshold"],
32
+ p["entropy_dimension"],
33
+ ctx["high_quality_reference_library"],
34
+ verbose=False
35
+ )
36
+ else:
37
+ acc = get_acc_NRMS(
38
+ ctx["df_query"], ctx["df_reference"],
39
+ ctx["unique_query_ids"], ctx["unique_reference_ids"],
40
+ ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
41
+ ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
42
+ p["noise_threshold"],
43
+ p["wf_mz"], p["wf_int"], p["LET_threshold"],
44
+ p["entropy_dimension"],
45
+ ctx["high_quality_reference_library"],
46
+ verbose=False
47
+ )
48
+ print(f"\nparams({ctx['optimize_params']}) = {np.array(X)}\naccuracy: {acc*100}%")
49
+ return 1.0 - acc
50
+
51
+
52
+
53
+
54
+ def tune_params_DE(query_data=None, reference_data=None, similarity_measure='cosine', weights=None, spectrum_preprocessing_order='CNMWL', mz_min=0, mz_max=999999999, int_min=0, int_max=999999999, high_quality_reference_library=False, optimize_params=["window_size_centroiding","window_size_matching","noise_threshold","wf_mz","wf_int","LET_threshold","entropy_dimension"], param_bounds={"window_size_centroiding":(0.0,0.5),"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0.0,5.0),"entropy_dimension":(1.0,3.0)}, default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1}):
55
+
56
+ '''
57
+ print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
58
+ print(param_bounds)
59
+ print(default_params)
60
+ print(type(param_bounds['noise_threshold'][0]))
61
+ print(type(param_bounds['noise_threshold'][1]))
62
+ print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
63
+ '''
64
+
65
+ if query_data is None:
66
+ print('\nError: No argument passed to the mandatory query_data. Please pass the path to the TXT file of the query data.')
67
+ sys.exit()
68
+ else:
69
+ extension = query_data.rsplit('.',1)
70
+ extension = extension[(len(extension)-1)]
71
+ if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
72
+ output_path_tmp = query_data[:-3] + 'csv'
73
+ build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
74
+ df_query = pd.read_csv(output_path_tmp)
75
+ if extension == 'csv' or extension == 'CSV':
76
+ df_query = pd.read_csv(query_data)
77
+ unique_query_ids = df_query.iloc[:,0].unique()
78
+
79
+ if reference_data is None:
80
+ print('\nError: No argument passed to the mandatory reference_data. Please pass the path to the CSV file of the reference data.')
81
+ sys.exit()
82
+ else:
83
+ if isinstance(reference_data,str):
84
+ df_reference = get_reference_df(reference_data=reference_data)
85
+ unique_reference_ids = df_reference.iloc[:,0].unique()
86
+ else:
87
+ dfs = []
88
+ unique_reference_ids = []
89
+ for f in reference_data:
90
+ tmp = get_reference_df(reference_data=f)
91
+ dfs.append(tmp)
92
+ unique_reference_ids.extend(tmp.iloc[:,0].unique())
93
+ df_reference = pd.concat(dfs, axis=0, ignore_index=True)
94
+
95
+ unique_query_ids = df_query['id'].unique().tolist()
96
+ unique_reference_ids = df_reference['id'].unique().tolist()
97
+
98
+ ctx = dict(
99
+ df_query=df_query,
100
+ df_reference=df_reference,
101
+ unique_query_ids=unique_query_ids,
102
+ unique_reference_ids=unique_reference_ids,
103
+ similarity_measure=similarity_measure,
104
+ weights=weights,
105
+ spectrum_preprocessing_order=spectrum_preprocessing_order,
106
+ mz_min=mz_min, mz_max=mz_max, int_min=int_min, int_max=int_max,
107
+ high_quality_reference_library=high_quality_reference_library,
108
+ default_params=default_params,
109
+ optimize_params=optimize_params,
110
+ )
111
+
112
+ bounds = [param_bounds[p] for p in optimize_params]
113
+
114
+ #print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
115
+ #print(df_query.head())
116
+ #print(df_reference.head())
117
+ #print(bounds)
118
+ #print(ctx)
119
+ #print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
120
+
121
+ result = differential_evolution(
122
+ objective_function_HRMS,
123
+ bounds=bounds,
124
+ args=(ctx,),
125
+ maxiter=3,
126
+ tol=0.0,
127
+ workers=-1,
128
+ seed=1,
129
+ )
130
+
131
+ best_full_params = _vector_to_full_params(result.x, default_params, optimize_params)
132
+ best_acc = 100.0 - (result.fun * 100.0)
133
+
134
+ print("\n=== Differential Evolution Result ===")
135
+ print(f"Optimized over: {optimize_params}")
136
+ print("Best values (selected params):")
137
+ for name in optimize_params:
138
+ print(f" {name}: {best_full_params[name]}")
139
+ print("\nFull parameter set used in final evaluation:")
140
+ for k, v in best_full_params.items():
141
+ print(f" {k}: {v}")
142
+ print(f"\nBest accuracy: {best_acc:.3f}%")
143
+
144
+
12
145
 
13
146
 
14
147
  default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
@@ -37,6 +170,7 @@ def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_id
37
170
  LET_threshold=LET_threshold_tmp,
38
171
  entropy_dimension=entropy_dimension_tmp,
39
172
  high_quality_reference_library=high_quality_reference_library_tmp,
173
+ verbose=True
40
174
  )
41
175
 
42
176
  return (
@@ -77,7 +211,7 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
77
211
 
78
212
 
79
213
 
80
- def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
214
+ def tune_params_on_HRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
81
215
  """
82
216
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
83
217
 
@@ -153,7 +287,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, ou
153
287
 
154
288
 
155
289
 
156
- def tune_params_on_HRMS_data_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
290
+ def tune_params_on_HRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
157
291
  """
158
292
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible
159
293
  combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
@@ -261,7 +395,7 @@ def tune_params_on_HRMS_data_shiny(query_data=None, reference_data=None, grid=No
261
395
  print(f'Wrote results to {output_path}')
262
396
 
263
397
 
264
- def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
398
+ def tune_params_on_NRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
265
399
  """
266
400
  runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
267
401
 
@@ -335,7 +469,7 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, ou
335
469
 
336
470
 
337
471
 
338
- def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
472
+ def tune_params_on_NRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
339
473
  """
340
474
  runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible
341
475
  combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
@@ -441,21 +575,26 @@ def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=No
441
575
 
442
576
 
443
577
 
444
- def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
578
+ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
445
579
 
580
+ #print('\n\n\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n\n\n')
446
581
  n_top_matches_to_save = 1
447
582
 
448
583
  all_similarity_scores = []
449
584
  for query_idx in range(0,len(unique_query_ids)):
450
- print(f'query spectrum #{query_idx} is being identified')
585
+ if verbose is True:
586
+ print(f'query spectrum #{query_idx} is being identified')
451
587
  q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
452
588
  q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
589
+ #q_spec_tmp = q_spec_tmp.astype(float)
453
590
 
454
591
  similarity_scores = []
455
592
  for ref_idx in range(0,len(unique_reference_ids)):
456
593
  q_spec = q_spec_tmp
457
594
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
458
595
  r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
596
+ #print(r_spec)
597
+ #r_spec = r_spec.astype(float)
459
598
 
460
599
  is_matched = False
461
600
  for transformation in spectrum_preprocessing_order:
@@ -529,7 +668,7 @@ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
529
668
 
530
669
 
531
670
 
532
- def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
671
+ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
533
672
 
534
673
  n_top_matches_to_save = 1
535
674
 
@@ -546,7 +685,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
546
685
  similarity_scores = []
547
686
  for ref_idx in range(0,len(unique_reference_ids)):
548
687
  q_spec = q_spec_tmp
549
- if ref_idx % 1000 == 0:
688
+ if verbose is True and ref_idx % 1000 == 0:
550
689
  print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
551
690
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
552
691
  r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
@@ -615,7 +754,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
615
754
 
616
755
 
617
756
 
618
- def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
757
+ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False, verbose=True):
619
758
  '''
620
759
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data
621
760
 
@@ -762,14 +901,13 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
762
901
 
763
902
  all_similarity_scores = []
764
903
  for query_idx in range(0,len(unique_query_ids)):
765
- print(f'query spectrum #{query_idx} is being identified')
904
+ if verbose is True:
905
+ print(f'query spectrum #{query_idx} is being identified')
766
906
  q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
767
907
  q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
768
908
 
769
909
  similarity_scores = []
770
910
  for ref_idx in range(0,len(unique_reference_ids)):
771
- #if ref_idx % 100 == 0:
772
- # print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
773
911
  q_spec = q_spec_tmp
774
912
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
775
913
  r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
@@ -1008,9 +1146,9 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
1008
1146
 
1009
1147
  similarity_scores = []
1010
1148
  for ref_idx in range(0,len(unique_reference_ids)):
1011
- q_spec = q_spec_tmp
1012
- if ref_idx % 1000 == 0:
1149
+ if verbose is True and ref_idx % 1000 == 0:
1013
1150
  print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
1151
+ q_spec = q_spec_tmp
1014
1152
  r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
1015
1153
  r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
1016
1154
  r_spec = convert_spec(r_spec_tmp,mzs)
@@ -0,0 +1,233 @@
1
+
2
+ #!/usr/bin/env python3
3
+ import argparse
4
+ import sys
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Dict, List, Tuple
8
+ import numpy as np
9
+ import pandas as pd
10
+ from scipy.optimize import differential_evolution
11
+ from pycompound.spec_lib_matching import get_acc_HRMS, get_acc_NRMS
12
+
13
+
14
# Single source of truth for the tunable parameters.
# Each row is: (parameter name, suggested (min, max) search bounds, default value).
# The row order defines the order of ALL_PARAMS and of both dictionaries.
_PARAM_TABLE = (
    ("window_size_centroiding", (0.0, 0.5), 0.5),
    ("window_size_matching", (0.0, 0.5), 0.5),
    ("noise_threshold", (0.0, 0.25), 0.10),
    ("wf_mz", (0.0, 5.0), 0.0),
    ("wf_int", (0.0, 5.0), 1.0),
    ("LET_threshold", (0.0, 5.0), 0.0),
    ("entropy_dimension", (1.0, 3.0), 1.1),
)

# Names accepted by --opt.
ALL_PARAMS = [name for name, _, _ in _PARAM_TABLE]

# Search interval used for a parameter unless overridden with --bound.
SUGGESTED_BOUNDS = {name: bounds for name, bounds, _ in _PARAM_TABLE}

# Value used for a parameter that is not being optimized, unless overridden
# with --default.
DEFAULT_PARAMS = {name: default for name, _, default in _PARAM_TABLE}
43
+
44
+
45
+ # ---------- Utilities ----------
46
def parse_bound(s: str) -> Tuple[str, Tuple[float, float]]:
    """Parse one ``--bound`` command-line value of the form ``name=min:max``.

    Args:
        s: Raw argument string, e.g. ``"wf_mz=0:5"``.

    Returns:
        ``(name, (min, max))`` where ``name`` has surrounding whitespace
        removed and both limits are floats.

    Raises:
        argparse.ArgumentTypeError: If ``=`` is missing, if the part after
            ``=`` has no ``:``, if a limit is not a finite number, or if
            ``min > max``.
    """
    bad_format = f"Bad --bound format '{s}'. Use name=min:max"
    if "=" not in s:
        raise argparse.ArgumentTypeError(bad_format)
    name, rng = s.split("=", 1)
    # The ':' must be on the value side; a ':' that only appears in the name
    # (e.g. "a:b=3") would otherwise slip through and fail on unpacking.
    if ":" not in rng:
        raise argparse.ArgumentTypeError(bad_format)
    lo, hi = rng.split(":", 1)
    try:
        lo_f, hi_f = float(lo), float(hi)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric bound in '{s}': {e}") from e
    # float() accepts "nan" and "inf"; comparisons with nan are always False,
    # so these must be rejected explicitly. The optimizer needs finite bounds.
    if not (np.isfinite(lo_f) and np.isfinite(hi_f)):
        raise argparse.ArgumentTypeError(f"Non-finite bound in '{s}'")
    if lo_f > hi_f:
        raise argparse.ArgumentTypeError(f"Lower bound > upper bound in '{s}'")
    return name.strip(), (lo_f, hi_f)
59
+
60
+
61
def parse_default(s: str) -> Tuple[str, float]:
    """Parse one ``--default`` command-line value of the form ``name=value``.

    Args:
        s: Raw argument string, e.g. ``"noise_threshold=0.1"``.

    Returns:
        ``(name, value)`` with whitespace stripped from the name and the
        value converted to ``float``.

    Raises:
        argparse.ArgumentTypeError: If ``=`` is missing or the value is not
            numeric.
    """
    # Split on the first '=' only; anything after it is the value text.
    key, separator, raw_value = s.partition("=")
    if not separator:
        raise argparse.ArgumentTypeError(f"Bad --default format '{s}'. Use name=value")
    try:
        number = float(raw_value)
    except ValueError as e:
        raise argparse.ArgumentTypeError(f"Non-numeric default in '{s}': {e}")
    return key.strip(), number
71
+
72
+
73
+ def _vector_to_full_params(X: np.ndarray, default_params: Dict[str, float], optimize_params: List[str]) -> Dict[str, float]:
74
+ params = dict(default_params)
75
+ for name, val in zip(optimize_params, X):
76
+ params[name] = float(val)
77
+ return params
78
+
79
+
80
+ # ---------- Objective wrappers (top-level, pickle-friendly) ----------
81
def objective_HRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective for HRMS tuning: evaluate one candidate and return ``1 - accuracy``.

    Defined at module level so it can be pickled when the optimizer runs
    with multiple workers.

    Args:
        X: Candidate values for the parameters named in
            ``ctx["optimize_params"]``.
        ctx: Fixed inputs for the evaluation. Keys read here: ``df_query``,
            ``df_reference``, ``uq``, ``ur``, ``similarity_measure``,
            ``weights``, ``spectrum_preprocessing_order``, ``mz_min``,
            ``mz_max``, ``int_min``, ``int_max``,
            ``high_quality_reference_library``, ``default_params`` and
            ``optimize_params``.

    Returns:
        ``1.0 - acc`` where ``acc`` is the value returned by
        ``get_acc_HRMS`` (printed as a percentage, so presumably a fraction
        between 0 and 1).
    """
    params = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])

    # Positional order must match the get_acc_HRMS signature.
    fixed_keys = (
        "df_query", "df_reference",
        "uq", "ur",
        "similarity_measure", "weights", "spectrum_preprocessing_order",
        "mz_min", "mz_max", "int_min", "int_max",
    )
    tuned_keys = (
        "window_size_centroiding", "window_size_matching", "noise_threshold",
        "wf_mz", "wf_int", "LET_threshold",
        "entropy_dimension",
    )
    accuracy = get_acc_HRMS(
        *[ctx[key] for key in fixed_keys],
        *[params[key] for key in tuned_keys],
        ctx["high_quality_reference_library"],
        verbose=False,
    )

    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {accuracy*100}%")
    return 1.0 - accuracy
96
+
97
+
98
def objective_NRMS(X: np.ndarray, ctx: dict) -> float:
    """Objective for NRMS tuning: evaluate one candidate and return ``1 - accuracy``.

    Defined at module level so it can be pickled when the optimizer runs
    with multiple workers. Unlike the HRMS objective, no window-size
    parameters are passed to the accuracy routine.

    Args:
        X: Candidate values for the parameters named in
            ``ctx["optimize_params"]``.
        ctx: Fixed inputs for the evaluation. Keys read here: ``df_query``,
            ``df_reference``, ``uq``, ``ur``, ``similarity_measure``,
            ``weights``, ``spectrum_preprocessing_order``, ``mz_min``,
            ``mz_max``, ``int_min``, ``int_max``,
            ``high_quality_reference_library``, ``default_params`` and
            ``optimize_params``.

    Returns:
        ``1.0 - acc`` where ``acc`` is the value returned by
        ``get_acc_NRMS`` (printed as a percentage, so presumably a fraction
        between 0 and 1).
    """
    params = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])

    # Positional order must match the get_acc_NRMS signature.
    fixed_keys = (
        "df_query", "df_reference",
        "uq", "ur",
        "similarity_measure", "weights", "spectrum_preprocessing_order",
        "mz_min", "mz_max", "int_min", "int_max",
    )
    tuned_keys = ("noise_threshold", "wf_mz", "wf_int", "LET_threshold", "entropy_dimension")
    accuracy = get_acc_NRMS(
        *[ctx[key] for key in fixed_keys],
        *[params[key] for key in tuned_keys],
        ctx["high_quality_reference_library"],
        verbose=False,
    )

    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {accuracy*100}%")
    return 1.0 - accuracy
111
+
112
+
113
+ # ---------- Main CLI ----------
114
def main():
    """Command-line entry point: tune matching parameters by differential evolution.

    Steps:
      1. Parse the command line and validate the ``--opt``, ``--default``
         and ``--bound`` names against the module-level tables.
      2. Load the query CSV and one or more reference CSVs; each must have
         an ``id`` column. Reference files are concatenated row-wise.
      3. Minimize ``1 - accuracy`` with
         ``scipy.optimize.differential_evolution`` over the selected
         parameters, using the HRMS or NRMS objective depending on
         ``--chromatography_platform``.
      4. Print the best parameter values and the corresponding accuracy.

    Exits through ``sys.exit`` with a message on invalid input.
    """
    p = argparse.ArgumentParser(
        description="Parameter tuning via Differential Evolution for HRMS/NRMS using pycompound."
    )
    p.add_argument("--chromatography_platform", choices=["HRMS", "NRMS"], default="HRMS", help="Chromatography Platform.")
    p.add_argument("--query_data", required=True, help="Path to query CSV (must contain 'id' column).")
    p.add_argument("--reference_data", required=True, nargs="+", help="Path(s) to reference CSV(s) (must contain 'id').")
    # "shannon" is accepted here because the package's own tuning tests use it
    # with the same accuracy routines; it was missing from the original list.
    p.add_argument("--similarity_measure", default="cosine", choices=["cosine", "shannon", "renyi", "tsallis"], help="Similarity measure.")
    p.add_argument("--weights", default="", help="Weights spec; empty means None.")
    p.add_argument("--spectrum-order", default="CNMWL", help="Spectrum preprocessing order string.")
    p.add_argument("--mz-min", type=float, default=0.0)
    p.add_argument("--mz-max", type=float, default=999_999_999.0)
    p.add_argument("--int-min", type=float, default=0.0)
    p.add_argument("--int-max", type=float, default=999_999_999.0)
    p.add_argument("--hq-ref-lib", action="store_true", help="Use high-quality reference library flag.")
    p.add_argument("--opt", nargs="+", default=["window_size_centroiding", "noise_threshold", "wf_mz", "wf_int"],
                   help=f"Parameters to optimize (subset of {ALL_PARAMS}).")
    p.add_argument("--bound", action="append", default=[], type=parse_bound,
                   help="Bound spec 'name=min:max'. Repeatable.")
    p.add_argument("--default", dest="defaults", action="append", default=[], type=parse_default,
                   help="Override a default 'name=value' for non-optimized params or initial values.")
    p.add_argument("--maxiter", type=int, default=15)
    p.add_argument("--seed", type=int, default=1)
    p.add_argument("--workers", type=int, default=-1, help="Use -1 for all cores; 1 to disable parallelism.")
    args = p.parse_args()

    unknown = [x for x in args.opt if x not in ALL_PARAMS]
    if unknown:
        sys.exit(f"Error: unknown --opt params: {unknown}")

    # ---- Load and validate the query spectra ----
    qpath = Path(args.query_data)
    if not qpath.exists():
        sys.exit(f"Query CSV not found: {qpath}")

    df_query = pd.read_csv(qpath)
    if "id" not in df_query.columns:
        sys.exit("Query CSV must contain an 'id' column.")

    # ---- Load and validate the reference spectra ----
    ref_paths = [Path(pth) for pth in args.reference_data]
    for r in ref_paths:
        if not r.exists():
            sys.exit(f"Reference CSV not found: {r}")
    df_reference = pd.concat([pd.read_csv(r) for r in ref_paths], axis=0, ignore_index=True)
    if "id" not in df_reference.columns:
        sys.exit("Reference CSV must contain an 'id' column.")

    uq = df_query["id"].unique().tolist()
    ur = df_reference["id"].unique().tolist()

    # ---- Apply --default and --bound overrides on top of the built-in tables ----
    default_params = dict(DEFAULT_PARAMS)
    for name, val in args.defaults:
        if name not in DEFAULT_PARAMS:
            sys.exit(f"--default refers to unknown parameter '{name}'. Allowed: {list(DEFAULT_PARAMS)}")
        default_params[name] = val

    param_bounds: Dict[str, Tuple[float, float]] = dict(SUGGESTED_BOUNDS)
    for name, (lo, hi) in args.bound:
        if name not in SUGGESTED_BOUNDS:
            sys.exit(f"--bound refers to unknown parameter '{name}'. Allowed: {list(SUGGESTED_BOUNDS)}")
        param_bounds[name] = (lo, hi)

    # One (min, max) pair per optimized parameter, in --opt order.
    bounds = [param_bounds[name] for name in args.opt]

    # Everything the objective needs besides the candidate vector.
    ctx = dict(
        df_query=df_query,
        df_reference=df_reference,
        uq=uq,
        ur=ur,
        similarity_measure=args.similarity_measure,
        # NOTE(review): a non-empty --weights value is forwarded as the raw
        # string; confirm the format the accuracy routines expect.
        weights=(None if args.weights.strip() == "" else args.weights),
        spectrum_preprocessing_order=args.spectrum_order,
        mz_min=float(args.mz_min),
        mz_max=float(args.mz_max),
        int_min=float(args.int_min),
        int_max=float(args.int_max),
        high_quality_reference_library=bool(args.hq_ref_lib),
        default_params=default_params,
        optimize_params=args.opt,
    )

    objective = objective_HRMS if args.chromatography_platform == "HRMS" else objective_NRMS

    # Accuracy (in percent) of the vector handed to the callback, appended
    # once per callback invocation. Collected but not reported further.
    history_acc: List[float] = []

    def _cb(xk, convergence):
        # Re-evaluates the objective for xk, which also prints its accuracy.
        history_acc.append((1.0 - objective(xk, ctx)) * 100.0)

    result = differential_evolution(
        objective,
        bounds=bounds,
        args=(ctx,),
        maxiter=int(args.maxiter),
        tol=0.0,
        seed=int(args.seed),
        workers=int(args.workers),
        callback=_cb,
    )

    best_params = _vector_to_full_params(result.x, default_params, args.opt)
    best_acc_pct = (1.0 - result.fun) * 100.0

    print("\n=== Differential Evolution Result ===")
    print(f"Mode: {args.chromatography_platform}")
    print(f"Optimized over: {args.opt}")
    print("Best values (selected params):")
    for name in args.opt:
        print(f"  {name}: {best_params[name]}")
    print("\nFull parameter set used in final evaluation:")
    for k in ALL_PARAMS:
        print(f"  {k}: {best_params[k]}")
    print(f"\nBest accuracy: {best_acc_pct:.3f}%")
    print(f"DE raw: success={result.success}, nfev={result.nfev}, nit={result.nit}, message='{result.message}'")


if __name__ == "__main__":
    main()
233
+
@@ -1,6 +1,6 @@
1
1
 
2
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data
3
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data
2
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
3
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
4
4
  import argparse
5
5
  import json
6
6
  from pathlib import Path
@@ -61,9 +61,9 @@ grid['entropy_dimension'] = [float(x) for x in grid['entropy_dimension']]
61
61
 
62
62
 
63
63
  if args.chromatography_platform == 'HRMS':
64
- tune_params_on_HRMS_data(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
64
+ tune_params_on_HRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
65
65
 
66
66
  if args.chromatography_platform == 'NRMS':
67
- tune_params_on_NRMS_data(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
67
+ tune_params_on_NRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
68
68
 
69
69
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -2,6 +2,7 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  src/app.py
5
+ src/app2.py
5
6
  src/pycompound/build_library.py
6
7
  src/pycompound/plot_spectra.py
7
8
  src/pycompound/plot_spectra_CLI.py
@@ -9,7 +10,8 @@ src/pycompound/processing.py
9
10
  src/pycompound/similarity_measures.py
10
11
  src/pycompound/spec_lib_matching.py
11
12
  src/pycompound/spec_lib_matching_CLI.py
12
- src/pycompound/tuning_CLI.py
13
+ src/pycompound/tuning_CLI_DE.py
14
+ src/pycompound/tuning_CLI_grid.py
13
15
  src/pycompound.egg-info/PKG-INFO
14
16
  src/pycompound.egg-info/SOURCES.txt
15
17
  src/pycompound.egg-info/dependency_links.txt
@@ -1,4 +1,5 @@
1
1
  app
2
+ app2
2
3
  pycompound
3
4
  rsconnect-python
4
5
  www
@@ -5,6 +5,7 @@ from pathlib import Path
5
5
  import os
6
6
 
7
7
 
8
+ '''
8
9
  print('\n\ntest #1:')
9
10
  run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='hello')
10
11
 
@@ -34,6 +35,7 @@ run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_lib
34
35
 
35
36
  print('\n\ntest #10:')
36
37
  run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', normalization_method='tanh')
38
+ '''
37
39
 
38
40
  print('\n\ntest #11:')
39
41
  run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='tsallis', wf_mz=2, wf_intensity=0.5, entropy_dimension=2, n_top_matches_to_save=3, print_id_results=True)
@@ -0,0 +1,60 @@
1
+
2
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
3
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
4
+ from pycompound.spec_lib_matching import tune_params_DE
5
+ from pathlib import Path
6
+ import os
7
+
8
+ tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_query_data.csv',
9
+ reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_reference_data.csv',
10
+ similarity_measure='shannon',
11
+ optimize_params=["window_size_matching","noise_threshold","wf_mz","wf_int"],
12
+ param_bounds={"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
13
+ default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
14
+
15
+ '''
16
+ print('\n\ntest #1:')
17
+ tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
18
+ reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
19
+ output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
20
+
21
+ print('\n\ntest #2:')
22
+ tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
23
+ reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
24
+ grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]},
25
+ output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
26
+
27
+ print('\n\ntest #3:')
28
+ tune_params_on_NRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv',
29
+ reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv',
30
+ output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
31
+
32
+ print('\n\ntest #4:')
33
+ tune_params_on_NRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv',
34
+ reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv',
35
+ grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]},
36
+ output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
37
+
38
+ print('\n\ntest #5:')
39
+ tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
40
+ reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
41
+ grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]},
42
+ output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')
43
+
44
+ print('\n\ntest #6:')
45
+ tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_query_data.csv',
46
+ reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_reference_data.csv',
47
+ similarity_measure='shannon',
48
+ optimize_params=["window_size_matching","noise_threshold","wf_mz","wf_int"],
49
+ param_bounds={"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
50
+ default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
51
+
52
+ print('\n\ntest #7:')
53
+ tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/gcms_query_data.csv',
54
+ reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/gcms_reference_data.csv',
55
+ similarity_measure='renyi',
56
+ optimize_params=["wf_mz","wf_int","LET_threshold","entropy_dimension"],
57
+ param_bounds={"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0,5),"entropy_dimension":(1.01,3)},
58
+ default_params={"noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
59
+ '''
60
+
@@ -1,21 +0,0 @@
1
-
2
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data
3
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data
4
- from pathlib import Path
5
- import os
6
-
7
- print('\n\ntest #1:')
8
- tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
9
-
10
- print('\n\ntest #2:')
11
- tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
12
-
13
- print('\n\ntest #3:')
14
- tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
15
-
16
- print('\n\ntest #4:')
17
- tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
18
-
19
- print('\n\ntest #5:')
20
- tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]}, output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')
21
-
File without changes
File without changes
File without changes