pycompound 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {pycompound-0.1.4/src/pycompound.egg-info → pycompound-0.1.5}/PKG-INFO +1 -1
  2. {pycompound-0.1.4 → pycompound-0.1.5}/pyproject.toml +1 -1
  3. {pycompound-0.1.4 → pycompound-0.1.5}/src/app.py +245 -56
  4. pycompound-0.1.5/src/app2.py +101 -0
  5. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/spec_lib_matching.py +138 -4
  6. {pycompound-0.1.4 → pycompound-0.1.5/src/pycompound.egg-info}/PKG-INFO +1 -1
  7. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound.egg-info/SOURCES.txt +1 -0
  8. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound.egg-info/top_level.txt +1 -0
  9. {pycompound-0.1.4 → pycompound-0.1.5}/tests/test_tuning.py +8 -0
  10. {pycompound-0.1.4 → pycompound-0.1.5}/LICENSE +0 -0
  11. {pycompound-0.1.4 → pycompound-0.1.5}/README.md +0 -0
  12. {pycompound-0.1.4 → pycompound-0.1.5}/setup.cfg +0 -0
  13. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/build_library.py +0 -0
  14. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/plot_spectra.py +0 -0
  15. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/plot_spectra_CLI.py +0 -0
  16. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/processing.py +0 -0
  17. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/similarity_measures.py +0 -0
  18. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/spec_lib_matching_CLI.py +0 -0
  19. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/tuning_CLI_DE.py +0 -0
  20. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound/tuning_CLI_grid.py +0 -0
  21. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound.egg-info/dependency_links.txt +0 -0
  22. {pycompound-0.1.4 → pycompound-0.1.5}/src/pycompound.egg-info/requires.txt +0 -0
  23. {pycompound-0.1.4 → pycompound-0.1.5}/tests/test_build_library.py +0 -0
  24. {pycompound-0.1.4 → pycompound-0.1.5}/tests/test_plot_spectra.py +0 -0
  25. {pycompound-0.1.4 → pycompound-0.1.5}/tests/test_similarity_measures.py +0 -0
  26. {pycompound-0.1.4 → pycompound-0.1.5}/tests/test_spec_lib_matching.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pycompound"
7
- version = "0.1.4"
7
+ version = "0.1.5"
8
8
  authors = [
9
9
  { name="Hunter Dlugas", email="fy7392@wayne.edu" },
10
10
  ]
@@ -2,10 +2,11 @@
2
2
  from shiny import App, ui, reactive, render, req
3
3
  from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
4
4
  from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
5
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data
6
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data
7
- from pycompound.spec_lib_matching import tune_params_on_HRMS_data_shiny
8
- from pycompound.spec_lib_matching import tune_params_on_NRMS_data_shiny
5
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
6
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
7
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid_shiny
8
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid_shiny
9
+ from pycompound.spec_lib_matching import tune_params_DE
9
10
  from pycompound.plot_spectra import generate_plots_on_HRMS_data
10
11
  from pycompound.plot_spectra import generate_plots_on_NRMS_data
11
12
  from pathlib import Path
@@ -394,7 +395,7 @@ def run_spec_lib_matching_ui(platform: str):
394
395
 
395
396
 
396
397
 
397
- def run_parameter_tuning_ui(platform: str):
398
+ def run_parameter_tuning_grid_ui(platform: str):
398
399
  base_inputs = [
399
400
  ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
400
401
  ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
@@ -435,7 +436,7 @@ def run_parameter_tuning_ui(platform: str):
435
436
  ]
436
437
 
437
438
 
438
- run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
439
+ run_button_parameter_tuning_grid = ui.download_button("run_btn_parameter_tuning_grid", "Tune parameters (grid search)", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
439
440
  back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
440
441
 
441
442
  if platform == "HRMS":
@@ -465,7 +466,7 @@ def run_parameter_tuning_ui(platform: str):
465
466
  ui.TagList(
466
467
  ui.h2("Tune parameters"),
467
468
  inputs_columns,
468
- run_button_parameter_tuning,
469
+ run_button_parameter_tuning_grid,
469
470
  back_button,
470
471
  log_panel
471
472
  ),
@@ -473,12 +474,117 @@ def run_parameter_tuning_ui(platform: str):
473
474
 
474
475
 
475
476
 
476
- '''
477
- app_ui = ui.page_fluid(
478
- ui.output_ui("main_ui"),
479
- ui.output_text("status_output")
480
- )
481
- '''
477
+ PARAMS_HRMS = {
478
+ "window_size_centroiding": (0.0, 0.5),
479
+ "window_size_matching": (0.0, 0.5),
480
+ "noise_threshold": (0.0, 0.25),
481
+ "wf_mz": (0.0, 5.0),
482
+ "wf_int": (0.0, 5.0),
483
+ "LET_threshold": (0.0, 5.0),
484
+ "entropy_dimension": (1.0, 3.0)
485
+ }
486
+
487
+ PARAMS_NRMS = {
488
+ "noise_threshold": (0.0, 0.25),
489
+ "wf_mz": (0.0, 5.0),
490
+ "wf_int": (0.0, 5.0),
491
+ "LET_threshold": (0.0, 5.0),
492
+ "entropy_dimension": (1.0, 3.0)
493
+ }
494
+
495
+ def run_parameter_tuning_DE_ui(platform: str):
496
+ if platform == 'HRMS':
497
+ PARAMS=PARAMS_HRMS
498
+ else:
499
+ PARAMS=PARAMS_NRMS
500
+
501
+ base_inputs = [
502
+ ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
503
+ ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
504
+ ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
505
+ ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
506
+ ui.input_select("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", [False, True],),
507
+ ]
508
+
509
+ if platform == "HRMS":
510
+ extra_inputs = [
511
+ ui.input_text(
512
+ "spectrum_preprocessing_order",
513
+ "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
514
+ "FCNMWL",
515
+ ),
516
+ ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
517
+ ui.input_numeric("window_size_matching", "Matching window-size:", 0.5),
518
+ ]
519
+ else:
520
+ extra_inputs = [
521
+ ui.input_text(
522
+ "spectrum_preprocessing_order",
523
+ "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
524
+ "FNLW",
525
+ )
526
+ ]
527
+
528
+ numeric_inputs = [
529
+ ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
530
+ ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
531
+ ui.input_numeric("int_min", "Minimum intensity for filtering:", 0),
532
+ ui.input_numeric("int_max", "Maximum intensity for filtering:", 999999999),
533
+ ui.input_numeric("noise_threshold", "Noise removal threshold:", 0.0),
534
+ ui.input_numeric("wf_mz", "Mass/charge weight factor:", 0.0),
535
+ ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
536
+ ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
537
+ ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
538
+ ]
539
+
540
+
541
+ #run_button_parameter_tuning_DE = ui.download_button("run_btn_parameter_tuning_DE", "Tune parameters (differential evolution optimization)", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
542
+ run_button_parameter_tuning_DE = ui.input_action_button("run_btn_parameter_tuning_DE", "Tune parameters (differential evolution optimization)", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
543
+ back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:300px; height:100px")
544
+
545
+ if platform == "HRMS":
546
+ inputs_columns = ui.layout_columns(
547
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
548
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
549
+ ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
550
+ ui.div([numeric_inputs[5:10]], style="display:flex; flex-direction:column; gap:10px;"),
551
+ col_widths=(3,3,3,3),
552
+ )
553
+ elif platform == "NRMS":
554
+ inputs_columns = ui.layout_columns(
555
+ ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
556
+ ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
557
+ ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
558
+ ui.div([numeric_inputs[5:10]], style="display:flex; flex-direction:column; gap:10px;"),
559
+ col_widths=(3,3,3,3),
560
+ )
561
+
562
+ return ui.page_fillable(
563
+ ui.layout_sidebar(
564
+ ui.sidebar(
565
+ ui.h3("Select parameters"),
566
+ ui.input_checkbox_group(
567
+ "params",
568
+ None,
569
+ choices=list(PARAMS.keys()),
570
+ selected=["noise_threshold","LET_threshold"],
571
+ ),
572
+ ui.hr(),
573
+ ui.h4("Bounds for selected parameters"),
574
+ ui.output_ui("bounds_inputs"),
575
+ width=360,
576
+ ),
577
+ ui.div(
578
+ ui.h2("Tune parameters (differential evolution optimization)"),
579
+ *(inputs_columns if isinstance(inputs_columns, (list, tuple)) else [inputs_columns]),
580
+ run_button_parameter_tuning_DE,
581
+ back_button,
582
+ ),
583
+ )
584
+ )
585
+
586
+
587
+
482
588
 
483
589
  app_ui = ui.page_fluid(
484
590
  ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
@@ -498,8 +604,10 @@ def server(input, output, session):
498
604
  run_status_plot_spectra = reactive.Value("")
499
605
  run_status_spec_lib_matching = reactive.Value("")
500
606
  run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
501
- run_status_parameter_tuning = reactive.Value("")
502
- is_tuning_running = reactive.Value(False)
607
+ run_status_parameter_tuning_grid = reactive.Value("")
608
+ run_status_parameter_tuning_DE = reactive.Value("")
609
+ is_tuning_grid_running = reactive.Value(False)
610
+ is_tuning_DE_running = reactive.Value(False)
503
611
  match_log_rv = reactive.Value("")
504
612
  is_matching_rv = reactive.Value(False)
505
613
  is_any_job_running = reactive.Value(False)
@@ -519,6 +627,64 @@ def server(input, output, session):
519
627
  converted_query_path_rv = reactive.Value(None)
520
628
  converted_reference_path_rv = reactive.Value(None)
521
629
 
630
+ @output
631
+ @render.ui
632
+ def bounds_inputs():
633
+ selected = input.params()
634
+ if not selected:
635
+ return ui.div(ui.em("Select one or more parameters above."))
636
+
637
+ if input.chromatography_platform() == 'HRMS':
638
+ PARAMS = PARAMS_HRMS
639
+ else:
640
+ PARAMS = PARAMS_NRMS
641
+ blocks = []
642
+ for name in selected:
643
+ lo, hi = PARAMS.get(name, (0.0, 1.0))
644
+ blocks.append(
645
+ ui.card(
646
+ ui.card_header(name),
647
+ ui.layout_columns(
648
+ ui.input_numeric(f"min_{name}", "Lower", lo, step=0.001),
649
+ ui.input_numeric(f"max_{name}", "Upper", hi, step=0.001),
650
+ )
651
+ )
652
+ )
653
+ return ui.div(*blocks)
654
+
655
+ def _read_bounds_dict():
656
+ selected = input.params()
657
+ out = {}
658
+ for name in selected:
659
+ lo_default, hi_default = PARAMS.get(name, (0.0, 1.0))
660
+ lo_id = f"min_{name}"
661
+ hi_id = f"max_{name}"
662
+
663
+ lo_val = input[lo_id]() if lo_id in input else lo_default
664
+ hi_val = input[hi_id]() if hi_id in input else hi_default
665
+
666
+ out[name] = (float(lo_val), float(hi_val))
667
+ return out
668
+
669
+ def _read_bounds():
670
+ opt_params = input.params()
671
+ bounds_dict = {}
672
+ if input.chromatography_platform() == 'HRMS':
673
+ PARAMS = PARAMS_HRMS
674
+ else:
675
+ PARAMS = PARAMS_NRMS
676
+
677
+ for p in opt_params:
678
+ lo_id, hi_id = f"min_{p}", f"max_{p}"
679
+ lo_default, hi_default = PARAMS.get(p, (0.0, 1.0))
680
+ lo = input[lo_id]() if lo_id in input else lo_default
681
+ hi = input[hi_id]() if hi_id in input else hi_default
682
+ if lo > hi:
683
+ lo, hi = hi, lo
684
+ bounds_dict[p] = (float(lo), float(hi))
685
+
686
+ bounds_list = [bounds_dict[p] for p in opt_params]
687
+ return opt_params, bounds_dict, bounds_list
522
688
 
523
689
  def _reset_plot_spectra_state():
524
690
  query_status_rv.set("")
@@ -551,7 +717,8 @@ def server(input, output, session):
551
717
 
552
718
  def _reset_parameter_tuning_state():
553
719
  match_log_rv.set("")
554
- is_tuning_running.set(False)
720
+ is_tuning_grid_running.set(False)
721
+ is_tuning_DE_running.set(False)
555
722
  is_any_job_running.set(False)
556
723
 
557
724
 
@@ -563,7 +730,9 @@ def server(input, output, session):
563
730
  _reset_plot_spectra_state()
564
731
  elif page == "run_spec_lib_matching":
565
732
  _reset_spec_lib_matching_state()
566
- elif page == "run_parameter_tuning":
733
+ elif page == "run_parameter_tuning_grid":
734
+ _reset_parameter_tuning_state()
735
+ elif page == "run_parameter_tuning_DE":
567
736
  _reset_parameter_tuning_state()
568
737
 
569
738
  @reactive.effect
@@ -573,7 +742,9 @@ def server(input, output, session):
573
742
  _reset_plot_spectra_state()
574
743
  elif page == "run_spec_lib_matching":
575
744
  _reset_spec_lib_matching_state()
576
- elif page == "run_parameter_tuning":
745
+ elif page == "run_parameter_tuning_grid":
746
+ _reset_parameter_tuning_state()
747
+ elif page == "run_parameter_tuning_DE":
577
748
  _reset_parameter_tuning_state()
578
749
 
579
750
 
@@ -601,7 +772,7 @@ def server(input, output, session):
601
772
 
602
773
  @reactive.effect
603
774
  async def _pump_logs():
604
- if not (is_any_job_running.get() or is_tuning_running.get() or is_matching_rv.get()):
775
+ if not (is_any_job_running.get() or is_tuning_grid_running.get() or is_tuning_DE_running.get() or is_matching_rv.get()):
605
776
  return
606
777
  reactive.invalidate_later(0.05)
607
778
  msgs = _drain_queue_nowait(_LOG_QUEUE)
@@ -680,9 +851,12 @@ def server(input, output, session):
680
851
  elif input.run_spec_lib_matching() > match_clicks.get():
681
852
  current_page.set("run_spec_lib_matching")
682
853
  match_clicks.set(input.run_spec_lib_matching())
683
- elif input.run_parameter_tuning() > match_clicks.get():
684
- current_page.set("run_parameter_tuning")
685
- match_clicks.set(input.run_parameter_tuning())
854
+ elif input.run_parameter_tuning_grid() > match_clicks.get():
855
+ current_page.set("run_parameter_tuning_grid")
856
+ match_clicks.set(input.run_parameter_tuning_grid())
857
+ elif input.run_parameter_tuning_DE() > match_clicks.get():
858
+ current_page.set("run_parameter_tuning_DE")
859
+ match_clicks.set(input.run_parameter_tuning_DE())
686
860
  elif hasattr(input, "back") and input.back() > back_clicks.get():
687
861
  current_page.set("main_menu")
688
862
  back_clicks.set(input.back())
@@ -726,7 +900,8 @@ def server(input, output, session):
726
900
  ),
727
901
  ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
728
902
  ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
729
- ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
903
+ ui.input_action_button("run_parameter_tuning_grid", "Grid search: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
904
+ ui.input_action_button("run_parameter_tuning_DE", "Differential evolution optimization: Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:500px; height:150px; margin-top:10px; margin-right:50px"),
730
905
  ui.div(
731
906
  "References:",
732
907
  style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
@@ -777,8 +952,10 @@ def server(input, output, session):
777
952
  return plot_spectra_ui(input.chromatography_platform())
778
953
  elif current_page() == "run_spec_lib_matching":
779
954
  return run_spec_lib_matching_ui(input.chromatography_platform())
780
- elif current_page() == "run_parameter_tuning":
781
- return run_parameter_tuning_ui(input.chromatography_platform())
955
+ elif current_page() == "run_parameter_tuning_grid":
956
+ return run_parameter_tuning_grid_ui(input.chromatography_platform())
957
+ elif current_page() == "run_parameter_tuning_DE":
958
+ return run_parameter_tuning_DE_ui(input.chromatography_platform())
782
959
 
783
960
 
784
961
 
@@ -1020,10 +1197,10 @@ def server(input, output, session):
1020
1197
  yield buf.getvalue()
1021
1198
 
1022
1199
 
1023
- @render.download(filename="parameter_tuning_output.txt")
1024
- async def run_btn_parameter_tuning():
1200
+ @render.download(filename="parameter_tuning_grid_output.txt")
1201
+ async def run_btn_parameter_tuning_grid():
1025
1202
  is_any_job_running.set(True)
1026
- is_tuning_running.set(True)
1203
+ is_tuning_grid_running.set(True)
1027
1204
  match_log_rv.set("Running grid search of all parameters specified...\n")
1028
1205
  await reactive.flush()
1029
1206
 
@@ -1044,7 +1221,7 @@ def server(input, output, session):
1044
1221
  common_kwargs = dict(
1045
1222
  query_data=input.query_data()[0]["datapath"],
1046
1223
  reference_data=input.reference_data()[0]["datapath"],
1047
- output_path=str(Path.cwd() / "parameter_tuning_output.txt"),
1224
+ output_path=str(Path.cwd() / "parameter_tuning_grid_output.txt"),
1048
1225
  return_output=True,
1049
1226
  )
1050
1227
 
@@ -1072,7 +1249,7 @@ def server(input, output, session):
1072
1249
  'window_size_centroiding': window_size_centroiding_tmp,
1073
1250
  'window_size_matching': window_size_matching_tmp,
1074
1251
  }
1075
- df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_HRMS_data_shiny, rw, **common_kwargs, grid=grid)
1252
+ df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_HRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
1076
1253
  else:
1077
1254
  grid = {
1078
1255
  'similarity_measure': similarity_measure_tmp,
@@ -1089,26 +1266,59 @@ def server(input, output, session):
1089
1266
  'entropy_dimension': entropy_dimension_tmp,
1090
1267
  'high_quality_reference_library': high_quality_reference_library_tmp,
1091
1268
  }
1092
- df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_NRMS_data_shiny, rw, **common_kwargs, grid=grid)
1269
+ df_out = await asyncio.to_thread(_run_with_redirects, tune_params_on_NRMS_data_grid_shiny, rw, **common_kwargs, grid=grid)
1093
1270
 
1094
1271
  match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
1095
1272
  except Exception as e:
1096
1273
  match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
1097
1274
  raise
1098
1275
  finally:
1099
- is_tuning_running.set(False)
1276
+ is_tuning_grid_running.set(False)
1100
1277
  is_any_job_running.set(False)
1101
1278
  await reactive.flush()
1102
1279
 
1103
1280
  yield df_out.to_csv(index=False).encode("utf-8", sep='\t')
1104
1281
 
1105
1282
 
1283
+ @reactive.effect
1284
+ @reactive.event(input.run_btn_parameter_tuning_DE)
1285
+ def _run_btn_parameter_tuning_DE():
1286
+ is_any_job_running.set(True)
1287
+ is_tuning_DE_running.set(True)
1288
+ match_log_rv.set("Tuning specified continuous parameters using differential evolution...\n")
1289
+
1290
+ #print('\nhere!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
1291
+ weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
1292
+ weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
1293
+ opt_params, bounds_dict, bounds_list = _read_bounds()
1294
+ #print(input.params())
1295
+ #print("Optimizing over:", opt_params)
1296
+ #print("Bounds list:", bounds_list)
1297
+ #print("Bounds dict:", bounds_dict)
1298
+ #tmp = {"window_size_centroiding":input.window_size_centroiding(), "window_size_matching":input.window_size_matching(), "noise_threshold":input.noise_threshold(), "wf_mz":input.wf_mz(), "wf_int":input.wf_int(), "LET_threshold":input.LET_threshold(), "entropy_dimension":input.entropy_dimension()}
1299
+ #print(tmp)
1300
+ if input.chromatography_platform() == 'HRMS':
1301
+ tune_params_DE(query_data=input.query_data()[0]["datapath"],
1302
+ reference_data=input.reference_data()[0]["datapath"],
1303
+ similarity_measure=input.similarity_measure(),
1304
+ weights=weights,
1305
+ spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
1306
+ mz_min=input.mz_min(),
1307
+ mz_max=input.mz_max(),
1308
+ int_min=input.int_min(),
1309
+ int_max=input.int_max(),
1310
+ high_quality_reference_library=input.high_quality_reference_library(),
1311
+ optimize_params=list(input.params()),
1312
+ param_bounds={"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
1313
+ #param_bounds=bounds_dict,
1314
+ default_params={"window_size_centroiding":input.window_size_centroiding(), "window_size_matching":input.window_size_matching(), "noise_threshold":input.noise_threshold(), "wf_mz":input.wf_mz(), "wf_int":input.wf_int(), "LET_threshold":input.LET_threshold(), "entropy_dimension":input.entropy_dimension()})
1315
+ #print('here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n')
1106
1316
 
1107
1317
 
1108
1318
 
1109
1319
  @reactive.effect
1110
1320
  async def _pump_reactive_writer_logs():
1111
- if not is_tuning_running.get():
1321
+ if not is_tuning_grid_running.get():
1112
1322
  return
1113
1323
 
1114
1324
  reactive.invalidate_later(0.1)
@@ -1122,32 +1332,11 @@ def server(input, output, session):
1122
1332
  def status_output():
1123
1333
  return run_status_plot_spectra.get()
1124
1334
  return run_status_spec_lib_matching.get()
1125
- return run_status_parameter_tuning.get()
1335
+ return run_status_parameter_tuning_grid.get()
1336
+ return run_status_parameter_tuning_DE.get()
1126
1337
 
1127
1338
 
1128
1339
  app = App(app_ui, server)
1129
1340
 
1130
1341
 
1131
1342
 
1132
- '''
1133
- from starlette.middleware.base import BaseHTTPMiddleware
1134
- from starlette.requests import Request
1135
-
1136
- class _InjectBaseURLMiddleware(BaseHTTPMiddleware):
1137
- async def dispatch(self, request: Request, call_next):
1138
- hdrs = dict(request.scope.get("headers", []))
1139
- if b"rstudio-connect-app-base-url" not in hdrs:
1140
- host = request.headers.get("x-forwarded-host") or request.headers.get("host") or ""
1141
- proto = request.headers.get("x-forwarded-proto") or "https"
1142
- root_path = (request.scope.get("root_path") or "").rstrip("/")
1143
- base = f"{proto}://{host}{root_path}"
1144
- new_headers = list(request.scope.get("headers", [])) + [
1145
- (b"rstudio-connect-app-base-url", base.encode("utf-8"))
1146
- ]
1147
- request.scope["headers"] = new_headers
1148
- return await call_next(request)
1149
-
1150
- app.starlette_app.add_middleware(_InjectBaseURLMiddleware)
1151
- '''
1152
-
1153
-
@@ -0,0 +1,101 @@
1
+
2
+
3
+ # app.py
4
+ from shiny import App, ui, render, reactive
5
+ import pandas as pd
6
+
7
+ # Parameters to choose from + suggested default ranges
8
+ PARAMS = {
9
+ "window_size_centroiding": (0.0, 0.5),
10
+ "window_size_matching": (0.0, 0.5),
11
+ "noise_threshold": (0.0, 0.25),
12
+ "wf_mz": (0.0, 5.0),
13
+ "wf_int": (0.0, 5.0),
14
+ "LET_threshold": (0.0, 5.0),
15
+ "entropy_dimension": (1.0, 3.0),
16
+ }
17
+
18
+ app_ui = ui.page_fillable(
19
+ ui.layout_sidebar(
20
+ ui.sidebar(
21
+ ui.h3("Select parameters"),
22
+ ui.input_checkbox_group(
23
+ id="params",
24
+ label=None,
25
+ choices=list(PARAMS.keys()),
26
+ selected=["window_size_centroiding", "noise_threshold"],
27
+ ),
28
+ ui.hr(),
29
+ ui.h4("Bounds for selected parameters"),
30
+ ui.output_ui("bounds_inputs"),
31
+ width=360,
32
+ ),
33
+ )
34
+ )
35
+
36
+ def server(input, output, session):
37
+ @output
38
+ @render.ui
39
+ def bounds_inputs():
40
+ selected = input.params()
41
+ if not selected:
42
+ return ui.div(ui.em("Select one or more parameters above."))
43
+
44
+ blocks = []
45
+ for name in selected:
46
+ lo, hi = PARAMS.get(name, (0.0, 1.0))
47
+ blocks.append(
48
+ ui.card(
49
+ ui.card_header(name),
50
+ ui.layout_columns(
51
+ ui.input_numeric(f"min_{name}", "Lower", lo, step=0.001),
52
+ ui.input_numeric(f"max_{name}", "Upper", hi, step=0.001),
53
+ )
54
+ )
55
+ )
56
+ return ui.div(*blocks)
57
+
58
+ def _read_bounds_dict():
59
+ selected = input.params()
60
+ out = {}
61
+ for name in selected:
62
+ lo_default, hi_default = PARAMS.get(name, (0.0, 1.0))
63
+ lo_id = f"min_{name}"
64
+ hi_id = f"max_{name}"
65
+
66
+ # Use input[...]() and guard with "in input"
67
+ lo_val = input[lo_id]() if lo_id in input else lo_default
68
+ hi_val = input[hi_id]() if hi_id in input else hi_default
69
+
70
+ out[name] = (float(lo_val), float(hi_val))
71
+ return out
72
+
73
+
74
+
75
+ # Table of current bounds
76
+ @output
77
+ @render.data_frame
78
+ def bounds_table():
79
+ b = _read_bounds_dict()
80
+ if not b:
81
+ return pd.DataFrame(columns=["parameter", "lower", "upper"])
82
+ rows = [{"parameter": k, "lower": v[0], "upper": v[1]} for k, v in b.items()]
83
+ return pd.DataFrame(rows)
84
+
85
+ # JSON-ish view (string) you can parse/use elsewhere
86
+ @output
87
+ @render.text
88
+ def bounds_json():
89
+ b = _read_bounds_dict()
90
+ if not b:
91
+ return "{}"
92
+ # Pretty-print as Python dict literal for quick copy/paste
93
+ lines = ["{"]
94
+ for k, (lo, hi) in b.items():
95
+ lines.append(f" '{k}': ({lo}, {hi}),")
96
+ lines.append("}")
97
+ return "\n".join(lines)
98
+
99
+ app = App(app_ui, server)
100
+
101
+
@@ -9,6 +9,139 @@ from itertools import product
9
9
  from joblib import Parallel, delayed
10
10
  import csv
11
11
  import sys, csv
12
+ from scipy.optimize import differential_evolution
13
+
14
+
15
+ def _vector_to_full_params(X, default_params, optimize_params):
16
+ params = default_params.copy()
17
+ for name, val in zip(optimize_params, X):
18
+ params[name] = float(val)
19
+ return params
20
+
21
+
22
+ def objective_function_HRMS(X, ctx):
23
+ p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
24
+ if 'window_size_centroiding' in ctx.keys():
25
+ acc = get_acc_HRMS(
26
+ ctx["df_query"], ctx["df_reference"],
27
+ ctx["unique_query_ids"], ctx["unique_reference_ids"],
28
+ ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
29
+ ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
30
+ p["window_size_centroiding"], p["window_size_matching"], p["noise_threshold"],
31
+ p["wf_mz"], p["wf_int"], p["LET_threshold"],
32
+ p["entropy_dimension"],
33
+ ctx["high_quality_reference_library"],
34
+ verbose=False
35
+ )
36
+ else:
37
+ acc = get_acc_NRMS(
38
+ ctx["df_query"], ctx["df_reference"],
39
+ ctx["unique_query_ids"], ctx["unique_reference_ids"],
40
+ ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
41
+ ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
42
+ p["noise_threshold"],
43
+ p["wf_mz"], p["wf_int"], p["LET_threshold"],
44
+ p["entropy_dimension"],
45
+ ctx["high_quality_reference_library"],
46
+ verbose=False
47
+ )
48
+ print(f"\nparams({ctx['optimize_params']}) = {np.array(X)}\naccuracy: {acc*100}%")
49
+ return 1.0 - acc
50
+
51
+
52
+
53
+
54
+ def tune_params_DE(query_data=None, reference_data=None, similarity_measure='cosine', weights=None, spectrum_preprocessing_order='CNMWL', mz_min=0, mz_max=999999999, int_min=0, int_max=999999999, high_quality_reference_library=False, optimize_params=["window_size_centroiding","window_size_matching","noise_threshold","wf_mz","wf_int","LET_threshold","entropy_dimension"], param_bounds={"window_size_centroiding":(0.0,0.5),"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0.0,5.0),"entropy_dimension":(1.0,3.0)}, default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1}):
55
+
56
+ '''
57
+ print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
58
+ print(param_bounds)
59
+ print(default_params)
60
+ print(type(param_bounds['noise_threshold'][0]))
61
+ print(type(param_bounds['noise_threshold'][1]))
62
+ print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
63
+ '''
64
+
65
+ if query_data is None:
66
+ print('\nError: No argument passed to the mandatory query_data. Please pass the path to the TXT file of the query data.')
67
+ sys.exit()
68
+ else:
69
+ extension = query_data.rsplit('.',1)
70
+ extension = extension[(len(extension)-1)]
71
+ if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
72
+ output_path_tmp = query_data[:-3] + 'csv'
73
+ build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
74
+ df_query = pd.read_csv(output_path_tmp)
75
+ if extension == 'csv' or extension == 'CSV':
76
+ df_query = pd.read_csv(query_data)
77
+ unique_query_ids = df_query.iloc[:,0].unique()
78
+
79
+ if reference_data is None:
80
+ print('\nError: No argument passed to the mandatory reference_data. Please pass the path to the CSV file of the reference data.')
81
+ sys.exit()
82
+ else:
83
+ if isinstance(reference_data,str):
84
+ df_reference = get_reference_df(reference_data=reference_data)
85
+ unique_reference_ids = df_reference.iloc[:,0].unique()
86
+ else:
87
+ dfs = []
88
+ unique_reference_ids = []
89
+ for f in reference_data:
90
+ tmp = get_reference_df(reference_data=f)
91
+ dfs.append(tmp)
92
+ unique_reference_ids.extend(tmp.iloc[:,0].unique())
93
+ df_reference = pd.concat(dfs, axis=0, ignore_index=True)
94
+
95
+ unique_query_ids = df_query['id'].unique().tolist()
96
+ unique_reference_ids = df_reference['id'].unique().tolist()
97
+
98
+ ctx = dict(
99
+ df_query=df_query,
100
+ df_reference=df_reference,
101
+ unique_query_ids=unique_query_ids,
102
+ unique_reference_ids=unique_reference_ids,
103
+ similarity_measure=similarity_measure,
104
+ weights=weights,
105
+ spectrum_preprocessing_order=spectrum_preprocessing_order,
106
+ mz_min=mz_min, mz_max=mz_max, int_min=int_min, int_max=int_max,
107
+ high_quality_reference_library=high_quality_reference_library,
108
+ default_params=default_params,
109
+ optimize_params=optimize_params,
110
+ )
111
+
112
+ bounds = [param_bounds[p] for p in optimize_params]
113
+
114
+ #print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
115
+ #print(df_query.head())
116
+ #print(df_reference.head())
117
+ #print(bounds)
118
+ #print(ctx)
119
+ #print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
120
+
121
+ result = differential_evolution(
122
+ objective_function_HRMS,
123
+ bounds=bounds,
124
+ args=(ctx,),
125
+ maxiter=3,
126
+ tol=0.0,
127
+ workers=-1,
128
+ seed=1,
129
+ )
130
+
131
+ best_full_params = _vector_to_full_params(result.x, default_params, optimize_params)
132
+ best_acc = 100.0 - (result.fun * 100.0)
133
+
134
+ print("\n=== Differential Evolution Result ===")
135
+ print(f"Optimized over: {optimize_params}")
136
+ print("Best values (selected params):")
137
+ for name in optimize_params:
138
+ print(f" {name}: {best_full_params[name]}")
139
+ print("\nFull parameter set used in final evaluation:")
140
+ for k, v in best_full_params.items():
141
+ print(f" {k}: {v}")
142
+ print(f"\nBest accuracy: {best_acc:.3f}%")
143
+
144
+
12
145
 
13
146
 
14
147
  default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
@@ -78,7 +211,7 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
78
211
 
79
212
 
80
213
 
81
- def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
214
+ def tune_params_on_HRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
82
215
  """
83
216
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
84
217
 
@@ -154,7 +287,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, ou
154
287
 
155
288
 
156
289
 
157
- def tune_params_on_HRMS_data_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
290
+ def tune_params_on_HRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
158
291
  """
159
292
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible
160
293
  combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
@@ -262,7 +395,7 @@ def tune_params_on_HRMS_data_shiny(query_data=None, reference_data=None, grid=No
262
395
  print(f'Wrote results to {output_path}')
263
396
 
264
397
 
265
- def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
398
+ def tune_params_on_NRMS_data_grid(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
266
399
  """
267
400
  runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file, and prints top-performing parameters
268
401
 
@@ -336,7 +469,7 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, ou
336
469
 
337
470
 
338
471
 
339
- def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
472
+ def tune_params_on_NRMS_data_grid_shiny(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
340
473
  """
341
474
  runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible
342
475
  combinations of parameters in the grid dict, saves results from each choice of parameters to a TXT file,
@@ -444,6 +577,7 @@ def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=No
444
577
 
445
578
  def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
446
579
 
580
+ #print('\n\n\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n\n\n')
447
581
  n_top_matches_to_save = 1
448
582
 
449
583
  all_similarity_scores = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -2,6 +2,7 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  src/app.py
5
+ src/app2.py
5
6
  src/pycompound/build_library.py
6
7
  src/pycompound/plot_spectra.py
7
8
  src/pycompound/plot_spectra_CLI.py
@@ -1,4 +1,5 @@
1
1
  app
2
+ app2
2
3
  pycompound
3
4
  rsconnect-python
4
5
  www
@@ -5,7 +5,14 @@ from pycompound.spec_lib_matching import tune_params_DE
5
5
  from pathlib import Path
6
6
  import os
7
7
 
8
+ tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_query_data.csv',
9
+ reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_reference_data.csv',
10
+ similarity_measure='shannon',
11
+ optimize_params=["window_size_matching","noise_threshold","wf_mz","wf_int"],
12
+ param_bounds={"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
13
+ default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
8
14
 
15
+ '''
9
16
  print('\n\ntest #1:')
10
17
  tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
11
18
  reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
@@ -49,4 +56,5 @@ tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/gcms_q
49
56
  optimize_params=["wf_mz","wf_int","LET_threshold","entropy_dimension"],
50
57
  param_bounds={"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0,5),"entropy_dimension":(1.01,3)},
51
58
  default_params={"noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
59
+ '''
52
60
 
File without changes
File without changes
File without changes