PyPI - pycompound - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

pycompound-0.1.0-py3-none-any.whl → pycompound-0.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) [hide / show]

app.py +428 -63
pycompound/plot_spectra.py +7 -4
pycompound/spec_lib_matching.py +53 -25
pycompound/tuning_CLI.py +1 -2
{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/METADATA +1 -1
pycompound-0.1.1.dist-info/RECORD +14 -0
pycompound-0.1.0.dist-info/RECORD +0 -14
{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/WHEEL +0 -0
{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/licenses/LICENSE +0 -0
{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/top_level.txt +0 -0

app.py CHANGED Viewed

@@ -18,8 +18,25 @@ import matplotlib.pyplot as plt
 import pandas as pd
 import numpy as np
 import netCDF4 as nc
-from pyteomics import mgf
-from pyteomics import mzml
+from pyteomics import mgf, mzml
+import ast
+_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
+def strip_text(s):
+    return [x.strip() for x in s.strip('[]').split(',') if x.strip()]
+def strip_numeric(s):
+    return [float(x.strip()) for x in s.strip('[]').split(',') if x.strip()]
+def strip_weights(s):
+    tuples = ast.literal_eval(s)
+    keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
+    return [dict(zip(keys,t)) for t in tuples]
 def build_library(input_path=None, output_path=None):
@@ -152,30 +169,37 @@ def extract_first_column_ids(file_path: str, max_ids: int = 20000):
     return []
+def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
+    """Send PNG bytes to browser and open in a new window as a data URL."""
+    b64 = base64.b64encode(png_bytes).decode("ascii")
+    data_url = f"data:image/png;base64,{b64}"
+    session.send_custom_message("open-plot-window", {"png": data_url, "title": title})
 def plot_spectra_ui(platform: str):
     # Base inputs common to all platforms
     base_inputs = [
         ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
-    ui.input_selectize(
-        "spectrum_ID1",
-        "Select spectrum ID 1:",
-        choices=[],
-        multiple=False,
-        options={"placeholder": "Upload a query file to load IDs..."},
-    ),
-    ui.input_selectize(
-        "spectrum_ID2",
-        "Select spectrum ID 2 (optional):",
-        choices=[],
-        multiple=False,
-        options={"placeholder": "Upload a reference file to load IDs..."},
-    ),
+        ui.input_selectize(
+            "spectrum_ID1",
+            "Select spectrum ID 1 (default is the first spectrum in the library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
+        ui.input_selectize(
+            "spectrum_ID2",
+            "Select spectrum ID 2 (default is the first spectrum in the library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
         ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
+        ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
         ui.input_select(
             "high_quality_reference_library",
-            "Indicate whether the reference library is considered high quality. "
-            "If True, filtering and noise removal are only applied to the query spectra.",
+            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
             [False, True],
         ),
     ]
@@ -185,7 +209,7 @@ def plot_spectra_ui(platform: str):
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
+                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                 "FCNMWL",
             ),
             ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -195,7 +219,7 @@ def plot_spectra_ui(platform: str):
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (F, N, L, W).",
+                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                 "FNLW",
             )
         ]
@@ -227,19 +251,19 @@ def plot_spectra_ui(platform: str):
     # Layout base_inputs and extra_inputs in columns
     if platform == "HRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3),
         )
     elif platform == "NRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3),
         )
     # Combine everything
@@ -249,7 +273,8 @@ def plot_spectra_ui(platform: str):
             inputs_columns,
             run_button_plot_spectra,
             back_button,
-            ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
+            ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px"),
+            ui.div(ui.output_text("plot_reference_status"), style="margin-top:8px; font-size:14px")
         ),
     )
@@ -261,12 +286,26 @@ def run_spec_lib_matching_ui(platform: str):
         ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
+        ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
+        ui.input_selectize(
+            "spectrum_ID1",
+            "Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
+        ui.input_selectize(
+            "spectrum_ID2",
+            "Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
         ui.input_select(
             "high_quality_reference_library",
-            "Indicate whether the reference library is considered high quality. "
-            "If True, filtering and noise removal are only applied to the query spectra.",
+            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
             [False, True],
-        ),
+        )
     ]
     # Extra inputs depending on platform
@@ -274,7 +313,7 @@ def run_spec_lib_matching_ui(platform: str):
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
+                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                 "FCNMWL",
             ),
             ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -284,7 +323,7 @@ def run_spec_lib_matching_ui(platform: str):
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (F, N, L, W).",
+                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                 "FNLW",
             )
         ]
@@ -300,30 +339,31 @@ def run_spec_lib_matching_ui(platform: str):
         ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
         ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
         ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
-        ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 1),
+        ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
     ]
     # Run and Back buttons
-    run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
     back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
     # Layout base_inputs and extra_inputs in columns
     if platform == "HRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3)
         )
     elif platform == "NRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3)
         )
     log_panel = ui.card(
@@ -338,13 +378,100 @@ def run_spec_lib_matching_ui(platform: str):
             ui.h2("Run Spectral Library Matching"),
             inputs_columns,
             run_button_spec_lib_matching,
+            run_button_plot_spectra_within_spec_lib_matching,
             back_button,
-            log_panel,
+            log_panel
         ),
     )
+def run_parameter_tuning_ui(platform: str):
+    # Base inputs common to all platforms
+    base_inputs = [
+        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
+        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
+        ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
+        ui.input_text('weights', 'Weights for similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25), (0.2, 0.3, 0.4, 0.1))'),
+        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]')
+    ]
+    # Extra inputs depending on platform
+    if platform == "HRMS":
+        extra_inputs = [
+            ui.input_text(
+                "spectrum_preprocessing_order",
+                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
+                "[FCNMWL,CWM]",
+            ),
+            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
+            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
+        ]
+    else:
+        extra_inputs = [
+            ui.input_text(
+                "spectrum_preprocessing_order",
+                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
+                "[FNLW,WNL]",
+            )
+        ]
+    # Numeric inputs
+    numeric_inputs = [
+        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
+        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
+        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
+        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
+        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
+        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
+        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
+        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
+        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]')
+    ]
+    # Run and Back buttons
+    run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    # Layout base_inputs and extra_inputs in columns
+    if platform == "HRMS":
+        inputs_columns = ui.layout_columns(
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[5:9], style="display:flex; flex-direction:column; gap:10px;"),
+            col_widths=(3, 3, 3, 3),
+        )
+    elif platform == "NRMS":
+        inputs_columns = ui.layout_columns(
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[5:9], style="display:flex; flex-direction:column; gap:10px;"),
+            col_widths=(3, 3, 3, 3),
+        )
+    log_panel = ui.card(
+        ui.card_header("Identification log"),
+        ui.output_text_verbatim("match_log"),
+        style="max-height:300px; overflow:auto"
+    )
+    # Combine everything
+    return ui.div(
+        ui.TagList(
+            ui.h2("Tune parameters"),
+            inputs_columns,
+            run_button_parameter_tuning,
+            back_button,
+            log_panel
+        ),
+    )
 app_ui = ui.page_fluid(
     ui.output_ui("main_ui"),
     ui.output_text("status_output")
@@ -361,8 +488,12 @@ def server(input, output, session):
     run_status_plot_spectra = reactive.Value("")
     run_status_spec_lib_matching = reactive.Value("")
+    run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
+    run_status_parameter_tuning = reactive.Value("")
+    is_tuning_running = reactive.Value(False)
     match_log_rv = reactive.Value("")
     is_matching_rv = reactive.Value(False)
+    is_any_job_running = reactive.Value(False)
     query_ids_rv = reactive.Value([])
     query_file_path_rv = reactive.Value(None)
@@ -377,6 +508,29 @@ def server(input, output, session):
     converted_reference_path_rv = reactive.Value(None)
+    #def _drain_queue_nowait(q: asyncio.Queue[str]) -> list[str]:
+    def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
+        out = []
+        try:
+            while True:
+                out.append(q.get_nowait())
+        except asyncio.QueueEmpty:
+            pass
+        return out
+    @reactive.effect
+    async def _pump_logs():
+        if not is_any_job_running.get():
+            return
+        reactive.invalidate_later(0.1)
+        msgs = _drain_queue_nowait(_LOG_QUEUE)
+        if msgs:
+            match_log_rv.set(match_log_rv.get() + "".join(msgs))
+            await reactive.flush()
     def process_database(file_path: str):
         suffix = Path(file_path).suffix.lower()
         return {"path": file_path, "suffix": suffix}
@@ -385,13 +539,14 @@ def server(input, output, session):
     def plot_query_status():
         return query_status_rv.get() or ""
+    @render.text
+    def plot_reference_status():
+        return reference_status_rv.get() or ""
     @reactive.effect
     @reactive.event(input.query_data)
     async def _on_query_upload():
-        if current_page() != "plot_spectra":
-            return
         files = input.query_data()
         req(files and len(files) > 0)
@@ -414,9 +569,6 @@ def server(input, output, session):
     @reactive.effect
     @reactive.event(input.reference_data)
     async def _on_reference_upload():
-        if current_page() != "plot_spectra":
-            return
         files = input.reference_data()
         req(files and len(files) > 0)
@@ -467,6 +619,9 @@ def server(input, output, session):
         elif input.run_spec_lib_matching() > match_clicks.get():
             current_page.set("run_spec_lib_matching")
             match_clicks.set(input.run_spec_lib_matching())
+        elif input.run_parameter_tuning() > match_clicks.get():
+            current_page.set("run_parameter_tuning")
+            match_clicks.set(input.run_parameter_tuning())
         elif hasattr(input, "back") and input.back() > back_clicks.get():
             current_page.set("main_menu")
             back_clicks.set(input.back())
@@ -512,6 +667,7 @@ def server(input, output, session):
                 ),
                 ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
                 ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
+                ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
                 ui.div(
                     "References:",
                     style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
@@ -562,14 +718,16 @@ def server(input, output, session):
             return plot_spectra_ui(input.chromatography_platform())
         elif current_page() == "run_spec_lib_matching":
             return run_spec_lib_matching_ui(input.chromatography_platform())
+        elif current_page() == "run_parameter_tuning":
+            return run_parameter_tuning_ui(input.chromatography_platform())
     @reactive.effect
     @reactive.event(input.query_data)
     async def _populate_ids_from_query_upload():
-        if current_page() != "plot_spectra":
-            return
+        #if current_page() != "plot_spectra":
+        #    return
         files = input.query_data()
         if not files:
@@ -628,8 +786,8 @@ def server(input, output, session):
     @reactive.effect
     @reactive.event(input.reference_data)
     async def _populate_ids_from_reference_upload():
-        if current_page() != "plot_spectra":
-            return
+        #if current_page() != "plot_spectra":
+        #    return
         files = input.reference_data()
         if not files:
@@ -685,19 +843,23 @@ def server(input, output, session):
             raise
     @render.download(filename=lambda: f"plot.png")
     def run_btn_plot_spectra():
         spectrum_ID1 = input.spectrum_ID1() or None
         spectrum_ID2 = input.spectrum_ID2() or None
+        weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
+        weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
         if input.chromatography_platform() == "HRMS":
-            fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
-            #run_status_plot_spectra.set("✅ Plotting has finished.")
+            fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
+            plt.show()
         elif input.chromatography_platform() == "NRMS":
             fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
+            plt.show()
         with io.BytesIO() as buf:
             fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
+            plt.close()
             yield buf.getvalue()
@@ -705,6 +867,7 @@ def server(input, output, session):
     def status_output():
         return run_status_plot_spectra.get()
         return run_status_spec_lib_matching.get()
+        return run_status_parameter_tuning.get()
     class ReactiveWriter(io.TextIOBase):
@@ -718,7 +881,6 @@ def server(input, output, session):
             def _apply():
                 self.rv.set(self.rv.get() + s)
                 self.loop.create_task(reactive.flush())
             self.loop.call_soon_threadsafe(_apply)
             return len(s)
@@ -728,22 +890,24 @@ def server(input, output, session):
     @render.download(filename="identification_output.csv")
     async def run_btn_spec_lib_matching():
-        # 1) quick first paint
         match_log_rv.set("Starting identification...\n")
         await reactive.flush()
-        # 2) normalize inputs (same as before)
         hq = input.high_quality_reference_library()
         if isinstance(hq, str):
             hq = hq.lower() == "true"
         elif isinstance(hq, (int, float)):
             hq = bool(hq)
+        weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
+        weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
         common_kwargs = dict(
             query_data=input.query_data()[0]["datapath"],
             reference_data=input.reference_data()[0]["datapath"],
             likely_reference_ids=None,
             similarity_measure=input.similarity_measure(),
+            weights=weights,
             spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
             high_quality_reference_library=hq,
             mz_min=input.mz_min(), mz_max=input.mz_max(),
@@ -752,7 +916,7 @@ def server(input, output, session):
             wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
             LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
             n_top_matches_to_save=input.n_top_matches_to_save(),
-            print_id_results=True,  # ensure the library actually prints progress
+            print_id_results=True,  # ensure the library actually prints
             output_identification=str(Path.cwd() / "identification_output.csv"),
             output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
             return_ID_output=True,
@@ -761,7 +925,6 @@ def server(input, output, session):
         loop = asyncio.get_running_loop()
         rw = ReactiveWriter(match_log_rv, loop)
-        # 3) run the heavy function in a thread so the event loop can repaint
         try:
             with redirect_stdout(rw), redirect_stderr(rw):
                 if input.chromatography_platform() == "HRMS":
@@ -772,9 +935,7 @@ def server(input, output, session):
                         **common_kwargs
                     )
                 else:
-                    df_out = await asyncio.to_thread(
-                        run_spec_lib_matching_on_NRMS_data, **common_kwargs
-                    )
+                    df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
             match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
             await reactive.flush()
         except Exception as e:
@@ -782,8 +943,212 @@ def server(input, output, session):
             await reactive.flush()
             raise
-        # 4) stream CSV back to the browser
+        yield df_out.to_csv(index=True)
+    @render.download(filename="plot.png")
+    def run_btn_plot_spectra_within_spec_lib_matching():
+        """Render the selected spectra comparison and stream it to the browser as a PNG.
+
+        Requires both the query and reference uploads. The plotting parameters are
+        read from the current UI inputs, the HRMS or NRMS plotting function is chosen
+        from the chromatography platform input, and the returned figure is saved at
+        150 dpi into an in-memory buffer.
+
+        Yields:
+            bytes: the PNG-encoded figure.
+
+        Raises:
+            IndexError: if fewer than four comma-separated weights are supplied.
+            ValueError: if a supplied weight is not a valid float.
+        """
+        req(input.query_data(), input.reference_data())
+        # Empty ID inputs are passed on as None.
+        spectrum_ID1 = input.spectrum_ID1() or None
+        spectrum_ID2 = input.spectrum_ID2() or None
+        # The UI value may be a string or a number; coerce it to a real bool.
+        hq = input.high_quality_reference_library()
+        if isinstance(hq, str):
+            hq = hq.lower() == "true"
+        elif isinstance(hq, (int, float)):
+            hq = bool(hq)
+        # Weights arrive as comma-separated text in the order Cosine, Shannon, Renyi, Tsallis.
+        weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
+        weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
+        common = dict(
+            query_data=input.query_data()[0]['datapath'],
+            reference_data=input.reference_data()[0]['datapath'],
+            spectrum_ID1=spectrum_ID1,
+            spectrum_ID2=spectrum_ID2,
+            similarity_measure=input.similarity_measure(),
+            weights=weights,
+            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
+            high_quality_reference_library=hq,
+            mz_min=input.mz_min(), mz_max=input.mz_max(),
+            int_min=input.int_min(), int_max=input.int_max(),
+            noise_threshold=input.noise_threshold(),
+            wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
+            LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
+            y_axis_transformation="normalized",
+            return_plot=True
+        )
+        # No plt.show() here: the figure is only serialized for download, and showing it
+        # from a server handler can block the handler or open a window on the server.
+        if input.chromatography_platform() == "HRMS":
+            fig = generate_plots_on_HRMS_data(
+                window_size_centroiding=input.window_size_centroiding(),
+                window_size_matching=input.window_size_matching(),
+                **common
+            )
+        else:
+            fig = generate_plots_on_NRMS_data(**common)
+        try:
+            with io.BytesIO() as buf:
+                fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
+                png_bytes = buf.getvalue()
+        finally:
+            # Close exactly the figure that was rendered, even if saving failed.
+            plt.close(fig)
+        yield png_bytes
+    # Disabled earlier version of run_btn_parameter_tuning. It is wrapped in a bare
+    # triple-quoted string, so it is evaluated as a string expression and never runs;
+    # the active definition of run_btn_parameter_tuning follows the closing quotes.
+    '''
+    @render.download(filename="parameter_tuning_output.csv")
+    async def run_btn_parameter_tuning():
+        match_log_rv.set("Running grid search of all parameters specified...\n")
+        similarity_measure_tmp = list(input.similarity_measure())
+        high_quality_reference_library_tmp = [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()]
+        spectrum_preprocessing_order_tmp = strip_text(input.spectrum_preprocessing_order())
+        mz_min_tmp = strip_numeric(input.mz_min())
+        mz_max_tmp = strip_numeric(input.mz_max())
+        int_min_tmp = strip_numeric(input.int_min())
+        int_max_tmp = strip_numeric(input.int_max())
+        noise_threshold_tmp = strip_numeric(input.noise_threshold())
+        wf_mz_tmp = strip_numeric(input.wf_mz())
+        wf_int_tmp = strip_numeric(input.wf_int())
+        LET_threshold_tmp = strip_numeric(input.LET_threshold())
+        entropy_dimension_tmp = strip_numeric(input.entropy_dimension())
+        weights_tmp = strip_weights(input.weights())
+        common_kwargs = dict(
+            query_data=input.query_data()[0]["datapath"],
+            reference_data=input.reference_data()[0]["datapath"],
+            output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
+            return_output=True
+        )
+        loop = asyncio.get_running_loop()
+        rw = ReactiveWriter(match_log_rv, loop)
+        try:
+            with redirect_stdout(rw), redirect_stderr(rw):
+                if input.chromatography_platform() == "HRMS":
+                    window_size_centroiding_tmp = strip_numeric(input.window_size_centroiding())
+                    window_size_matching_tmp = strip_numeric(input.window_size_matching())
+                    grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp, 'window_size_centroiding':window_size_centroiding_tmp, 'window_size_matching':window_size_matching_tmp}
+                    df_out = await asyncio.to_thread(tune_params_on_HRMS_data, **common_kwargs, grid=grid)
+                else:
+                    grid={'similarity_measure':similarity_measure_tmp, 'weight':weights_tmp, 'spectrum_preprocessing_order':spectrum_preprocessing_order_tmp, 'mz_min':mz_min_tmp, 'mz_max':mz_max_tmp, 'int_min':int_min_tmp, 'int_max':int_max_tmp, 'noise_threshold':noise_threshold_tmp, 'wf_mz':wf_mz_tmp, 'wf_int':wf_int_tmp, 'LET_threshold':LET_threshold_tmp, 'entropy_dimension':entropy_dimension_tmp, 'high_quality_reference_library':high_quality_reference_library_tmp}
+                    df_out = await asyncio.to_thread(tune_params_on_NRMS_data, **common_kwargs, grid=grid)
+            match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
+            #await reactive.flush()
+        except Exception as e:
+            match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
+            #await reactive.flush()
+            raise
         yield df_out.to_csv(index=False)
+    '''
+    @render.download(filename="parameter_tuning_output.csv")
+    async def run_btn_parameter_tuning():
+        """Run the parameter grid search and stream the results as UTF-8 CSV bytes.
+
+        The text inputs are parsed into lists of candidate values, the HRMS or NRMS
+        tuning function is run in a worker thread with stdout/stderr redirected to a
+        ReactiveWriter, and the returned DataFrame is serialized without its index.
+
+        Input parsing happens inside the try block so that a malformed input is
+        reported in the log and the running flags are still cleared in `finally`.
+
+        Yields:
+            bytes: the tuning results encoded as CSV.
+
+        Raises:
+            Exception: any error from input parsing or the tuning run is appended to
+                the log and re-raised.
+        """
+        is_any_job_running.set(True)
+        is_tuning_running.set(True)
+        match_log_rv.set("Running grid search of all parameters specified...\n")
+        # Discard any log text left in the queue before this run starts.
+        _drain_queue_nowait(_LOG_QUEUE)
+        # One writer serves both streams; it only forwards text to _LOG_QUEUE.
+        rw = ReactiveWriter(match_log_rv, asyncio.get_running_loop())
+        try:
+            grid = {
+                'similarity_measure': list(input.similarity_measure()),
+                'weight': strip_weights(input.weights()),
+                'spectrum_preprocessing_order': strip_text(input.spectrum_preprocessing_order()),
+                'mz_min': strip_numeric(input.mz_min()),
+                'mz_max': strip_numeric(input.mz_max()),
+                'int_min': strip_numeric(input.int_min()),
+                'int_max': strip_numeric(input.int_max()),
+                'noise_threshold': strip_numeric(input.noise_threshold()),
+                'wf_mz': strip_numeric(input.wf_mz()),
+                'wf_int': strip_numeric(input.wf_int()),
+                'LET_threshold': strip_numeric(input.LET_threshold()),
+                'entropy_dimension': strip_numeric(input.entropy_dimension()),
+                'high_quality_reference_library': [
+                    x.strip().lower() == "true"
+                    for x in input.high_quality_reference_library().strip().strip("[]").split(",")
+                    if x.strip()
+                ],
+            }
+            common_kwargs = dict(
+                query_data=input.query_data()[0]["datapath"],
+                reference_data=input.reference_data()[0]["datapath"],
+                output_path=str(Path.cwd() / "parameter_tuning_output.csv"),
+                return_output=True
+            )
+            with redirect_stdout(rw), redirect_stderr(rw):
+                if input.chromatography_platform() == "HRMS":
+                    # The window sizes are only part of the HRMS grid.
+                    grid['window_size_centroiding'] = strip_numeric(input.window_size_centroiding())
+                    grid['window_size_matching'] = strip_numeric(input.window_size_matching())
+                    tune = tune_params_on_HRMS_data
+                else:
+                    tune = tune_params_on_NRMS_data
+                df_out = await asyncio.to_thread(tune, **common_kwargs, grid=grid)
+            match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
+        except Exception as e:
+            match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
+            raise
+        finally:
+            is_tuning_running.set(False)
+            is_any_job_running.set(False)
+            # Append whatever was still queued when the run ended.
+            trailing = _drain_queue_nowait(_LOG_QUEUE)
+            if trailing:
+                match_log_rv.set(match_log_rv.get() + "".join(trailing))
+            await reactive.flush()
+        yield df_out.to_csv(index=False).encode('utf-8')
+    @render.text
+    def status_output():
+        """Return the current value of run_status_plot_spectra as the status text.
+
+        Only the plot-spectra status is reported. The statements that followed this
+        return (returning run_status_spec_lib_matching and
+        run_status_parameter_tuning) could never execute and were dropped.
+        NOTE(review): if all three statuses were meant to be shown, they need to be
+        combined into one string. A second status_output definition appears later in
+        server and rebinds this name.
+        """
+        return run_status_plot_spectra.get()
+    class ReactiveWriter(io.TextIOBase):
+        """Text stream that forwards every written chunk to the module-level _LOG_QUEUE.
+
+        Each chunk is enqueued through loop.call_soon_threadsafe, so write() schedules
+        the put on the supplied event loop rather than touching the queue directly.
+        """
+        def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
+            # rv is stored, but no method of this class reads it.
+            self._rv, self._loop = rv, loop
+        def write(self, s: str):
+            """Enqueue s and return its length; an empty s is ignored and returns 0."""
+            if s:
+                self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
+                return len(s)
+            return 0
+        def flush(self):
+            """Do nothing; this object holds no buffer of its own."""
+            return None
+    @reactive.effect
+    async def _pump_reactive_writer_logs():
+        """Move queued log text into match_log_rv while a tuning job is flagged as running.
+
+        On every run in which is_tuning_running is set, the effect re-schedules itself
+        with reactive.invalidate_later(0.1), drains _LOG_QUEUE, and appends any drained
+        text to match_log_rv before flushing.
+        """
+        if is_tuning_running.get():
+            reactive.invalidate_later(0.1)
+            pending = _drain_queue_nowait(_LOG_QUEUE)
+            if pending:
+                appended = match_log_rv.get() + "".join(pending)
+                match_log_rv.set(appended)
+                await reactive.flush()
+    @render.text
+    def status_output():
+        """Return the current value of run_status_plot_spectra as the status text.
+
+        Only the plot-spectra status is reported. The statements that followed this
+        return (returning run_status_spec_lib_matching and
+        run_status_parameter_tuning) could never execute and were dropped.
+        NOTE(review): an identical status_output is defined earlier in server; this
+        later definition rebinds the name, so one of the two is likely redundant.
+        """
+        return run_status_plot_spectra.get()
 # Create the application object from the UI definition and the server function.
 app = App(app_ui, server)

pycompound/plot_spectra.py CHANGED Viewed

@@ -177,8 +177,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
             spec_tmp = spectrum_ID1
             spectrum_ID1 = spectrum_ID2
             spectrum_ID2 = spec_tmp
-        print(unique_query_ids)
-        print(spectrum_ID1)
         query_idx = unique_query_ids.index(spectrum_ID1)
         reference_idx = unique_reference_ids.index(spectrum_ID2)
         q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
@@ -311,7 +309,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
     plt.figlegend(loc = 'upper center')
     fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
     fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
-    fig.text(0.05, 0.12, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
+    fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
     fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
     fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
     fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
@@ -320,6 +318,9 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
     fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
     fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
     fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
+    if similarity_measure == 'mixture':
+        fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
     plt.savefig(output_path, format='pdf')
     if return_plot == True:
@@ -606,13 +607,15 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
     plt.figlegend(loc = 'upper center')
     fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
     fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
-    fig.text(0.05, 0.09, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
+    fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
     fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
     fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
     fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
     fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
     fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
     fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
+    if similarity_measure=='mixture':
+        fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
     plt.savefig(output_path, format='pdf')
     if return_plot == True:

pycompound/spec_lib_matching.py CHANGED Viewed

@@ -9,6 +9,12 @@ from pathlib import Path
 import json
 from itertools import product
 from joblib import Parallel, delayed
+import csv
+# Default single-value parameter grid for HRMS tuning; tune_params_on_HRMS_data
+# overlays the caller's grid on top of these entries.
+default_HRMS_grid = {
+    'similarity_measure': ['cosine'],
+    'weight': [{'Cosine': 0.25, 'Shannon': 0.25, 'Renyi': 0.25, 'Tsallis': 0.25}],
+    'spectrum_preprocessing_order': ['FCNMWL'],
+    'mz_min': [0],
+    'mz_max': [9999999],
+    'int_min': [0],
+    'int_max': [99999999],
+    'window_size_centroiding': [0.5],
+    'window_size_matching': [0.5],
+    'noise_threshold': [0.0],
+    'wf_mz': [0.0],
+    'wf_int': [1.0],
+    'LET_threshold': [0.0],
+    'entropy_dimension': [1.1],
+    'high_quality_reference_library': [False],
+}
+# Default single-value parameter grid for NRMS tuning; tune_params_on_NRMS_data
+# overlays the caller's grid on top of these entries.
+# The preprocessing order is 'FNLW', matching the NRMS default of the previous
+# signature; 'FCNMWL' is the HRMS default and had been copied here.
+default_NRMS_grid = {
+    'similarity_measure': ['cosine'],
+    'weight': [{'Cosine': 0.25, 'Shannon': 0.25, 'Renyi': 0.25, 'Tsallis': 0.25}],
+    'spectrum_preprocessing_order': ['FNLW'],
+    'mz_min': [0],
+    'mz_max': [9999999],
+    'int_min': [0],
+    'int_max': [99999999],
+    'noise_threshold': [0.0],
+    'wf_mz': [0.0],
+    'wf_int': [1.0],
+    'LET_threshold': [0.0],
+    'entropy_dimension': [1.1],
+    'high_quality_reference_library': [False],
+}
 def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
               similarity_measure_tmp, weight,
@@ -71,7 +77,8 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
     )
-def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
+def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
     """
     runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
@@ -81,6 +88,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
     --output_path: accuracy from each choice of parameter set is saved to a CSV file here.
     """
+    grid = {**default_HRMS_grid, **(grid or {})}
     for key, value in grid.items():
         globals()[key] = value
@@ -118,24 +126,35 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
     print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
     if output_path is None:
-        output_path = f'{Path.cwd()}/tuning_param_output.csv'
+        output_path = f'{Path.cwd()}/tuning_param_output.txt'
         print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
-    # build parameter grid out of the lists you already set
     param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold,
                          window_size_centroiding, window_size_matching, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
-    # run in parallel on all CPUs
     results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_HRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
     df_out = pd.DataFrame(results, columns=[
         'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX','NOISE.THRESHOLD',
         'WINDOW.SIZE.CENTROIDING','WINDOW.SIZE.MATCHING', 'WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
     ])
-    df_out = df_out.drop(columns=['WEIGHT'])
-    df_out.to_csv(output_path, index=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
+    df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
+    if return_output is False:
+        df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
+    else:
+        return df_out
-def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
+def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
     """
     runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
@@ -145,10 +164,10 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
     --output_path: accuracy from each choice of parameter set is saved to a CSV file here
     """
+    grid = {**default_NRMS_grid, **(grid or {})}
     for key, value in grid.items():
         globals()[key] = value
-    # load query and reference libraries
     if query_data is None:
         print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
         sys.exit()
@@ -182,21 +201,30 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
     print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
     if output_path is None:
-        output_path = f'{Path.cwd()}/tuning_param_output.csv'
+        output_path = f'{Path.cwd()}/tuning_param_output.txt'
         print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
-    # build parameter grid out of the lists you already set
     param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max,
                          noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
-    # run in parallel on all CPUs
     results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_NRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
     df_out = pd.DataFrame(results, columns=[
         'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX',
         'NOISE.THRESHOLD','WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
     ])
-    df_out = df_out.drop(columns=['WEIGHT'])
-    df_out.to_csv(output_path, index=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
+    if return_output is False:
+        df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
+    else:
+        return df_out
@@ -413,8 +441,8 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
     --entropy_dimension: Entropy dimension parameter. Must have positive value other than 1. When the entropy dimension is 1, then Renyi and Tsallis entropy are equivalent to Shannon entropy. Therefore, this parameter only applies to the renyi and tsallis similarity measures. This parameter will be ignored if similarity measure cosine or shannon is chosen. Default: 1.1
     --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
     --print_id_results: Flag that prints identification results if True. Default: False
-    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
-    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
+    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
+    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
     '''
     # load query and reference libraries
@@ -528,11 +556,11 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
         sys.exit()
     if output_identification is None:
-        output_identification = f'{Path.cwd()}/output_identification.csv'
+        output_identification = f'{Path.cwd()}/output_identification.txt'
         print(f'Warning: writing identification output to {output_identification}')
     if output_similarity_scores is None:
-        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
+        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
         print(f'Warning: writing similarity scores to {output_similarity_scores}')
@@ -644,10 +672,10 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
     if return_ID_output is False:
         # write spectral library matching results to disk
-        df_top_ref_specs.to_csv(output_identification)
+        df_top_ref_specs.to_csv(output_identification, sep='\t')
         # write all similarity scores to disk
-        df_scores.to_csv(output_similarity_scores)
+        df_scores.to_csv(output_similarity_scores, sep='\t')
     else:
         return df_top_ref_specs
@@ -678,8 +706,8 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
     --normalization_method: Method used to normalize the intensities of each spectrum so that the intensities sum to 1. Since the objects entropy quantifies the uncertainy of must be probability distributions, the intensities of a given spectrum must sum to 1 prior to computing the entropy of the given spectrum intensities. Options: \'standard\' and \'softmax\'. Default: standard.
     --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
     --print_id_results: Flag that prints identification results if True. Default: False
-    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
-    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
+    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
+    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
     '''
     # load query and reference libraries
@@ -778,11 +806,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
         sys.exit()
     if output_identification is None:
-        output_identification = f'{Path.cwd()}/output_identification.csv'
+        output_identification = f'{Path.cwd()}/output_identification.txt'
         print(f'Warning: writing identification output to {output_identification}')
     if output_similarity_scores is None:
-        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
+        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
         print(f'Warning: writing similarity scores to {output_similarity_scores}')
@@ -894,11 +922,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
     if return_ID_output is False:
         # write spectral library matching results to disk
-        df_top_ref_specs.to_csv(output_identification)
+        df_top_ref_specs.to_csv(output_identification, sep='\t')
         # write all similarity scores to disk
         df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
-        df_scores.to_csv(output_similarity_scores)
+        df_scores.to_csv(output_similarity_scores, sep='\t')
     else:
         return df_top_ref_specs

pycompound/tuning_CLI.py CHANGED Viewed

@@ -40,8 +40,7 @@ else:
     sys.exit()
-grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':args.weights, 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
+grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':[args.weights], 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
 if args.chromatography_platform == 'HRMS':
     grid['mz_min'] = [float(x) for x in grid['mz_min']]

{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycompound
-Version: 0.1.0
+Version: 0.1.1
 Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
 Author-email: Hunter Dlugas <fy7392@wayne.edu>
 License-Expression: MIT

pycompound-0.1.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+app.py,sha256=k5mPyctA1eWkGjtnKrJb7STuweh_aH4HmPUH07jO92Y,53841
+pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
+pycompound/plot_spectra.py,sha256=Q7nDSW3Y5pR_Ql4JeEmyd6KRRyzvxk9j0yaUR0hfjJc,42275
+pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
+pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
+pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
+pycompound/spec_lib_matching.py,sha256=AAMxWqi6LXWo-tJ-uqJ4QxfHSg8bX3G_DJVt2bLLMcM,61860
+pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
+pycompound/tuning_CLI.py,sha256=dSFLwMiI0_6G4YDZR5ubqn9-75ixOvDPZMOoGS-_B6w,8540
+pycompound-0.1.1.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
+pycompound-0.1.1.dist-info/METADATA,sha256=XZtkvSau_Z723iCgy_LTR1CkYryDxXBdIFtb_D_E9u0,1732
+pycompound-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pycompound-0.1.1.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
+pycompound-0.1.1.dist-info/RECORD,,

pycompound-0.1.0.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
-pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
-pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
-pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
-pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
-pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
-pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
-pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
-pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
-pycompound-0.1.0.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
-pycompound-0.1.0.dist-info/METADATA,sha256=qfM4rP0BeGThYpxvGa7vOseRsUgtJ4aH8hgUtio0Ugw,1732
-pycompound-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pycompound-0.1.0.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
-pycompound-0.1.0.dist-info/RECORD,,

{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pycompound-0.1.0.dist-info → pycompound-0.1.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

pycompound 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

pycompound 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl