PyPI - pycompound - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

pycompound 0.1.0py3-none-any.whl → 0.1.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

app.py +470 -144
pycompound/build_library.py +2 -9
pycompound/plot_spectra.py +17 -42
pycompound/processing.py +0 -9
pycompound/similarity_measures.py +0 -3
pycompound/spec_lib_matching.py +295 -102
pycompound/spec_lib_matching_CLI.py +2 -7
pycompound/tuning_CLI.py +2 -3
{pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/METADATA +1 -1
pycompound-0.1.2.dist-info/RECORD +14 -0
pycompound-0.1.0.dist-info/RECORD +0 -14
{pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/WHEEL +0 -0
{pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/licenses/LICENSE +0 -0
{pycompound-0.1.0.dist-info → pycompound-0.1.2.dist-info}/top_level.txt +0 -0

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
 from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
 from pycompound.spec_lib_matching import tune_params_on_HRMS_data
 from pycompound.spec_lib_matching import tune_params_on_NRMS_data
+from pycompound.spec_lib_matching import tune_params_on_HRMS_data_shiny
+from pycompound.spec_lib_matching import tune_params_on_NRMS_data_shiny
 from pycompound.plot_spectra import generate_plots_on_HRMS_data
 from pycompound.plot_spectra import generate_plots_on_NRMS_data
 from pathlib import Path
@@ -18,8 +20,45 @@ import matplotlib.pyplot as plt
 import pandas as pd
 import numpy as np
 import netCDF4 as nc
-from pyteomics import mgf
-from pyteomics import mzml
+from pyteomics import mgf, mzml
+import ast
+from numbers import Real
+_LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
+def _run_with_redirects(fn, writer, *args, **kwargs):
+    with redirect_stdout(writer), redirect_stderr(writer):
+        return fn(*args, **kwargs)
+def strip_text(s):
+    return [x.strip() for x in s.strip('[]').split(',') if x.strip()]
+def strip_numeric(s):
+    return [float(x.strip()) for x in s.strip('[]').split(',') if x.strip()]
+def strip_weights(s):
+    obj = ast.literal_eval(s) if isinstance(s, (str, bytes)) else s
+    keys = ['Cosine', 'Shannon', 'Renyi', 'Tsallis']
+    if isinstance(obj, (list, tuple)):
+        if len(obj) == 4 and all(isinstance(x, Real) for x in obj):
+            tuples = [obj]
+        else:
+            tuples = list(obj)
+    else:
+        raise ValueError(f"Expected a 4-tuple or a sequence of 4-tuples, got {type(obj).__name__}")
+    out = []
+    for t in tuples:
+        if not (isinstance(t, (list, tuple)) and len(t) == 4):
+            raise ValueError(f"Each item must be a 4-tuple, got: {t!r}")
+        out.append(dict(zip(keys, t)))
+    return out
 def build_library(input_path=None, output_path=None):
@@ -152,40 +191,45 @@ def extract_first_column_ids(file_path: str, max_ids: int = 20000):
     return []
+def _open_plot_window(session, png_bytes: bytes, title: str = "plot.png"):
+    """Send PNG bytes to browser and open in a new window as a data URL."""
+    b64 = base64.b64encode(png_bytes).decode("ascii")
+    data_url = f"data:image/png;base64,{b64}"
+    session.send_custom_message("open-plot-window", {"png": data_url, "title": title})
 def plot_spectra_ui(platform: str):
-    # Base inputs common to all platforms
     base_inputs = [
         ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
-    ui.input_selectize(
-        "spectrum_ID1",
-        "Select spectrum ID 1:",
-        choices=[],
-        multiple=False,
-        options={"placeholder": "Upload a query file to load IDs..."},
-    ),
-    ui.input_selectize(
-        "spectrum_ID2",
-        "Select spectrum ID 2 (optional):",
-        choices=[],
-        multiple=False,
-        options={"placeholder": "Upload a reference file to load IDs..."},
-    ),
+        ui.input_selectize(
+            "spectrum_ID1",
+            "Select spectrum ID 1 (default is the first spectrum in the library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
+        ui.input_selectize(
+            "spectrum_ID2",
+            "Select spectrum ID 2 (default is the first spectrum in the library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
         ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
+        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
         ui.input_select(
             "high_quality_reference_library",
-            "Indicate whether the reference library is considered high quality. "
-            "If True, filtering and noise removal are only applied to the query spectra.",
+            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
             [False, True],
         ),
     ]
-    # Extra inputs depending on platform
     if platform == "HRMS":
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
+                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                 "FCNMWL",
             ),
             ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -195,12 +239,11 @@ def plot_spectra_ui(platform: str):
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (F, N, L, W).",
+                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                 "FNLW",
             )
         ]
-    # Numeric inputs
     numeric_inputs = [
         ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
         ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
@@ -213,68 +256,77 @@ def plot_spectra_ui(platform: str):
         ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
     ]
-    # Y-axis transformation select input
     select_input = ui.input_select(
         "y_axis_transformation",
         "Transformation to apply to intensity axis:",
         ["normalized", "none", "log10", "sqrt"],
     )
-    # Run and Back buttons
     run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
     back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
-    # Layout base_inputs and extra_inputs in columns
     if platform == "HRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3),
         )
     elif platform == "NRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div([numeric_inputs[5:10], select_input], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3),
         )
-    # Combine everything
     return ui.div(
         ui.TagList(
             ui.h2("Plot Spectra"),
             inputs_columns,
             run_button_plot_spectra,
             back_button,
-            ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
+            ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px"),
+            ui.div(ui.output_text("plot_reference_status"), style="margin-top:8px; font-size:14px")
         ),
     )
 def run_spec_lib_matching_ui(platform: str):
-    # Base inputs common to all platforms
     base_inputs = [
         ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
+        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '0.25, 0.25, 0.25, 0.25'),
+        ui.input_selectize(
+            "spectrum_ID1",
+            "Select spectrum ID 1 (only applicable for plotting; default is the first spectrum in the query library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
+        ui.input_selectize(
+            "spectrum_ID2",
+            "Select spectrum ID 2 (only applicable for plotting; default is the first spectrum in the reference library):",
+            choices=[],
+            multiple=False,
+            options={"placeholder": "Upload a library..."},
+        ),
         ui.input_select(
             "high_quality_reference_library",
-            "Indicate whether the reference library is considered high quality. "
-            "If True, filtering and noise removal are only applied to the query spectra.",
+            "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.",
             [False, True],
-        ),
+        )
     ]
-    # Extra inputs depending on platform
     if platform == "HRMS":
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (C, F, M, N, L, W). M must be included, C before M if used.",
+                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
                 "FCNMWL",
             ),
             ui.input_numeric("window_size_centroiding", "Centroiding window-size:", 0.5),
@@ -284,12 +336,11 @@ def run_spec_lib_matching_ui(platform: str):
         extra_inputs = [
             ui.input_text(
                 "spectrum_preprocessing_order",
-                "Sequence of characters for preprocessing order (F, N, L, W).",
+                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
                 "FNLW",
             )
         ]
-    # Numeric inputs
     numeric_inputs = [
         ui.input_numeric("mz_min", "Minimum m/z for filtering:", 0),
         ui.input_numeric("mz_max", "Maximum m/z for filtering:", 99999999),
@@ -300,30 +351,29 @@ def run_spec_lib_matching_ui(platform: str):
         ui.input_numeric("wf_int", "Intensity weight factor:", 1.0),
         ui.input_numeric("LET_threshold", "Low-entropy threshold:", 0.0),
         ui.input_numeric("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", 1.1),
-        ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 1),
+        ui.input_numeric("n_top_matches_to_save", "Number of top matches to save:", 3),
     ]
-    # Run and Back buttons
-    run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run Spectral Library Matching", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    run_button_plot_spectra_within_spec_lib_matching = ui.download_button("run_btn_plot_spectra_within_spec_lib_matching", "Plot Spectra", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
     back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
-    # Layout base_inputs and extra_inputs in columns
     if platform == "HRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3)
         )
     elif platform == "NRMS":
         inputs_columns = ui.layout_columns(
-            ui.div(base_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
-            ui.div([base_inputs[5:6], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
             ui.div(numeric_inputs[5:10], style="display:flex; flex-direction:column; gap:10px;"),
-            col_widths=(3, 3, 3, 3),
+            col_widths=(3,3,3,3)
         )
     log_panel = ui.card(
@@ -332,19 +382,99 @@ def run_spec_lib_matching_ui(platform: str):
         style="max-height:300px; overflow:auto"
     )
-    # Combine everything
     return ui.div(
         ui.TagList(
             ui.h2("Run Spectral Library Matching"),
             inputs_columns,
             run_button_spec_lib_matching,
+            run_button_plot_spectra_within_spec_lib_matching,
             back_button,
-            log_panel,
+            log_panel
         ),
     )
+def run_parameter_tuning_ui(platform: str):
+    base_inputs = [
+        ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
+        ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
+        ui.input_selectize("similarity_measure", "Select similarity measure(s):", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"], multiple=True, selected='cosine'),
+        ui.input_text('weights', 'Weights for mixture similarity measure (cosine, shannon, renyi, tsallis):', '((0.25, 0.25, 0.25, 0.25))'),
+        ui.input_text("high_quality_reference_library", "Indicate whether the reference library is considered high quality. If True, filtering and noise removal are only applied to the query spectra.", '[True]')
+    ]
+    if platform == "HRMS":
+        extra_inputs = [
+            ui.input_text(
+                "spectrum_preprocessing_order",
+                "Sequence of characters for preprocessing order (C (centroiding), F (filtering), M (matching), N (noise removal), L (low-entropy transformation), W (weight factor transformation)). M must be included, C before M if used.",
+                "[FCNMWL,CWM]",
+            ),
+            ui.input_text("window_size_centroiding", "Centroiding window-size:", "[0.5]"),
+            ui.input_text("window_size_matching", "Matching window-size:", "[0.1,0.5]"),
+        ]
+    else:
+        extra_inputs = [
+            ui.input_text(
+                "spectrum_preprocessing_order",
+                "Sequence of characters for preprocessing order (F (filtering), N (noise removal), L (low-entropy transformation), W (weight factor transformation)).",
+                "[FNLW,WNL]",
+            )
+        ]
+    numeric_inputs = [
+        ui.input_text("mz_min", "Minimum m/z for filtering:", '[0]'),
+        ui.input_text("mz_max", "Maximum m/z for filtering:", '[99999999]'),
+        ui.input_text("int_min", "Minimum intensity for filtering:", '[0]'),
+        ui.input_text("int_max", "Maximum intensity for filtering:", '[999999999]'),
+        ui.input_text("noise_threshold", "Noise removal threshold:", '[0.0]'),
+        ui.input_text("wf_mz", "Mass/charge weight factor:", '[0.0]'),
+        ui.input_text("wf_int", "Intensity weight factor:", '[1.0]'),
+        ui.input_text("LET_threshold", "Low-entropy threshold:", '[0.0]'),
+        ui.input_text("entropy_dimension", "Entropy dimension (Renyi/Tsallis only):", '[1.1]')
+    ]
+    run_button_parameter_tuning = ui.download_button("run_btn_parameter_tuning", "Tune parameters", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    if platform == "HRMS":
+        inputs_columns = ui.layout_columns(
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[5:9], style="display:flex; flex-direction:column; gap:10px;"),
+            col_widths=(3, 3, 3, 3),
+        )
+    elif platform == "NRMS":
+        inputs_columns = ui.layout_columns(
+            ui.div(base_inputs[0:6], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div([base_inputs[6:7], *extra_inputs], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[0:5], style="display:flex; flex-direction:column; gap:10px;"),
+            ui.div(numeric_inputs[5:9], style="display:flex; flex-direction:column; gap:10px;"),
+            col_widths=(3, 3, 3, 3),
+        )
+    log_panel = ui.card(
+        ui.card_header("Identification log"),
+        ui.output_text_verbatim("match_log"),
+        style="max-height:300px; overflow:auto"
+    )
+    return ui.div(
+        ui.TagList(
+            ui.h2("Tune parameters"),
+            inputs_columns,
+            run_button_parameter_tuning,
+            back_button,
+            log_panel
+        ),
+    )
 app_ui = ui.page_fluid(
     ui.output_ui("main_ui"),
     ui.output_text("status_output")
@@ -361,8 +491,15 @@ def server(input, output, session):
     run_status_plot_spectra = reactive.Value("")
     run_status_spec_lib_matching = reactive.Value("")
+    run_status_plot_spectra_within_spec_lib_matching = reactive.Value("")
+    run_status_parameter_tuning = reactive.Value("")
+    is_tuning_running = reactive.Value(False)
     match_log_rv = reactive.Value("")
     is_matching_rv = reactive.Value(False)
+    is_any_job_running = reactive.Value(False)
+    latest_csv_path_rv = reactive.Value("")
+    latest_df_rv = reactive.Value(None)
+    is_running_rv = reactive.Value(False)
     query_ids_rv = reactive.Value([])
     query_file_path_rv = reactive.Value(None)
@@ -377,6 +514,96 @@ def server(input, output, session):
     converted_reference_path_rv = reactive.Value(None)
+    def _reset_plot_spectra_state():
+        query_status_rv.set("")
+        reference_status_rv.set("")
+        query_ids_rv.set([])
+        reference_ids_rv.set([])
+        query_file_path_rv.set(None)
+        reference_file_path_rv.set(None)
+        query_result_rv.set(None)
+        reference_result_rv.set(None)
+        converted_query_path_rv.set(None)
+        converted_reference_path_rv.set(None)
+        try:
+            ui.update_selectize("spectrum_ID1", choices=[], selected=None)
+            ui.update_selectize("spectrum_ID2", choices=[], selected=None)
+        except Exception:
+            pass
+    def _reset_spec_lib_matching_state():
+        match_log_rv.set("")
+        is_matching_rv.set(False)
+        is_any_job_running.set(False)
+        try:
+            ui.update_selectize("spectrum_ID1", choices=[], selected=None)
+            ui.update_selectize("spectrum_ID2", choices=[], selected=None)
+        except Exception:
+            pass
+    def _reset_parameter_tuning_state():
+        match_log_rv.set("")
+        is_tuning_running.set(False)
+        is_any_job_running.set(False)
+    @reactive.effect
+    @reactive.event(input.back)
+    def _clear_on_back_from_pages():
+        page = current_page()
+        if page == "plot_spectra":
+            _reset_plot_spectra_state()
+        elif page == "run_spec_lib_matching":
+            _reset_spec_lib_matching_state()
+        elif page == "run_parameter_tuning":
+            _reset_parameter_tuning_state()
+    @reactive.effect
+    def _clear_on_enter_pages():
+        page = current_page()
+        if page == "plot_spectra":
+            _reset_plot_spectra_state()
+        elif page == "run_spec_lib_matching":
+            _reset_spec_lib_matching_state()
+        elif page == "run_parameter_tuning":
+            _reset_parameter_tuning_state()
+    def _drain_queue_nowait(q: asyncio.Queue) -> list[str]:
+        out = []
+        try:
+            while True:
+                out.append(q.get_nowait())
+        except asyncio.QueueEmpty:
+            pass
+        return out
+    class ReactiveWriter(io.TextIOBase):
+        def __init__(self, loop: asyncio.AbstractEventLoop):
+            self._loop = loop
+        def write(self, s: str):
+            if not s:
+                return 0
+            self._loop.call_soon_threadsafe(_LOG_QUEUE.put_nowait, s)
+            return len(s)
+        def flush(self):
+            pass
+    @reactive.effect
+    async def _pump_logs():
+        if not (is_any_job_running.get() or is_tuning_running.get() or is_matching_rv.get()):
+            return
+        reactive.invalidate_later(0.05)
+        msgs = _drain_queue_nowait(_LOG_QUEUE)
+        if msgs:
+            match_log_rv.set(match_log_rv.get() + "".join(msgs))
+            await reactive.flush()
     def process_database(file_path: str):
         suffix = Path(file_path).suffix.lower()
         return {"path": file_path, "suffix": suffix}
@@ -385,13 +612,14 @@ def server(input, output, session):
     def plot_query_status():
         return query_status_rv.get() or ""
+    @render.text
+    def plot_reference_status():
+        return reference_status_rv.get() or ""
     @reactive.effect
     @reactive.event(input.query_data)
     async def _on_query_upload():
-        if current_page() != "plot_spectra":
-            return
         files = input.query_data()
         req(files and len(files) > 0)
@@ -414,9 +642,6 @@ def server(input, output, session):
     @reactive.effect
     @reactive.event(input.reference_data)
     async def _on_reference_upload():
-        if current_page() != "plot_spectra":
-            return
         files = input.reference_data()
         req(files and len(files) > 0)
@@ -441,24 +666,6 @@ def server(input, output, session):
         return match_log_rv.get()
-    class ReactiveWriter(io.TextIOBase):
-        def __init__(self, rv):
-            self.rv = rv
-        def write(self, s: str):
-            if not s:
-                return 0
-            self.rv.set(self.rv.get() + s)
-            try:
-                loop = asyncio.get_running_loop()
-                loop.create_task(reactive.flush())
-            except RuntimeError:
-                pass
-            return len(s)
-        def flush(self):
-            pass
     @reactive.Effect
     def _():
         if input.plot_spectra() > plot_clicks.get():
@@ -467,6 +674,9 @@ def server(input, output, session):
         elif input.run_spec_lib_matching() > match_clicks.get():
             current_page.set("run_spec_lib_matching")
             match_clicks.set(input.run_spec_lib_matching())
+        elif input.run_parameter_tuning() > match_clicks.get():
+            current_page.set("run_parameter_tuning")
+            match_clicks.set(input.run_parameter_tuning())
         elif hasattr(input, "back") and input.back() > back_clicks.get():
             current_page.set("main_menu")
             back_clicks.set(input.back())
@@ -474,8 +684,6 @@ def server(input, output, session):
     @render.image
     def image():
-        from pathlib import Path
         dir = Path(__file__).resolve().parent
         img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
         return img
@@ -512,6 +720,7 @@ def server(input, output, session):
                 ),
                 ui.input_action_button("plot_spectra", "Plot two spectra before and after preprocessing transformations.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
                 ui.input_action_button("run_spec_lib_matching", "Run spectral library matching to perform compound identification on a query library of spectra.", style="font-size:18px; padding:20px 40px; width:550px; height:100px; margin-top:10px; margin-right:50px"),
+                ui.input_action_button("run_parameter_tuning", "Tune parameters to maximize accuracy of compound identification given a query library with known spectrum IDs.", style="font-size:18px; padding:20px 40px; width:450px; height:120px; margin-top:10px; margin-right:50px"),
                 ui.div(
                     "References:",
                     style="margin-top:35px; text-align:left; font-size:24px; font-weight:bold"
@@ -562,15 +771,14 @@ def server(input, output, session):
             return plot_spectra_ui(input.chromatography_platform())
         elif current_page() == "run_spec_lib_matching":
             return run_spec_lib_matching_ui(input.chromatography_platform())
+        elif current_page() == "run_parameter_tuning":
+            return run_parameter_tuning_ui(input.chromatography_platform())
     @reactive.effect
     @reactive.event(input.query_data)
     async def _populate_ids_from_query_upload():
-        if current_page() != "plot_spectra":
-            return
         files = input.query_data()
         if not files:
             return
@@ -578,7 +786,6 @@ def server(input, output, session):
         in_path = Path(files[0]["datapath"])
         suffix = in_path.suffix.lower()
-        # Decide what CSV to read IDs from
         try:
             if suffix == ".csv":
                 csv_path = in_path
@@ -587,17 +794,14 @@ def server(input, output, session):
                 query_status_rv.set(f"Converting {in_path.name} → CSV …")
                 await reactive.flush()
-                # Choose an output temp path next to the upload
                 tmp_csv_path = in_path.with_suffix(".converted.csv")
                 out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
-                # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
                 if isinstance(out_obj, (str, os.PathLike, Path)):
                     csv_path = Path(out_obj)
                 elif isinstance(out_obj, pd.DataFrame):
-                    # Write the DF to our chosen path
-                    out_obj.to_csv(tmp_csv_path, index=False)
+                    out_obj.to_csv(tmp_csv_path, index=False, sep='\t')
                     csv_path = tmp_csv_path
                 else:
                     raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
@@ -607,16 +811,12 @@ def server(input, output, session):
             query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
             await reactive.flush()
-            # Extract IDs from the CSV’s first column
             ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
             query_ids_rv.set(ids)
-            # Update dropdowns
             ui.update_selectize("spectrum_ID1", choices=ids, selected=(ids[0] if ids else None))
-            query_status_rv.set(
-                f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
-            )
+            query_status_rv.set(f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}")
             await reactive.flush()
         except Exception as e:
@@ -628,9 +828,6 @@ def server(input, output, session):
     @reactive.effect
     @reactive.event(input.reference_data)
     async def _populate_ids_from_reference_upload():
-        if current_page() != "plot_spectra":
-            return
         files = input.reference_data()
         if not files:
             return
@@ -638,7 +835,6 @@ def server(input, output, session):
         in_path = Path(files[0]["datapath"])
         suffix = in_path.suffix.lower()
-        # Decide what CSV to read IDs from
         try:
             if suffix == ".csv":
                 csv_path = in_path
@@ -647,17 +843,14 @@ def server(input, output, session):
                 reference_status_rv.set(f"Converting {in_path.name} → CSV …")
                 await reactive.flush()
-                # Choose an output temp path next to the upload
                 tmp_csv_path = in_path.with_suffix(".converted.csv")
                 out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
-                # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
                 if isinstance(out_obj, (str, os.PathLike, Path)):
                     csv_path = Path(out_obj)
                 elif isinstance(out_obj, pd.DataFrame):
-                    # Write the DF to our chosen path
-                    out_obj.to_csv(tmp_csv_path, index=False)
+                    out_obj.to_csv(tmp_csv_path, index=False, sep='\t')
                     csv_path = tmp_csv_path
                 else:
                     raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
@@ -667,11 +860,9 @@ def server(input, output, session):
             reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
             await reactive.flush()
-            # Extract IDs from the CSV’s first column
             ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
             reference_ids_rv.set(ids)
-            # Update dropdowns
             ui.update_selectize("spectrum_ID2", choices=ids, selected=(ids[0] if ids else None))
             reference_status_rv.set(
@@ -685,65 +876,47 @@ def server(input, output, session):
             raise
     @render.download(filename=lambda: f"plot.png")
     def run_btn_plot_spectra():
+        """Plot the selected query and reference spectra and stream the figure as a PNG.
+
+        Reads the plotting options from the Shiny inputs, calls the HRMS or NRMS
+        plotting routine according to ``input.chromatography_platform()``, and yields
+        the rendered PNG bytes. The figure is only written to an in-memory buffer;
+        no GUI window is opened from this server-side handler.
+
+        Raises:
+            ValueError: if a weight is not numeric or fewer than four
+                comma-separated weights are supplied.
+        """
         spectrum_ID1 = input.spectrum_ID1() or None
         spectrum_ID2 = input.spectrum_ID2() or None
+        # The weights box holds comma-separated numbers in the order
+        # Cosine, Shannon, Renyi, Tsallis; blank entries are ignored.
+        weight_values = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
+        if len(weight_values) < 4:
+            raise ValueError(f"Expected 4 comma-separated weights (Cosine, Shannon, Renyi, Tsallis); got {len(weight_values)}")
+        weights = dict(zip(('Cosine', 'Shannon', 'Renyi', 'Tsallis'), weight_values))
+        # Options shared by both platforms, so HRMS and NRMS receive the same settings
+        # (including the mixture weights).
+        plot_kwargs = dict(
+            query_data=input.query_data()[0]['datapath'],
+            reference_data=input.reference_data()[0]['datapath'],
+            spectrum_ID1=spectrum_ID1,
+            spectrum_ID2=spectrum_ID2,
+            similarity_measure=input.similarity_measure(),
+            weights=weights,
+            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
+            high_quality_reference_library=input.high_quality_reference_library(),
+            mz_min=input.mz_min(), mz_max=input.mz_max(),
+            int_min=input.int_min(), int_max=input.int_max(),
+            noise_threshold=input.noise_threshold(),
+            wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
+            LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
+            y_axis_transformation=input.y_axis_transformation(),
+            return_plot=True,
+        )
         if input.chromatography_platform() == "HRMS":
-            fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
-            #run_status_plot_spectra.set("✅ Plotting has finished.")
+            fig = generate_plots_on_HRMS_data(
+                window_size_centroiding=input.window_size_centroiding(),
+                window_size_matching=input.window_size_matching(),
+                **plot_kwargs
+            )
         elif input.chromatography_platform() == "NRMS":
-            fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
+            fig = generate_plots_on_NRMS_data(**plot_kwargs)
         with io.BytesIO() as buf:
-            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
+            try:
+                fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
+            finally:
+                # Close exactly this figure (not whatever pyplot considers "current"),
+                # even if saving fails, so figures do not accumulate across downloads.
+                plt.close(fig)
             yield buf.getvalue()
-    @render.text
-    def status_output():
-        return run_status_plot_spectra.get()
-        return run_status_spec_lib_matching.get()
-    class ReactiveWriter(io.TextIOBase):
-        def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
-            self.rv = rv
-            self.loop = loop
-        def write(self, s: str):
-            if not s:
-                return 0
-            def _apply():
-                self.rv.set(self.rv.get() + s)
-                self.loop.create_task(reactive.flush())
-            self.loop.call_soon_threadsafe(_apply)
-            return len(s)
-        def flush(self):
-            pass
-    @render.download(filename="identification_output.csv")
+    @render.download(filename="identification_output.txt")
     async def run_btn_spec_lib_matching():
-        # 1) quick first paint
-        match_log_rv.set("Starting identification...\n")
+        match_log_rv.set("Running identification...\n")
         await reactive.flush()
-        # 2) normalize inputs (same as before)
         hq = input.high_quality_reference_library()
         if isinstance(hq, str):
             hq = hq.lower() == "true"
         elif isinstance(hq, (int, float)):
             hq = bool(hq)
+        weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
+        weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
         common_kwargs = dict(
             query_data=input.query_data()[0]["datapath"],
             reference_data=input.reference_data()[0]["datapath"],
             likely_reference_ids=None,
             similarity_measure=input.similarity_measure(),
+            weights=weights,
             spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
             high_quality_reference_library=hq,
             mz_min=input.mz_min(), mz_max=input.mz_max(),
@@ -752,16 +925,15 @@ def server(input, output, session):
             wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
             LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
             n_top_matches_to_save=input.n_top_matches_to_save(),
-            print_id_results=True,  # ensure the library actually prints progress
-            output_identification=str(Path.cwd() / "identification_output.csv"),
-            output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
+            print_id_results=True,
+            output_identification=str(Path.cwd() / "identification_output.txt"),
+            output_similarity_scores=str(Path.cwd() / "similarity_scores.txt"),
             return_ID_output=True,
         )
         loop = asyncio.get_running_loop()
-        rw = ReactiveWriter(match_log_rv, loop)
+        rw = ReactiveWriter(loop)
-        # 3) run the heavy function in a thread so the event loop can repaint
         try:
             with redirect_stdout(rw), redirect_stderr(rw):
                 if input.chromatography_platform() == "HRMS":
@@ -772,9 +944,7 @@ def server(input, output, session):
                         **common_kwargs
                     )
                 else:
-                    df_out = await asyncio.to_thread(
-                        run_spec_lib_matching_on_NRMS_data, **common_kwargs
-                    )
+                    df_out = await asyncio.to_thread(run_spec_lib_matching_on_NRMS_data, **common_kwargs)
             match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
             await reactive.flush()
         except Exception as e:
@@ -782,8 +952,164 @@ def server(input, output, session):
             await reactive.flush()
             raise
-        # 4) stream CSV back to the browser
-        yield df_out.to_csv(index=False)
+        yield df_out.to_csv(index=True, sep='\t')
+    @render.download(filename="plot.png")
+    def run_btn_plot_spectra_within_spec_lib_matching():
+        """Plot the selected spectra from the library-matching page and stream a PNG.
+
+        Requires both query and reference uploads, normalises the
+        high-quality-library flag to a bool, and always plots with
+        ``y_axis_transformation="normalized"``. The figure is only written to an
+        in-memory buffer; no GUI window is opened from this server-side handler.
+
+        Raises:
+            ValueError: if a weight is not numeric or fewer than four
+                comma-separated weights are supplied.
+        """
+        req(input.query_data(), input.reference_data())
+        spectrum_ID1 = input.spectrum_ID1() or None
+        spectrum_ID2 = input.spectrum_ID2() or None
+        # The flag may arrive as text ("True"/"False") or as a number; coerce to bool.
+        hq = input.high_quality_reference_library()
+        if isinstance(hq, str):
+            hq = hq.lower() == "true"
+        elif isinstance(hq, (int, float)):
+            hq = bool(hq)
+        # The weights box holds comma-separated numbers in the order
+        # Cosine, Shannon, Renyi, Tsallis; blank entries are ignored.
+        weight_values = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
+        if len(weight_values) < 4:
+            raise ValueError(f"Expected 4 comma-separated weights (Cosine, Shannon, Renyi, Tsallis); got {len(weight_values)}")
+        weights = dict(zip(('Cosine', 'Shannon', 'Renyi', 'Tsallis'), weight_values))
+        common = dict(
+            query_data=input.query_data()[0]['datapath'],
+            reference_data=input.reference_data()[0]['datapath'],
+            spectrum_ID1=spectrum_ID1,
+            spectrum_ID2=spectrum_ID2,
+            similarity_measure=input.similarity_measure(),
+            weights=weights,
+            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
+            high_quality_reference_library=hq,
+            mz_min=input.mz_min(), mz_max=input.mz_max(),
+            int_min=input.int_min(), int_max=input.int_max(),
+            noise_threshold=input.noise_threshold(),
+            wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
+            LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
+            y_axis_transformation="normalized",
+            return_plot=True
+        )
+        if input.chromatography_platform() == "HRMS":
+            # Only the HRMS routine takes the centroiding/matching window sizes.
+            fig = generate_plots_on_HRMS_data(
+                window_size_centroiding=input.window_size_centroiding(),
+                window_size_matching=input.window_size_matching(),
+                **common
+            )
+        else:
+            fig = generate_plots_on_NRMS_data(**common)
+        with io.BytesIO() as buf:
+            try:
+                fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
+            finally:
+                # Close exactly this figure (not whatever pyplot considers "current"),
+                # even if saving fails, so figures do not accumulate across downloads.
+                plt.close(fig)
+            yield buf.getvalue()
+    @render.download(filename="parameter_tuning_output.txt")
+    async def run_btn_parameter_tuning():
+        """Run a grid search over the user-supplied parameter values and stream the results.
+
+        Marks the tuning job as running, builds the parameter grid from the Shiny
+        inputs, runs the HRMS or NRMS tuning routine in a worker thread, and yields
+        the returned table as tab-separated UTF-8 bytes. Progress and errors are
+        appended to ``match_log_rv``. The running flags are always cleared, including
+        when an input cannot be parsed.
+
+        Raises:
+            Exception: any error from input parsing or the tuning routine is logged
+                to ``match_log_rv`` and re-raised.
+        """
+        is_any_job_running.set(True)
+        is_tuning_running.set(True)
+        match_log_rv.set("Running grid search of all parameters specified...\n")
+        await reactive.flush()
+        try:
+            # Parsing happens inside the try block so that a malformed input still
+            # reaches the finally clause and resets the "running" flags.
+            # NOTE(review): strip_text / strip_numeric / strip_weights are defined
+            # elsewhere in this file; presumably each turns a text input into a
+            # list of candidate values — confirm against their definitions.
+            grid = {
+                'similarity_measure': list(input.similarity_measure()),
+                'weight': strip_weights(input.weights()),
+                'spectrum_preprocessing_order': strip_text(input.spectrum_preprocessing_order()),
+                'mz_min': strip_numeric(input.mz_min()),
+                'mz_max': strip_numeric(input.mz_max()),
+                'int_min': strip_numeric(input.int_min()),
+                'int_max': strip_numeric(input.int_max()),
+                'noise_threshold': strip_numeric(input.noise_threshold()),
+                'wf_mz': strip_numeric(input.wf_mz()),
+                'wf_int': strip_numeric(input.wf_int()),
+                'LET_threshold': strip_numeric(input.LET_threshold()),
+                'entropy_dimension': strip_numeric(input.entropy_dimension()),
+                # Accepts text such as "[True, False]" and yields a list of bools.
+                'high_quality_reference_library': [x.strip().lower() == "true" for x in input.high_quality_reference_library().strip().strip("[]").split(",") if x.strip()],
+            }
+            if input.chromatography_platform() == "HRMS":
+                # Window sizes are only part of the HRMS grid.
+                grid['window_size_centroiding'] = strip_numeric(input.window_size_centroiding())
+                grid['window_size_matching'] = strip_numeric(input.window_size_matching())
+                tuner = tune_params_on_HRMS_data_shiny
+            else:
+                tuner = tune_params_on_NRMS_data_shiny
+            common_kwargs = dict(
+                query_data=input.query_data()[0]["datapath"],
+                reference_data=input.reference_data()[0]["datapath"],
+                output_path=str(Path.cwd() / "parameter_tuning_output.txt"),
+                return_output=True,
+            )
+            loop = asyncio.get_running_loop()
+            rw = ReactiveWriter(loop)
+            # Run the search off the event loop so the UI can keep repainting.
+            df_out = await asyncio.to_thread(_run_with_redirects, tuner, rw, **common_kwargs, grid=grid)
+            match_log_rv.set(match_log_rv.get() + "\n✅ Parameter tuning finished.\n")
+        except Exception as e:
+            match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
+            raise
+        finally:
+            is_tuning_running.set(False)
+            is_any_job_running.set(False)
+            await reactive.flush()
+        # sep belongs to DataFrame.to_csv; str.encode only takes encoding/errors.
+        yield df_out.to_csv(index=False, sep='\t').encode("utf-8")
+    @reactive.effect
+    async def _pump_reactive_writer_logs():
+        """Append queued log text to ``match_log_rv`` while a tuning job is running.
+
+        Does nothing unless ``is_tuning_running`` is set. Otherwise it re-schedules
+        itself via ``reactive.invalidate_later(0.1)``, drains ``_LOG_QUEUE`` without
+        blocking, and flushes the session when new text was appended.
+        """
+        if is_tuning_running.get():
+            reactive.invalidate_later(0.1)
+            pending = _drain_queue_nowait(_LOG_QUEUE)
+            if pending:
+                match_log_rv.set(match_log_rv.get() + "".join(pending))
+                await reactive.flush()
+    @render.text
+    def status_output():
+        """Return the current value of ``run_status_plot_spectra`` for the text output."""
+        # NOTE(review): this function used to list two more return statements
+        # (run_status_spec_lib_matching, run_status_parameter_tuning) after this one.
+        # They could never execute, so they were removed; if those statuses should
+        # be shown too, they need their own outputs or an explicit selection here.
+        return run_status_plot_spectra.get()
 app = App(app_ui, server)

pycompound 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

pycompound 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl