PyPI - pycompound - Versions diffs - 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl - Mend

pycompound 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from shiny import App, ui, reactive, render
+from shiny import App, ui, reactive, render, req
 from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
 from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
 from pycompound.spec_lib_matching import tune_params_on_HRMS_data
@@ -7,14 +7,149 @@ from pycompound.spec_lib_matching import tune_params_on_NRMS_data
 from pycompound.plot_spectra import generate_plots_on_HRMS_data
 from pycompound.plot_spectra import generate_plots_on_NRMS_data
 from pathlib import Path
+from contextlib import redirect_stdout, redirect_stderr
 import subprocess
 import traceback
 import asyncio
 import io
-#import matplotlib
-#matplotlib.use('agg')
+import os
+import sys
 import matplotlib.pyplot as plt
-#from matplotlib.figure import Figure
+import pandas as pd
+import numpy as np
+import netCDF4 as nc
+from pyteomics import mgf
+from pyteomics import mzml
+def build_library(input_path=None, output_path=None):
+    last_three_chars = input_path[(len(input_path)-3):len(input_path)]
+    last_four_chars = input_path[(len(input_path)-4):len(input_path)]
+    if last_three_chars == 'csv' or last_three_chars == 'CSV':
+        return pd.read_csv(input_path)
+    else:
+        if last_three_chars == 'mgf' or last_three_chars == 'MGF':
+            input_file_type = 'mgf'
+        elif last_four_chars == 'mzML' or last_four_chars == 'mzml' or last_four_chars == 'MZML':
+            input_file_type = 'mzML'
+        elif last_three_chars == 'cdf' or last_three_chars == 'CDF':
+            input_file_type = 'cdf'
+        elif last_three_chars == 'msp' or last_three_chars == 'MSP':
+            input_file_type = 'msp'
+        else:
+            print('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
+            sys.exit()
+        spectra = []
+        if input_file_type == 'mgf':
+            with mgf.read(input_path, index_by_scans = True) as reader:
+                for spec in reader:
+                    spectra.append(spec)
+        if input_file_type == 'mzML':
+            with mzml.read(input_path) as reader:
+                for spec in reader:
+                    spectra.append(spec)
+        if input_file_type == 'mgf' or input_file_type == 'mzML':
+            ids = []
+            mzs = []
+            ints = []
+            for i in range(0,len(spectra)):
+                for j in range(0,len(spectra[i]['m/z array'])):
+                    if input_file_type == 'mzML':
+                        ids.append(f'ID_{i+1}')
+                    else:
+                        ids.append(spectra[i]['params']['name'])
+                    mzs.append(spectra[i]['m/z array'][j])
+                    ints.append(spectra[i]['intensity array'][j])
+        if input_file_type == 'cdf':
+            dataset = nc.Dataset(input_path, 'r')
+            all_mzs = dataset.variables['mass_values'][:]
+            all_ints = dataset.variables['intensity_values'][:]
+            scan_idxs = dataset.variables['scan_index'][:]
+            dataset.close()
+            ids = []
+            mzs = []
+            ints = []
+            for i in range(0,(len(scan_idxs)-1)):
+                if i % 1000 == 0:
+                    print(f'analyzed {i} out of {len(scan_idxs)} scans')
+                s_idx = scan_idxs[i]
+                e_idx = scan_idxs[i+1]
+                mzs_tmp = all_mzs[s_idx:e_idx]
+                ints_tmp = all_ints[s_idx:e_idx]
+                for j in range(0,len(mzs_tmp)):
+                    ids.append(f'ID_{i+1}')
+                    mzs.append(mzs_tmp[j])
+                    ints.append(ints_tmp[j])
+        if input_file_type == 'msp':
+            ids = []
+            mzs = []
+            ints = []
+            with open(input_path, 'r') as f:
+                i = 0
+                for line in f:
+                    line = line.strip()
+                    if line.startswith('Name:'):
+                        i += 1
+                        spectrum_id = line.replace('Name: ','')
+                    elif line and line[0].isdigit():
+                        try:
+                            mz, intensity = map(float, line.split()[:2])
+                            ids.append(spectrum_id)
+                            mzs.append(mz)
+                            ints.append(intensity)
+                        except ValueError:
+                            continue
+        df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
+        return df
+def extract_first_column_ids(file_path: str, max_ids: int = 20000):
+    suffix = Path(file_path).suffix.lower()
+    if suffix == ".csv":
+        df = pd.read_csv(file_path, usecols=[0])
+        ids = df.iloc[:, 0].astype(str).dropna()
+        ids = [x for x in ids if x.strip() != ""]
+        seen = set()
+        uniq = []
+        for x in ids:
+            if x not in seen:
+                uniq.append(x)
+                seen.add(x)
+        return uniq[:max_ids]
+    ids = []
+    try:
+        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+            for line in f:
+                ls = line.strip()
+                if ls.startswith("TITLE="):
+                    ids.append(ls.split("=", 1)[1].strip())
+                elif ls.lower().startswith("name:"):
+                    ids.append(ls.split(":", 1)[1].strip())
+                if len(ids) >= max_ids:
+                    break
+    except Exception:
+        pass
+    if ids:
+        seen = set()
+        uniq = []
+        for x in ids:
+            if x not in seen:
+                uniq.append(x)
+                seen.add(x)
+        return uniq
+    return []
 def plot_spectra_ui(platform: str):
@@ -22,8 +157,20 @@ def plot_spectra_ui(platform: str):
     base_inputs = [
         ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
-        ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None),
-        ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None),
+    ui.input_selectize(
+        "spectrum_ID1",
+        "Select spectrum ID 1:",
+        choices=[],
+        multiple=False,
+        options={"placeholder": "Upload a query file to load IDs..."},
+    ),
+    ui.input_selectize(
+        "spectrum_ID2",
+        "Select spectrum ID 2 (optional):",
+        choices=[],
+        multiple=False,
+        options={"placeholder": "Upload a reference file to load IDs..."},
+    ),
         ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
         ui.input_select(
             "high_quality_reference_library",
@@ -77,7 +224,6 @@ def plot_spectra_ui(platform: str):
     run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
     back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
-    #print(len(extra_inputs))
     # Layout base_inputs and extra_inputs in columns
     if platform == "HRMS":
         inputs_columns = ui.layout_columns(
@@ -102,7 +248,8 @@ def plot_spectra_ui(platform: str):
             ui.h2("Plot Spectra"),
             inputs_columns,
             run_button_plot_spectra,
-            back_button
+            back_button,
+            ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
         ),
     )
@@ -179,13 +326,20 @@ def run_spec_lib_matching_ui(platform: str):
             col_widths=(3, 3, 3, 3),
         )
+    log_panel = ui.card(
+        ui.card_header("Identification log"),
+        ui.output_text_verbatim("match_log"),
+        style="max-height:300px; overflow:auto"
+    )
     # Combine everything
     return ui.div(
         ui.TagList(
             ui.h2("Run Spectral Library Matching"),
             inputs_columns,
             run_button_spec_lib_matching,
-            back_button
+            back_button,
+            log_panel,
         ),
     )
@@ -207,11 +361,106 @@ def server(input, output, session):
     run_status_plot_spectra = reactive.Value("")
     run_status_spec_lib_matching = reactive.Value("")
+    match_log_rv = reactive.Value("")
+    is_matching_rv = reactive.Value(False)
+    query_ids_rv = reactive.Value([])
+    query_file_path_rv = reactive.Value(None)
+    query_result_rv = reactive.Value(None)
+    query_status_rv = reactive.Value("")
+    reference_ids_rv = reactive.Value([])
+    reference_file_path_rv = reactive.Value(None)
+    reference_result_rv = reactive.Value(None)
+    reference_status_rv = reactive.Value("")
+    converted_query_path_rv = reactive.Value(None)
+    converted_reference_path_rv = reactive.Value(None)
+    def process_database(file_path: str):
+        suffix = Path(file_path).suffix.lower()
+        return {"path": file_path, "suffix": suffix}
+    @render.text
+    def plot_query_status():
+        return query_status_rv.get() or ""
+    @reactive.effect
+    @reactive.event(input.query_data)
+    async def _on_query_upload():
+        if current_page() != "plot_spectra":
+            return
+        files = input.query_data()
+        req(files and len(files) > 0)
+        file_path = files[0]["datapath"]
+        query_file_path_rv.set(file_path)
+        query_status_rv.set(f"Processing query database: {Path(file_path).name} …")
+        await reactive.flush()
+        try:
+            result = await asyncio.to_thread(process_database, file_path)
+            query_result_rv.set(result)
+            query_status_rv.set("✅ Query database processed.")
+            await reactive.flush()
+        except Exception as e:
+            query_status_rv.set(f"❌ Failed to process query database: {e}")
+            await reactive.flush()
+    @reactive.effect
+    @reactive.event(input.reference_data)
+    async def _on_reference_upload():
+        if current_page() != "plot_spectra":
+            return
+        files = input.reference_data()
+        req(files and len(files) > 0)
+        file_path = files[0]["datapath"]
+        reference_file_path_rv.set(file_path)
+        reference_status_rv.set(f"Processing reference database: {Path(file_path).name} …")
+        await reactive.flush()
+        try:
+            result = await asyncio.to_thread(process_database, file_path)
+            reference_result_rv.set(result)
+            reference_status_rv.set("✅ Reference database processed.")
+            await reactive.flush()
+        except Exception as e:
+            reference_status_rv.set(f"❌ Failed to process reference database: {e}")
+            await reactive.flush()
+    @render.text
+    def match_log():
+        return match_log_rv.get()
+    class ReactiveWriter(io.TextIOBase):
+        def __init__(self, rv):
+            self.rv = rv
+        def write(self, s: str):
+            if not s:
+                return 0
+            self.rv.set(self.rv.get() + s)
+            try:
+                loop = asyncio.get_running_loop()
+                loop.create_task(reactive.flush())
+            except RuntimeError:
+                pass
+            return len(s)
+        def flush(self):
+            pass
     @reactive.Effect
     def _():
-        # Main menu buttons
         if input.plot_spectra() > plot_clicks.get():
             current_page.set("plot_spectra")
             plot_clicks.set(input.plot_spectra())
@@ -315,56 +564,126 @@ def server(input, output, session):
             return run_spec_lib_matching_ui(input.chromatography_platform())
-    '''
     @reactive.effect
-    @reactive.event(input.run_btn_plot_spectra)
-    def _():
-        if current_page() == "plot_spectra":
-            if len(input.spectrum_ID1())==0:
-                spectrum_ID1 = None
-            else:
-                spectrum_ID1 = input.spectrum_ID1()
-            if len(input.spectrum_ID2())==0:
-                spectrum_ID2 = None
+    @reactive.event(input.query_data)
+    async def _populate_ids_from_query_upload():
+        if current_page() != "plot_spectra":
+            return
+        files = input.query_data()
+        if not files:
+            return
+        in_path = Path(files[0]["datapath"])
+        suffix = in_path.suffix.lower()
+        # Decide what CSV to read IDs from
+        try:
+            if suffix == ".csv":
+                csv_path = in_path
+                converted_query_path_rv.set(str(csv_path))
             else:
-                spectrum_ID2 = input.spectrum_ID2()
-            if input.chromatography_platform() == "HRMS":
-                try:
-                    fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
-                    #plt.show()
-                    with io.BytesIO() as buf:
-                        plt.savefig(buf, format="png", dpi=150, bbox_inches="tight")
-                        yield buf.getvalue()
-                    run_status_plot_spectra.set(f"✅  Plotting has finished.")
-                except Exception as e:
-                    run_status_plot_spectra.set(f"❌ Error: {traceback.format_exc()}")
-            elif input.chromatography_platform() == "NRMS":
-                try:
-                    generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
-                    #plt.show()
-                    run_status_plot_spectra.set(f"✅  Plotting has finished.")
-                except Exception as e:
-                    run_status_plot_spectra.set(f"❌ Error: {traceback.format_exc()}")
+                query_status_rv.set(f"Converting {in_path.name} → CSV …")
+                await reactive.flush()
+                # Choose an output temp path next to the upload
+                tmp_csv_path = in_path.with_suffix(".converted.csv")
+                out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
+                # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
+                if isinstance(out_obj, (str, os.PathLike, Path)):
+                    csv_path = Path(out_obj)
+                elif isinstance(out_obj, pd.DataFrame):
+                    # Write the DF to our chosen path
+                    out_obj.to_csv(tmp_csv_path, index=False)
+                    csv_path = tmp_csv_path
+                else:
+                    raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
+                converted_query_path_rv.set(str(csv_path))
+            query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
+            await reactive.flush()
+            # Extract IDs from the CSV’s first column
+            ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
+            query_ids_rv.set(ids)
+            # Update dropdowns
+            ui.update_selectize("spectrum_ID1", choices=ids, selected=(ids[0] if ids else None))
+            query_status_rv.set(
+                f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
+            )
+            await reactive.flush()
+        except Exception as e:
+            query_status_rv.set(f"❌ Failed: {e}")
+            await reactive.flush()
+            raise
     @reactive.effect
-    @reactive.event(input.run_btn_run_spec_lib_matching)
-    def _():
-        if current_page() == 'run_spec_lib_matching':
-            if input.chromatography_platform() == 'HRMS':
-                try:
-                    run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/')
-                    run_status_spec_lib_matching.set(f"✅  Spectral library matching has finished.")
-                except Exception as e:
-                    run_status_spec_lib_matching.set(f"❌ Error: {traceback.format_exc()}")
-            elif input.chromatography_platform() == 'NRMS':
-                try:
-                    run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv')
-                    run_status_spec_lib_matching.set(f"✅  Spectral library matching has finished.")
-                except Exception as e:
-                    run_status_spec_lib_matching.set(f"❌ Error: {traceback.format_exc()}")
-    '''
+    @reactive.event(input.reference_data)
+    async def _populate_ids_from_reference_upload():
+        if current_page() != "plot_spectra":
+            return
+        files = input.reference_data()
+        if not files:
+            return
+        in_path = Path(files[0]["datapath"])
+        suffix = in_path.suffix.lower()
+        # Decide what CSV to read IDs from
+        try:
+            if suffix == ".csv":
+                csv_path = in_path
+                converted_reference_path_rv.set(str(csv_path))
+            else:
+                reference_status_rv.set(f"Converting {in_path.name} → CSV …")
+                await reactive.flush()
+                # Choose an output temp path next to the upload
+                tmp_csv_path = in_path.with_suffix(".converted.csv")
+                out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
+                # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
+                if isinstance(out_obj, (str, os.PathLike, Path)):
+                    csv_path = Path(out_obj)
+                elif isinstance(out_obj, pd.DataFrame):
+                    # Write the DF to our chosen path
+                    out_obj.to_csv(tmp_csv_path, index=False)
+                    csv_path = tmp_csv_path
+                else:
+                    raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
+                converted_reference_path_rv.set(str(csv_path))
+            reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
+            await reactive.flush()
+            # Extract IDs from the CSV’s first column
+            ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
+            reference_ids_rv.set(ids)
+            # Update dropdowns
+            ui.update_selectize("spectrum_ID2", choices=ids, selected=(ids[0] if ids else None))
+            reference_status_rv.set(
+                f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
+            )
+            await reactive.flush()
+        except Exception as e:
+            reference_status_rv.set(f"❌ Failed: {e}")
+            await reactive.flush()
+            raise
     @render.download(filename=lambda: f"plot.png")
@@ -382,23 +701,90 @@ def server(input, output, session):
             yield buf.getvalue()
-    @render.download(filename=lambda: f"plot.png")
-    def run_btn_spec_lib_matching():
-        if input.chromatography_platform() == "HRMS":
-            df_out = run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/', return_ID_output=True)
-        elif input.chromatography_platform() == "NRMS":
-            df_out = run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv', return_ID_output=True)
-        df_out.to_csv(io.StringIO(), index=False)
-        return buf.getvalue().encode('utf-8')
     @render.text
     def status_output():
         return run_status_plot_spectra.get()
         return run_status_spec_lib_matching.get()
+    class ReactiveWriter(io.TextIOBase):
+        def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
+            self.rv = rv
+            self.loop = loop
+        def write(self, s: str):
+            if not s:
+                return 0
+            def _apply():
+                self.rv.set(self.rv.get() + s)
+                self.loop.create_task(reactive.flush())
+            self.loop.call_soon_threadsafe(_apply)
+            return len(s)
+        def flush(self):
+            pass
+    @render.download(filename="identification_output.csv")
+    async def run_btn_spec_lib_matching():
+        # 1) quick first paint
+        match_log_rv.set("Starting identification...\n")
+        await reactive.flush()
+        # 2) normalize inputs (same as before)
+        hq = input.high_quality_reference_library()
+        if isinstance(hq, str):
+            hq = hq.lower() == "true"
+        elif isinstance(hq, (int, float)):
+            hq = bool(hq)
+        common_kwargs = dict(
+            query_data=input.query_data()[0]["datapath"],
+            reference_data=input.reference_data()[0]["datapath"],
+            likely_reference_ids=None,
+            similarity_measure=input.similarity_measure(),
+            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
+            high_quality_reference_library=hq,
+            mz_min=input.mz_min(), mz_max=input.mz_max(),
+            int_min=input.int_min(), int_max=input.int_max(),
+            noise_threshold=input.noise_threshold(),
+            wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
+            LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
+            n_top_matches_to_save=input.n_top_matches_to_save(),
+            print_id_results=True,  # ensure the library actually prints progress
+            output_identification=str(Path.cwd() / "identification_output.csv"),
+            output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
+            return_ID_output=True,
+        )
+        loop = asyncio.get_running_loop()
+        rw = ReactiveWriter(match_log_rv, loop)
+        # 3) run the heavy function in a thread so the event loop can repaint
+        try:
+            with redirect_stdout(rw), redirect_stderr(rw):
+                if input.chromatography_platform() == "HRMS":
+                    df_out = await asyncio.to_thread(
+                        run_spec_lib_matching_on_HRMS_data,
+                        window_size_centroiding=input.window_size_centroiding(),
+                        window_size_matching=input.window_size_matching(),
+                        **common_kwargs
+                    )
+                else:
+                    df_out = await asyncio.to_thread(
+                        run_spec_lib_matching_on_NRMS_data, **common_kwargs
+                    )
+            match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
+            await reactive.flush()
+        except Exception as e:
+            match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
+            await reactive.flush()
+            raise
+        # 4) stream CSV back to the browser
+        yield df_out.to_csv(index=False)
 app = App(app_ui, server)

pycompound/plot_spectra.py CHANGED Viewed

@@ -45,7 +45,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
         extension = extension[(len(extension)-1)]
         if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
             output_path_tmp = query_data[:-3] + 'csv'
-            build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
+            build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
             df_query = pd.read_csv(output_path_tmp)
         if extension == 'csv' or extension == 'CSV':
             df_query = pd.read_csv(query_data)
@@ -177,6 +177,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
             spec_tmp = spectrum_ID1
             spectrum_ID1 = spectrum_ID2
             spectrum_ID2 = spec_tmp
+        print(unique_query_ids)
+        print(spectrum_ID1)
         query_idx = unique_query_ids.index(spectrum_ID1)
         reference_idx = unique_reference_ids.index(spectrum_ID2)
         q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]

{pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycompound
-Version: 0.0.9
+Version: 0.0.10
 Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
 Author-email: Hunter Dlugas <fy7392@wayne.edu>
 License-Expression: MIT

{pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
-app.py,sha256=eJmAAdCDWS7-3jjCkp8gueBtBkefh8oIbyr45Snh8C0,26187
+app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
 pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
-pycompound/plot_spectra.py,sha256=wOnf2oOAfifj7FYkTZAcIeD7dHW1aRHzmsspPpySDcY,42023
+pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
 pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
 pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
 pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
 pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
 pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
 pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
-pycompound-0.0.9.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
-pycompound-0.0.9.dist-info/METADATA,sha256=--hu6G380jnsb6J7XvGFiwXJ_lZ6of0stydeLWYjp6U,1732
-pycompound-0.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pycompound-0.0.9.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
-pycompound-0.0.9.dist-info/RECORD,,
+pycompound-0.0.10.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
+pycompound-0.0.10.dist-info/METADATA,sha256=Gb0d0ZbClc4AFRcDjMnNWcb4TCuq84CJl-AKCNjY2wU,1733
+pycompound-0.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pycompound-0.0.10.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
+pycompound-0.0.10.dist-info/RECORD,,

{pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/WHEEL RENAMED Viewed

File without changes

{pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pycompound-0.0.9.dist-info → pycompound-0.0.10.dist-info}/top_level.txt RENAMED Viewed

File without changes

pycompound 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

pycompound 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl