PyPI - pycompound - Versions diffs - 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl - Mend

pycompound-0.0.8-py3-none-any.whl → pycompound-0.0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

app.py +481 -54
pycompound/plot_spectra.py +7 -5
pycompound/spec_lib_matching.py +20 -12
{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/METADATA +2 -3
pycompound-0.0.10.dist-info/RECORD +14 -0
pycompound-0.0.8.dist-info/RECORD +0 -14
{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/WHEEL +0 -0
{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/licenses/LICENSE +0 -0
{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/top_level.txt +0 -0

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from shiny import App, ui, reactive, render
+from shiny import App, ui, reactive, render, req
 from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
 from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
 from pycompound.spec_lib_matching import tune_params_on_HRMS_data
@@ -7,11 +7,149 @@ from pycompound.spec_lib_matching import tune_params_on_NRMS_data
 from pycompound.plot_spectra import generate_plots_on_HRMS_data
 from pycompound.plot_spectra import generate_plots_on_NRMS_data
 from pathlib import Path
+from contextlib import redirect_stdout, redirect_stderr
 import subprocess
 import traceback
 import asyncio
 import io
+import os
+import sys
 import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import netCDF4 as nc
+from pyteomics import mgf
+from pyteomics import mzml
+def build_library(input_path=None, output_path=None):
+    """Load a spectral library into a long-format pandas DataFrame.
+
+    The file type is chosen from the case-insensitive ending of
+    ``input_path``: csv, mgf, mzML, cdf or msp. CSV input is returned exactly
+    as read by ``pd.read_csv``; every other format is flattened to one row
+    per peak with the columns ``id``, ``mz_ratio`` and ``intensity``.
+
+    Args:
+        input_path: Path of the library file to read.
+        output_path: Accepted for call compatibility only; this function
+            never reads it and writes no file.
+
+    Returns:
+        pandas.DataFrame holding the library.
+
+    Raises:
+        ValueError: If ``input_path`` is missing or has an unsupported ending.
+    """
+    if input_path is None:
+        raise ValueError('input_path must be provided')
+    # Compare on a lower-cased copy so any capitalisation of the ending is
+    # accepted; the original path is still what gets opened.
+    lowered = str(input_path).lower()
+    if lowered.endswith('csv'):
+        return pd.read_csv(input_path)
+    if lowered.endswith('mgf'):
+        input_file_type = 'mgf'
+    elif lowered.endswith('mzml'):
+        input_file_type = 'mzML'
+    elif lowered.endswith('cdf'):
+        input_file_type = 'cdf'
+    elif lowered.endswith('msp'):
+        input_file_type = 'msp'
+    else:
+        # Raise instead of print + sys.exit(): SystemExit is not an Exception
+        # subclass, so the callers' `except Exception` handlers would miss it.
+        raise ValueError('ERROR: either an \'mgf\', \'mzML\', \'cdf\', or \'msp\' file must be passed to --input_path')
+    ids = []
+    mzs = []
+    ints = []
+    if input_file_type == 'mgf' or input_file_type == 'mzML':
+        if input_file_type == 'mgf':
+            source = mgf.read(input_path, index_by_scans = True)
+        else:
+            source = mzml.read(input_path)
+        with source as reader:
+            for i, spec in enumerate(reader):
+                spec_mzs = spec['m/z array']
+                n_peaks = len(spec_mzs)
+                if n_peaks == 0:
+                    # Spectra without peaks contribute no rows.
+                    continue
+                # mzML spectra are numbered by position; mgf spectra use
+                # their 'name' parameter.
+                if input_file_type == 'mzML':
+                    spec_id = f'ID_{i+1}'
+                else:
+                    spec_id = spec['params']['name']
+                ids.extend([spec_id] * n_peaks)
+                mzs.extend(spec_mzs)
+                ints.extend(spec['intensity array'][:n_peaks])
+    elif input_file_type == 'cdf':
+        # The context manager closes the dataset even when a variable is missing.
+        with nc.Dataset(input_path, 'r') as dataset:
+            all_mzs = dataset.variables['mass_values'][:]
+            all_ints = dataset.variables['intensity_values'][:]
+            scan_idxs = dataset.variables['scan_index'][:]
+        # NOTE(review): consecutive scan_index entries are paired, so values
+        # after the final entry are never emitted - confirm whether the last
+        # scan is meant to be included.
+        for i in range(0,(len(scan_idxs)-1)):
+            if i % 1000 == 0:
+                print(f'analyzed {i} out of {len(scan_idxs)} scans')
+            s_idx = scan_idxs[i]
+            e_idx = scan_idxs[i+1]
+            mzs_tmp = all_mzs[s_idx:e_idx]
+            ids.extend([f'ID_{i+1}'] * len(mzs_tmp))
+            mzs.extend(mzs_tmp)
+            ints.extend(all_ints[s_idx:e_idx])
+    else:
+        spectrum_id = None
+        with open(input_path, 'r') as f:
+            for line in f:
+                line = line.strip()
+                if line.startswith('Name:'):
+                    spectrum_id = line.replace('Name: ','')
+                elif line and line[0].isdigit() and spectrum_id is not None:
+                    # Numeric lines before the first 'Name:' belong to no
+                    # spectrum and are skipped.
+                    try:
+                        mz, intensity = map(float, line.split()[:2])
+                    except ValueError:
+                        # Fewer than two numeric fields: not a peak line.
+                        continue
+                    ids.append(spectrum_id)
+                    mzs.append(mz)
+                    ints.append(intensity)
+    return pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
+def extract_first_column_ids(file_path: str, max_ids: int = 20000):
+    """Return the unique spectrum IDs of a library file in first-seen order.
+
+    For ``.csv`` files the IDs are the non-missing, non-blank values of the
+    first column. Any other file is scanned line by line for ``TITLE=<id>``
+    and ``Name: <id>`` (case-insensitive ``name:``) records.
+
+    Args:
+        file_path: Path of the file to scan.
+        max_ids: Upper bound on the number of unique IDs returned.
+
+    Returns:
+        List of unique ID strings; empty when nothing is found or a non-CSV
+        file cannot be read.
+    """
+    if Path(file_path).suffix.lower() == ".csv":
+        first_col = pd.read_csv(file_path, usecols=[0]).iloc[:, 0]
+        # Drop missing cells before converting to str; converting first would
+        # turn them into the literal ID "nan".
+        values = first_col.dropna().astype(str)
+        # dict.fromkeys de-duplicates while keeping first-seen order.
+        return list(dict.fromkeys(x for x in values if x.strip() != ""))[:max_ids]
+    unique = {}
+    try:
+        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+            for line in f:
+                # The cap counts unique IDs, matching the CSV branch.
+                if len(unique) >= max_ids:
+                    break
+                ls = line.strip()
+                if ls.startswith("TITLE="):
+                    found = ls.split("=", 1)[1].strip()
+                elif ls.lower().startswith("name:"):
+                    found = ls.split(":", 1)[1].strip()
+                else:
+                    continue
+                if found:
+                    unique.setdefault(found, None)
+    except OSError:
+        # Best effort: an unreadable file yields whatever was collected.
+        pass
+    return list(unique)
 def plot_spectra_ui(platform: str):
@@ -19,8 +157,20 @@ def plot_spectra_ui(platform: str):
     base_inputs = [
         ui.input_file("query_data", "Upload query dataset (mgf, mzML, cdf, msp, or csv):"),
         ui.input_file("reference_data", "Upload reference dataset (mgf, mzML, cdf, msp, or csv):"),
-        ui.input_text("spectrum_ID1", "Input ID of one spectrum to be plotted:", None),
-        ui.input_text("spectrum_ID2", "Input ID of another spectrum to be plotted:", None),
+    ui.input_selectize(
+        "spectrum_ID1",
+        "Select spectrum ID 1:",
+        choices=[],
+        multiple=False,
+        options={"placeholder": "Upload a query file to load IDs..."},
+    ),
+    ui.input_selectize(
+        "spectrum_ID2",
+        "Select spectrum ID 2 (optional):",
+        choices=[],
+        multiple=False,
+        options={"placeholder": "Upload a reference file to load IDs..."},
+    ),
         ui.input_select("similarity_measure", "Select similarity measure:", ["cosine","shannon","renyi","tsallis","mixture","jaccard","dice","3w_jaccard","sokal_sneath","binary_cosine","mountford","mcconnaughey","driver_kroeber","simpson","braun_banquet","fager_mcgowan","kulczynski","intersection","hamming","hellinger"]),
         ui.input_select(
             "high_quality_reference_library",
@@ -71,10 +221,9 @@ def plot_spectra_ui(platform: str):
     )
     # Run and Back buttons
-    run_button = ui.input_action_button("run_btn", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    run_button_plot_spectra = ui.download_button("run_btn_plot_spectra", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
     back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
-    #print(len(extra_inputs))
     # Layout base_inputs and extra_inputs in columns
     if platform == "HRMS":
         inputs_columns = ui.layout_columns(
@@ -98,8 +247,9 @@ def plot_spectra_ui(platform: str):
         ui.TagList(
             ui.h2("Plot Spectra"),
             inputs_columns,
-            run_button,
-            back_button
+            run_button_plot_spectra,
+            back_button,
+            ui.div(ui.output_text("plot_query_status"), style="margin-top:8px; font-size:14px")
         ),
     )
@@ -155,10 +305,9 @@ def run_spec_lib_matching_ui(platform: str):
     # Run and Back buttons
-    run_button = ui.input_action_button("run_btn", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
+    run_button_spec_lib_matching = ui.download_button("run_btn_spec_lib_matching", "Run", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
     back_button = ui.input_action_button("back", "Back to main menu", style="font-size:16px; padding:15px 30px; width:200px; height:80px")
-    #print(len(extra_inputs))
     # Layout base_inputs and extra_inputs in columns
     if platform == "HRMS":
         inputs_columns = ui.layout_columns(
@@ -177,13 +326,20 @@ def run_spec_lib_matching_ui(platform: str):
             col_widths=(3, 3, 3, 3),
         )
+    log_panel = ui.card(
+        ui.card_header("Identification log"),
+        ui.output_text_verbatim("match_log"),
+        style="max-height:300px; overflow:auto"
+    )
     # Combine everything
     return ui.div(
         ui.TagList(
             ui.h2("Run Spectral Library Matching"),
             inputs_columns,
-            run_button,
-            back_button
+            run_button_spec_lib_matching,
+            back_button,
+            log_panel,
         ),
     )
@@ -197,19 +353,114 @@ app_ui = ui.page_fluid(
 def server(input, output, session):
-    # Track which page to show
     current_page = reactive.Value("main_menu")
-    # Track button clicks
     plot_clicks = reactive.Value(0)
     match_clicks = reactive.Value(0)
     back_clicks = reactive.Value(0)
-    run_status = reactive.Value("Waiting for input...")
+    run_status_plot_spectra = reactive.Value("")
+    run_status_spec_lib_matching = reactive.Value("")
+    match_log_rv = reactive.Value("")
+    is_matching_rv = reactive.Value(False)
+    query_ids_rv = reactive.Value([])
+    query_file_path_rv = reactive.Value(None)
+    query_result_rv = reactive.Value(None)
+    query_status_rv = reactive.Value("")
+    reference_ids_rv = reactive.Value([])
+    reference_file_path_rv = reactive.Value(None)
+    reference_result_rv = reactive.Value(None)
+    reference_status_rv = reactive.Value("")
+    converted_query_path_rv = reactive.Value(None)
+    converted_reference_path_rv = reactive.Value(None)
+    def process_database(file_path: str):
+        """Describe an uploaded file by its path and lower-cased extension."""
+        extension = Path(file_path).suffix.lower()
+        info = dict(path=file_path, suffix=extension)
+        return info
+    @render.text
+    def plot_query_status():
+        """Render the query-upload status message, or an empty string when unset."""
+        message = query_status_rv.get()
+        if not message:
+            return ""
+        return message
+    @reactive.effect
+    @reactive.event(input.query_data)
+    async def _on_query_upload():
+        """Record a newly uploaded query file and report progress.
+
+        Runs when ``input.query_data`` changes and does nothing unless the
+        plot-spectra page is showing. Stores the upload path, runs
+        ``process_database`` in a worker thread, stores its result, and
+        writes progress/failure text to ``query_status_rv``.
+
+        NOTE(review): ``_populate_ids_from_query_upload`` reacts to the same
+        input and writes to the same status value; the order in which the two
+        effects run is not established here - confirm the messages cannot
+        overwrite each other in a confusing order.
+        """
+        if current_page() != "plot_spectra":
+            return
+        files = input.query_data()
+        # Stop here until at least one file has been uploaded.
+        req(files and len(files) > 0)
+        file_path = files[0]["datapath"]
+        query_file_path_rv.set(file_path)
+        query_status_rv.set(f"Processing query database: {Path(file_path).name} …")
+        # Flush so the "Processing" message is sent before the threaded work starts.
+        await reactive.flush()
+        try:
+            result = await asyncio.to_thread(process_database, file_path)
+            query_result_rv.set(result)
+            query_status_rv.set("✅ Query database processed.")
+            await reactive.flush()
+        except Exception as e:
+            # Failures are reported through the status text and not re-raised.
+            query_status_rv.set(f"❌ Failed to process query database: {e}")
+            await reactive.flush()
+    @reactive.effect
+    @reactive.event(input.reference_data)
+    async def _on_reference_upload():
+        """Record a newly uploaded reference file and report progress.
+
+        Runs when ``input.reference_data`` changes and does nothing unless
+        the plot-spectra page is showing. Stores the upload path, runs
+        ``process_database`` in a worker thread, stores its result, and
+        writes progress/failure text to ``reference_status_rv``.
+
+        NOTE(review): the visible UI only renders ``query_status_rv`` (via
+        ``plot_query_status``); confirm ``reference_status_rv`` is displayed
+        somewhere, otherwise these messages are never shown.
+        """
+        if current_page() != "plot_spectra":
+            return
+        files = input.reference_data()
+        # Stop here until at least one file has been uploaded.
+        req(files and len(files) > 0)
+        file_path = files[0]["datapath"]
+        reference_file_path_rv.set(file_path)
+        reference_status_rv.set(f"Processing reference database: {Path(file_path).name} …")
+        # Flush so the "Processing" message is sent before the threaded work starts.
+        await reactive.flush()
+        try:
+            result = await asyncio.to_thread(process_database, file_path)
+            reference_result_rv.set(result)
+            reference_status_rv.set("✅ Reference database processed.")
+            await reactive.flush()
+        except Exception as e:
+            # Failures are reported through the status text and not re-raised.
+            reference_status_rv.set(f"❌ Failed to process reference database: {e}")
+            await reactive.flush()
+    @render.text
+    def match_log():
+        """Render the accumulated identification log text."""
+        log_text = match_log_rv.get()
+        return log_text
+    class ReactiveWriter(io.TextIOBase):
+        """Text stream that appends everything written to a reactive value.
+
+        NOTE(review): a second ``ReactiveWriter`` class is defined further
+        down in ``server()`` and rebinds this name; the only visible
+        construction passes ``(rv, loop)``, which matches that later
+        definition, so this version appears to be unused - confirm before
+        removing it.
+        """
+        def __init__(self, rv):
+            # rv: the reactive value that accumulates the written text.
+            self.rv = rv
+        def write(self, s: str):
+            """Append ``s`` to the reactive value and return ``len(s)``."""
+            if not s:
+                return 0
+            self.rv.set(self.rv.get() + s)
+            try:
+                # Schedule a reactive flush when an event loop is running in
+                # this thread.
+                loop = asyncio.get_running_loop()
+                loop.create_task(reactive.flush())
+            except RuntimeError:
+                # No running loop in this thread: skip the flush.
+                pass
+            return len(s)
+        def flush(self):
+            """No-op; nothing is buffered."""
+            pass
     @reactive.Effect
     def _():
-        # Main menu buttons
         if input.plot_spectra() > plot_clicks.get():
             current_page.set("plot_spectra")
             plot_clicks.set(input.plot_spectra())
@@ -220,6 +471,7 @@ def server(input, output, session):
             current_page.set("main_menu")
             back_clicks.set(input.back())
     @render.image
     def image():
         from pathlib import Path
@@ -228,6 +480,7 @@ def server(input, output, session):
         img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
         return img
     @output
     @render.ui
     def main_ui():
@@ -310,53 +563,227 @@ def server(input, output, session):
         elif current_page() == "run_spec_lib_matching":
             return run_spec_lib_matching_ui(input.chromatography_platform())
     @reactive.effect
-    @reactive.event(input.run_btn)
-    def _():
-        if current_page() == "plot_spectra":
-            if len(input.spectrum_ID1())==0:
-                spectrum_ID1 = None
+    @reactive.event(input.query_data)
+    async def _populate_ids_from_query_upload():
+        """Fill the ``spectrum_ID1`` dropdown with IDs from the uploaded query file.
+
+        Runs when ``input.query_data`` changes and only on the plot-spectra
+        page. CSV uploads are read directly; any other upload is converted
+        with ``build_library`` and saved as a CSV next to the upload first.
+        Progress and failures are written to ``query_status_rv``; a failure
+        is re-raised after its message has been set.
+        """
+        if current_page() != "plot_spectra":
+            return
+        files = input.query_data()
+        if not files:
+            return
+        in_path = Path(files[0]["datapath"])
+        suffix = in_path.suffix.lower()
+        # Decide what CSV to read IDs from
+        try:
+            if suffix == ".csv":
+                csv_path = in_path
+                converted_query_path_rv.set(str(csv_path))
             else:
-                spectrum_ID1 = input.spectrum_ID1()
-            if len(input.spectrum_ID2())==0:
-                spectrum_ID2 = None
+                query_status_rv.set(f"Converting {in_path.name} → CSV …")
+                await reactive.flush()
+                # Choose an output temp path next to the upload
+                tmp_csv_path = in_path.with_suffix(".converted.csv")
+                out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
+                # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
+                # The build_library defined in this file returns a DataFrame.
+                if isinstance(out_obj, (str, os.PathLike, Path)):
+                    csv_path = Path(out_obj)
+                elif isinstance(out_obj, pd.DataFrame):
+                    # Write the DF to our chosen path
+                    out_obj.to_csv(tmp_csv_path, index=False)
+                    csv_path = tmp_csv_path
+                else:
+                    raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
+                converted_query_path_rv.set(str(csv_path))
+            query_status_rv.set(f"Reading IDs from: {csv_path.name} …")
+            await reactive.flush()
+            # Extract IDs from the CSV’s first column
+            ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
+            query_ids_rv.set(ids)
+            # Update dropdowns
+            ui.update_selectize("spectrum_ID1", choices=ids, selected=(ids[0] if ids else None))
+            query_status_rv.set(
+                f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
+            )
+            await reactive.flush()
+        except Exception as e:
+            # Show the failure in the status line, then let it propagate.
+            query_status_rv.set(f"❌ Failed: {e}")
+            await reactive.flush()
+            raise
+    @reactive.effect
+    @reactive.event(input.reference_data)
+    async def _populate_ids_from_reference_upload():
+        """Fill the ``spectrum_ID2`` dropdown with IDs from the uploaded reference file.
+
+        Runs when ``input.reference_data`` changes and only on the
+        plot-spectra page. CSV uploads are read directly; any other upload is
+        converted with ``build_library`` and saved as a CSV next to the
+        upload first. Progress and failures are written to
+        ``reference_status_rv``; a failure is re-raised after its message has
+        been set.
+        """
+        if current_page() != "plot_spectra":
+            return
+        files = input.reference_data()
+        if not files:
+            return
+        in_path = Path(files[0]["datapath"])
+        suffix = in_path.suffix.lower()
+        # Decide what CSV to read IDs from
+        try:
+            if suffix == ".csv":
+                csv_path = in_path
+                converted_reference_path_rv.set(str(csv_path))
             else:
-                spectrum_ID2 = input.spectrum_ID2()
-            if input.chromatography_platform() == "HRMS":
-                try:
-                    fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
-                    plt.show()
-                    run_status.set(f"✅  Plotting has finished.")
-                except Exception as e:
-                    run_status.set(f"❌ Error: {traceback.format_exc()}")
-            elif input.chromatography_platform() == "NRMS":
-                try:
-                    generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
-                    plt.show()
-                    run_status.set(f"✅  Plotting has finished.")
-                except Exception as e:
-                    run_status.set(f"❌ Error: {traceback.format_exc()}")
-        elif current_page() == 'run_spec_lib_matching':
-            if input.chromatography_platform() == 'HRMS':
-                try:
-                    run_spec_lib_matching_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/')
-                    run_status.set(f"✅  Spectral library matching has finished and results were written to {Path.cwd()}/output_similarity_scores.csv.")
-                except Exception as e:
-                    run_status.set(f"❌ Error: {traceback.format_exc()}")
-            elif input.chromatography_platform() == 'NRMS':
-                try:
-                    run_spec_lib_matching_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], likely_reference_ids=None, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), n_top_matches_to_save=input.n_top_matches_to_save(), print_id_results=False, output_identification=f'{Path.cwd()}/output_identification.csv', output_similarity_scores=f'{Path.cwd()}/output_similarity_scores.csv')
-                    run_status.set(f"✅  Spectral library matching has finished and results were written to {Path.cwd()}/")
-                except Exception as e:
-                    run_status.set(f"❌ Error: {traceback.format_exc()}")
+                reference_status_rv.set(f"Converting {in_path.name} → CSV …")
+                await reactive.flush()
+                # Choose an output temp path next to the upload
+                tmp_csv_path = in_path.with_suffix(".converted.csv")
+                out_obj = await asyncio.to_thread(build_library, str(in_path), str(tmp_csv_path))
+                # out_obj may be a path (str/PathLike) OR a DataFrame. Normalize to a path.
+                # The build_library defined in this file returns a DataFrame.
+                if isinstance(out_obj, (str, os.PathLike, Path)):
+                    csv_path = Path(out_obj)
+                elif isinstance(out_obj, pd.DataFrame):
+                    # Write the DF to our chosen path
+                    out_obj.to_csv(tmp_csv_path, index=False)
+                    csv_path = tmp_csv_path
+                else:
+                    raise TypeError(f"build_library returned unsupported type: {type(out_obj)}")
+                converted_reference_path_rv.set(str(csv_path))
+            reference_status_rv.set(f"Reading IDs from: {csv_path.name} …")
+            await reactive.flush()
+            # Extract IDs from the CSV’s first column
+            ids = await asyncio.to_thread(extract_first_column_ids, str(csv_path))
+            reference_ids_rv.set(ids)
+            # Update dropdowns
+            ui.update_selectize("spectrum_ID2", choices=ids, selected=(ids[0] if ids else None))
+            reference_status_rv.set(
+                f"✅ Loaded {len(ids)} IDs from {csv_path.name}" if ids else f"⚠️ No IDs found in {csv_path.name}"
+            )
+            await reactive.flush()
+        except Exception as e:
+            # Record the failure in the status value, then let it propagate.
+            reference_status_rv.set(f"❌ Failed: {e}")
+            await reactive.flush()
+            raise
+    @render.download(filename="plot.png")
+    def run_btn_plot_spectra():
+        """Render the selected spectra as a PNG and stream it as a download.
+
+        Reads the uploaded query/reference files and the plotting inputs,
+        calls the HRMS or NRMS plotting function with ``return_plot=True``,
+        and yields the figure encoded as PNG bytes.
+
+        Raises:
+            ValueError: If the chromatography platform is neither "HRMS"
+                nor "NRMS".
+        """
+        platform = input.chromatography_platform()
+        if platform not in ("HRMS", "NRMS"):
+            # Fail with a clear message instead of an unbound `fig` below.
+            raise ValueError(f"Unsupported chromatography platform: {platform!r}")
+        # Keyword arguments shared by both plotting functions.
+        plot_kwargs = dict(
+            query_data=input.query_data()[0]['datapath'],
+            reference_data=input.reference_data()[0]['datapath'],
+            spectrum_ID1=input.spectrum_ID1() or None,
+            spectrum_ID2=input.spectrum_ID2() or None,
+            similarity_measure=input.similarity_measure(),
+            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
+            high_quality_reference_library=input.high_quality_reference_library(),
+            mz_min=input.mz_min(),
+            mz_max=input.mz_max(),
+            int_min=input.int_min(),
+            int_max=input.int_max(),
+            noise_threshold=input.noise_threshold(),
+            wf_mz=input.wf_mz(),
+            wf_intensity=input.wf_int(),
+            LET_threshold=input.LET_threshold(),
+            entropy_dimension=input.entropy_dimension(),
+            y_axis_transformation=input.y_axis_transformation(),
+            return_plot=True,
+        )
+        if platform == "HRMS":
+            # Only the HRMS plot takes the two window-size inputs.
+            fig = generate_plots_on_HRMS_data(
+                window_size_centroiding=input.window_size_centroiding(),
+                window_size_matching=input.window_size_matching(),
+                **plot_kwargs,
+            )
+        else:
+            fig = generate_plots_on_NRMS_data(**plot_kwargs)
+        buf = io.BytesIO()
+        try:
+            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
+        finally:
+            # Release the figure so repeated downloads do not accumulate
+            # figures; a no-op if pyplot does not manage this figure.
+            plt.close(fig)
+        yield buf.getvalue()
     @render.text
     def status_output():
-        return run_status.get()
+        # Show the plot-spectra status when it is set, otherwise the
+        # library-matching status; a second bare `return` would be unreachable.
+        return run_status_plot_spectra.get() or run_status_spec_lib_matching.get()
+    class ReactiveWriter(io.TextIOBase):
+        """Text stream that forwards writes to a reactive value via an event loop.
+
+        ``write`` does not touch the reactive value directly; it schedules
+        the update on ``loop`` with ``call_soon_threadsafe`` so it can be
+        called from a thread other than the one running the loop.
+        """
+        def __init__(self, rv: reactive.Value, loop: asyncio.AbstractEventLoop):
+            # rv: reactive value that accumulates the written text.
+            # loop: event loop on which the update and flush are scheduled.
+            self.rv = rv
+            self.loop = loop
+        def write(self, s: str):
+            """Schedule ``s`` to be appended to the reactive value; return ``len(s)``."""
+            if not s:
+                return 0
+            def _apply():
+                # Runs on the loop: append the text, then schedule a flush.
+                self.rv.set(self.rv.get() + s)
+                self.loop.create_task(reactive.flush())
+            self.loop.call_soon_threadsafe(_apply)
+            return len(s)
+        def flush(self):
+            """No-op; nothing is buffered in this object."""
+            pass
+    @render.download(filename="identification_output.csv")
+    async def run_btn_spec_lib_matching():
+        """Run spectral library matching and stream the result as a CSV download.
+
+        Collects the matching inputs, runs the HRMS or NRMS matching function
+        in a worker thread while stdout/stderr are redirected into
+        ``match_log_rv``, appends a success or error line to that log, and
+        yields the returned DataFrame as CSV text. Errors are re-raised after
+        being logged.
+        """
+        # 1) quick first paint
+        match_log_rv.set("Starting identification...\n")
+        await reactive.flush()
+        # 2) normalize inputs (same as before)
+        # Coerce the high-quality-library input to a bool: the string "true"
+        # (any case) and non-zero numbers become True.
+        hq = input.high_quality_reference_library()
+        if isinstance(hq, str):
+            hq = hq.lower() == "true"
+        elif isinstance(hq, (int, float)):
+            hq = bool(hq)
+        # Keyword arguments shared by the HRMS and NRMS matching functions.
+        common_kwargs = dict(
+            query_data=input.query_data()[0]["datapath"],
+            reference_data=input.reference_data()[0]["datapath"],
+            likely_reference_ids=None,
+            similarity_measure=input.similarity_measure(),
+            spectrum_preprocessing_order=input.spectrum_preprocessing_order(),
+            high_quality_reference_library=hq,
+            mz_min=input.mz_min(), mz_max=input.mz_max(),
+            int_min=input.int_min(), int_max=input.int_max(),
+            noise_threshold=input.noise_threshold(),
+            wf_mz=input.wf_mz(), wf_intensity=input.wf_int(),
+            LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(),
+            n_top_matches_to_save=input.n_top_matches_to_save(),
+            print_id_results=True,  # ensure the library actually prints progress
+            output_identification=str(Path.cwd() / "identification_output.csv"),
+            output_similarity_scores=str(Path.cwd() / "similarity_scores.csv"),
+            return_ID_output=True,
+        )
+        loop = asyncio.get_running_loop()
+        rw = ReactiveWriter(match_log_rv, loop)
+        # 3) run the heavy function in a thread so the event loop can repaint
+        # NOTE(review): redirect_stdout/redirect_stderr replace the
+        # process-wide sys.stdout/sys.stderr, so anything else printed while
+        # this await is pending also lands in the log - confirm acceptable.
+        try:
+            with redirect_stdout(rw), redirect_stderr(rw):
+                # Any platform other than "HRMS" is handled by the NRMS function.
+                if input.chromatography_platform() == "HRMS":
+                    df_out = await asyncio.to_thread(
+                        run_spec_lib_matching_on_HRMS_data,
+                        window_size_centroiding=input.window_size_centroiding(),
+                        window_size_matching=input.window_size_matching(),
+                        **common_kwargs
+                    )
+                else:
+                    df_out = await asyncio.to_thread(
+                        run_spec_lib_matching_on_NRMS_data, **common_kwargs
+                    )
+            match_log_rv.set(match_log_rv.get() + "\n✅ Identification finished.\n")
+            await reactive.flush()
+        except Exception as e:
+            # Log the failure, then let it propagate.
+            match_log_rv.set(match_log_rv.get() + f"\n❌ Error: {e}\n")
+            await reactive.flush()
+            raise
+        # 4) stream CSV back to the browser
+        yield df_out.to_csv(index=False)
 app = App(app_ui, server)

pycompound/plot_spectra.py CHANGED Viewed

@@ -45,7 +45,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
         extension = extension[(len(extension)-1)]
         if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
             output_path_tmp = query_data[:-3] + 'csv'
-            build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
+            build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
             df_query = pd.read_csv(output_path_tmp)
         if extension == 'csv' or extension == 'CSV':
             df_query = pd.read_csv(query_data)
@@ -96,8 +96,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
         print(f'Error: spectrum_preprocessing_order must contain only \'C\', \'F\', \'M\', \'N\', \'L\', \'W\'.')
         sys.exit()
-    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
-        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
+    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
+        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
         sys.exit()
     if isinstance(int_min,int) is True:
@@ -177,6 +177,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
             spec_tmp = spectrum_ID1
             spectrum_ID1 = spectrum_ID2
             spectrum_ID2 = spec_tmp
+        print(unique_query_ids)
+        print(spectrum_ID1)
         query_idx = unique_query_ids.index(spectrum_ID1)
         reference_idx = unique_reference_ids.index(spectrum_ID2)
         q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
@@ -400,8 +402,8 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
         print(f'Error: spectrum_preprocessing_order must contain only \'F\', \'N\', \'W\', \'L\'.')
         sys.exit()
-    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
-        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
+    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
+        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
         sys.exit()
     if isinstance(int_min,int) is True:

pycompound/spec_lib_matching.py CHANGED Viewed

@@ -389,7 +389,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
-def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
+def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
     '''
     runs spectral library matching on high-resolution mass spectrometry (HRMS) data
@@ -636,22 +636,26 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
     df_top_ref_specs.index = unique_query_ids
     df_top_ref_specs.index.names = ['Query Spectrum ID']
+    df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
     # print the identification results if the user desires
     if print_id_results == True:
         print(df_top_ref_specs.to_string())
-    # write spectral library matching results to disk
-    df_top_ref_specs.to_csv(output_identification)
+    if return_ID_output is False:
+        # write spectral library matching results to disk
+        df_top_ref_specs.to_csv(output_identification)
-    # write all similarity scores to disk
-    df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
-    df_scores.to_csv(output_similarity_scores)
+        # write all similarity scores to disk
+        df_scores.to_csv(output_similarity_scores)
+    else:
+        return df_top_ref_specs
-def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
+def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
     '''
     runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data
@@ -886,11 +890,15 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
     if print_id_results == True:
         print(df_top_ref_specs.to_string())
-    # write spectral library matching results to disk
-    df_top_ref_specs.to_csv(output_identification)
-    # write all similarity scores to disk
     df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
-    df_scores.to_csv(output_similarity_scores)
+    if return_ID_output is False:
+        # write spectral library matching results to disk
+        df_top_ref_specs.to_csv(output_identification)
+        # write all similarity scores to disk
+        df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
+        df_scores.to_csv(output_similarity_scores)
+    else:
+        return df_top_ref_specs

{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycompound
-Version: 0.0.8
+Version: 0.0.10
 Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
 Author-email: Hunter Dlugas <fy7392@wayne.edu>
 License-Expression: MIT
@@ -19,10 +19,9 @@ Requires-Dist: pyteomics==4.7.2
 Requires-Dist: netCDF4==1.6.5
 Requires-Dist: lxml>=5.1.0
 Requires-Dist: orjson==3.11.0
+Requires-Dist: shiny==1.4.0
 Requires-Dist: joblib==1.5.2
 Dynamic: license-file
 # PyCompound
 A Python-based tool for spectral library matching, PyCompound is available as a Python package with a command-line interface (CLI) and as a GUI application built with Python/Shiny. It performs spectral library matching to identify chemical compounds, offering a range of spectrum preprocessing transformations and similarity measures, including Cosine, three entropy-based similarity measures, and a plethora of binary similarity measures. PyCompound also includes functionality to tune parameters commonly used in a compound identification workflow given a query library of spectra with known IDs. PyCompound supports both high-resolution mass spectrometry (HRMS) data (e.g., LC-MS/MS) and nominal-resolution mass spectrometry (NRMS) data (e.g., GC-MS). For the full documentation, see the GitHub repository https://github.com/hdlugas/pycompound.

pycompound-0.0.10.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+app.py,sha256=ab1hII23lVwAmMh4bfzdni50vz-bK-ODbJT_b1VjGMA,34678
+pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
+pycompound/plot_spectra.py,sha256=_5r9YR3AA2IfTbcyfyTnPxxxA92T4hQ9olOgaw7FE6A,42082
+pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
+pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
+pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
+pycompound/spec_lib_matching.py,sha256=p8gj-72fjkf0p7XrqEl9hnYUGNSbyr7BXugvRT7Y5OA,60311
+pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
+pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
+pycompound-0.0.10.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
+pycompound-0.0.10.dist-info/METADATA,sha256=Gb0d0ZbClc4AFRcDjMnNWcb4TCuq84CJl-AKCNjY2wU,1733
+pycompound-0.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pycompound-0.0.10.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
+pycompound-0.0.10.dist-info/RECORD,,

pycompound-0.0.8.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-app.py,sha256=DCaQEp8_1-oldlhzEKo5HpKC2S-orV9gJxuSEHga9MY,21493
-pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
-pycompound/plot_spectra.py,sha256=9s6bDgNv_CZsgMlM_CDToJMxJCasVJbFAGoUrZPfnW8,42027
-pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
-pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
-pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
-pycompound/spec_lib_matching.py,sha256=pfDPmH1aQ11_25T80U9i0OUbgjCvvkzNEcDeBrDWNtA,59962
-pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
-pycompound/tuning_CLI.py,sha256=lkFBRZ5VxCBteIh_KTkQFdUBVZA0dL-BLiyMZce1vzE,8539
-pycompound-0.0.8.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
-pycompound-0.0.8.dist-info/METADATA,sha256=N76PE3DJwBvDV2VU4qXrbMvVQc7tTHaxZE_fpBxuC84,1706
-pycompound-0.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pycompound-0.0.8.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
-pycompound-0.0.8.dist-info/RECORD,,

{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/WHEEL RENAMED Viewed

File without changes

{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pycompound-0.0.8.dist-info → pycompound-0.0.10.dist-info}/top_level.txt RENAMED Viewed

File without changes

pycompound 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl

pycompound 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl