PyPI - pycompound - Versions diffs - 0.1.2__tar.gz → 0.1.4__tar.gz - Mend

pycompound 0.1.2.tar.gz → 0.1.4.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (26) hide show

{pycompound-0.1.2/src/pycompound.egg-info → pycompound-0.1.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycompound
-Version: 0.1.2
+Version: 0.1.4
 Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
 Author-email: Hunter Dlugas <fy7392@wayne.edu>
 License-Expression: MIT

{pycompound-0.1.2 → pycompound-0.1.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pycompound"
-version = "0.1.2"
+version = "0.1.4"
 authors = [
   { name="Hunter Dlugas", email="fy7392@wayne.edu" },
 ]

{pycompound-0.1.2 → pycompound-0.1.4}/src/app.py RENAMED Viewed

@@ -25,7 +25,6 @@ import ast
 from numbers import Real
 _LOG_QUEUE: asyncio.Queue[str] = asyncio.Queue()
 def _run_with_redirects(fn, writer, *args, **kwargs):
@@ -474,8 +473,15 @@ def run_parameter_tuning_ui(platform: str):
+'''
+app_ui = ui.page_fluid(
+    ui.output_ui("main_ui"),
+    ui.output_text("status_output")
+)
+'''
 app_ui = ui.page_fluid(
+    ui.head_content(ui.tags.link(rel="icon", href="emblem.png")),
     ui.output_ui("main_ui"),
     ui.output_text("status_output")
 )
@@ -688,7 +694,6 @@ def server(input, output, session):
         img: ImgData = {"src": str(dir / "www/emblem.png"), "width": "320px", "height": "250px"}
         return img
     @output
     @render.ui
     def main_ui():
@@ -697,6 +702,7 @@ def server(input, output, session):
                 ui.h2("Main Menu"),
                 ui.div(
                     ui.output_image("image"),
+                    #ui.img(src="emblem.png", width="320px", height="250px"),
                     style=(
                         "position:fixed; top:0; left:50%; transform:translateX(-50%); "
                         "z-index:1000; text-align:center; padding:10px; background-color:white;"
@@ -884,11 +890,18 @@ def server(input, output, session):
         weights = [float(weight.strip()) for weight in input.weights().split(",") if weight.strip()]
         weights = {'Cosine':weights[0], 'Shannon':weights[1], 'Renyi':weights[2], 'Tsallis':weights[3]}
+        high_quality_reference_library_tmp2 = False
+        if input.high_quality_reference_library() != 'False':
+            high_quality_reference_library_tmp2 = True
+        print(input.high_quality_reference_library())
+        print(high_quality_reference_library_tmp2)
         if input.chromatography_platform() == "HRMS":
-            fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
+            fig = generate_plots_on_HRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), weights=weights, spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), window_size_centroiding=input.window_size_centroiding(), window_size_matching=input.window_size_matching(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
             plt.show()
         elif input.chromatography_platform() == "NRMS":
-            fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=input.high_quality_reference_library(), mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
+            fig = generate_plots_on_NRMS_data(query_data=input.query_data()[0]['datapath'], reference_data=input.reference_data()[0]['datapath'], spectrum_ID1=spectrum_ID1, spectrum_ID2=spectrum_ID2, similarity_measure=input.similarity_measure(), spectrum_preprocessing_order=input.spectrum_preprocessing_order(), high_quality_reference_library=high_quality_reference_library_tmp2, mz_min=input.mz_min(), mz_max=input.mz_max(), int_min=input.int_min(), int_max=input.int_max(), noise_threshold=input.noise_threshold(), wf_mz=input.wf_mz(), wf_intensity=input.wf_int(), LET_threshold=input.LET_threshold(), entropy_dimension=input.entropy_dimension(), y_axis_transformation=input.y_axis_transformation(), return_plot=True)
             plt.show()
         with io.BytesIO() as buf:
             fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
@@ -1115,3 +1128,26 @@ def server(input, output, session):
 app = App(app_ui, server)
+'''
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+class _InjectBaseURLMiddleware(BaseHTTPMiddleware):
+    async def dispatch(self, request: Request, call_next):
+        hdrs = dict(request.scope.get("headers", []))
+        if b"rstudio-connect-app-base-url" not in hdrs:
+            host = request.headers.get("x-forwarded-host") or request.headers.get("host") or ""
+            proto = request.headers.get("x-forwarded-proto") or "https"
+            root_path = (request.scope.get("root_path") or "").rstrip("/")
+            base = f"{proto}://{host}{root_path}"
+            new_headers = list(request.scope.get("headers", [])) + [
+                (b"rstudio-connect-app-base-url", base.encode("utf-8"))
+            ]
+            request.scope["headers"] = new_headers
+        return await call_next(request)
+app.starlette_app.add_middleware(_InjectBaseURLMiddleware)
+'''

{pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/plot_spectra.py RENAMED Viewed

@@ -92,8 +92,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
         print(f'Error: spectrum_preprocessing_order must contain only \'C\', \'F\', \'M\', \'N\', \'L\', \'W\'.')
         sys.exit()
-    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
-        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
+    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
+        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
         sys.exit()
     if isinstance(int_min,int) is True:
@@ -243,10 +243,12 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
             r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method=normalization_method)
         if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
             q_spec = remove_noise(q_spec, nr = noise_threshold)
-            r_spec = remove_noise(r_spec, nr = noise_threshold)
+            if high_quality_reference_library == False or high_quality_reference_library == 'False':
+                r_spec = remove_noise(r_spec, nr = noise_threshold)
         if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
             q_spec = filter_spec_lcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
-            r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
+            if high_quality_reference_library == False or high_quality_reference_library == 'False':
+                r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
     q_ints = q_spec[:,1]
     r_ints = r_spec[:,1]
@@ -291,12 +293,15 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
         plt.yticks([])
+    print('\n\n\n')
+    print(high_quality_reference_library)
+    print('\n\n\n')
     plt.subplots_adjust(top=0.8, hspace=0.92, bottom=0.3)
     plt.figlegend(loc = 'upper center')
     fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
     fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
     fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
-    fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
+    fig.text(0.05, 0.09, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
     fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
     fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
     fig.text(0.45, 0.18, f'Raw-Scale M/Z Range: [{mz_min_tmp},{mz_max_tmp}]', fontsize=7)
@@ -387,8 +392,8 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
         print(f'Error: spectrum_preprocessing_order must contain only \'F\', \'N\', \'W\', \'L\'.')
         sys.exit()
-    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
-        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
+    if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
+        print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
         sys.exit()
     if isinstance(int_min,int) is True:
@@ -528,11 +533,11 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
             r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method)
         if transformation == 'N':
             q_spec = remove_noise(q_spec, nr = noise_threshold)
-            if high_quality_reference_library == False:
+            if high_quality_reference_library == False or high_quality_reference_library == 'False':
                 r_spec = remove_noise(r_spec, nr = noise_threshold)
         if transformation == 'F':
             q_spec = filter_spec_gcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
-            if high_quality_reference_library == False:
+            if high_quality_reference_library == False or high_quality_reference_library == 'False':
                 r_spec = filter_spec_gcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
     if q_spec.shape[0] > 1:
@@ -580,7 +585,7 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
     fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
     fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
     fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
-    fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
+    fig.text(0.05, 0.06, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
     fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
     fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
     fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)

{pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound/spec_lib_matching.py RENAMED Viewed

@@ -37,6 +37,7 @@ def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_id
         LET_threshold=LET_threshold_tmp,
         entropy_dimension=entropy_dimension_tmp,
         high_quality_reference_library=high_quality_reference_library_tmp,
+        verbose=True
     )
     return (
@@ -441,21 +442,25 @@ def tune_params_on_NRMS_data_shiny(query_data=None, reference_data=None, grid=No
-def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
+def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, window_size_centroiding, window_size_matching, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
     n_top_matches_to_save = 1
     all_similarity_scores =  []
     for query_idx in range(0,len(unique_query_ids)):
-        print(f'query spectrum #{query_idx} is being identified')
+        if verbose is True:
+            print(f'query spectrum #{query_idx} is being identified')
         q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
         q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
+        #q_spec_tmp = q_spec_tmp.astype(float)
         similarity_scores = []
         for ref_idx in range(0,len(unique_reference_ids)):
             q_spec = q_spec_tmp
             r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
             r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
+            #print(r_spec)
+            #r_spec = r_spec.astype(float)
             is_matched = False
             for transformation in spectrum_preprocessing_order:
@@ -529,7 +534,7 @@ def get_acc_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
-def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library):
+def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids, similarity_measure, weights, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library, verbose=True):
     n_top_matches_to_save = 1
@@ -546,7 +551,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
         similarity_scores = []
         for ref_idx in range(0,len(unique_reference_ids)):
             q_spec = q_spec_tmp
-            if ref_idx % 1000 == 0:
+            if verbose is True and ref_idx % 1000 == 0:
                 print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
             r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
             r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
@@ -615,7 +620,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
-def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
+def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False, verbose=True):
     '''
     runs spectral library matching on high-resolution mass spectrometry (HRMS) data
@@ -762,14 +767,13 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
     all_similarity_scores =  []
     for query_idx in range(0,len(unique_query_ids)):
-        print(f'query spectrum #{query_idx} is being identified')
+        if verbose is True:
+            print(f'query spectrum #{query_idx} is being identified')
         q_idxs_tmp = np.where(df_query.iloc[:,0] == unique_query_ids[query_idx])[0]
         q_spec_tmp = np.asarray(pd.concat([df_query.iloc[q_idxs_tmp,1], df_query.iloc[q_idxs_tmp,2]], axis=1).reset_index(drop=True))
         similarity_scores = []
         for ref_idx in range(0,len(unique_reference_ids)):
-            #if ref_idx % 100 == 0:
-            #    print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
             q_spec = q_spec_tmp
             r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
             r_spec = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
@@ -1008,9 +1012,9 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
         similarity_scores = []
         for ref_idx in range(0,len(unique_reference_ids)):
-            q_spec = q_spec_tmp
-            if ref_idx % 1000 == 0:
+            if verbose is True and ref_idx % 1000 == 0:
                 print(f'Query spectrum #{query_idx} has had its similarity with {ref_idx} reference library spectra computed')
+            q_spec = q_spec_tmp
             r_idxs_tmp = np.where(df_reference.iloc[:,0] == unique_reference_ids[ref_idx])[0]
             r_spec_tmp = np.asarray(pd.concat([df_reference.iloc[r_idxs_tmp,1], df_reference.iloc[r_idxs_tmp,2]], axis=1).reset_index(drop=True))
             r_spec = convert_spec(r_spec_tmp,mzs)

pycompound-0.1.4/src/pycompound/tuning_CLI_DE.py ADDED Viewed

@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+import argparse
+import sys
+import json
+from pathlib import Path
+from typing import Dict, List, Tuple
+import numpy as np
+import pandas as pd
+from scipy.optimize import differential_evolution
+from pycompound.spec_lib_matching import get_acc_HRMS, get_acc_NRMS
+ALL_PARAMS = [
+    "window_size_centroiding",
+    "window_size_matching",
+    "noise_threshold",
+    "wf_mz",
+    "wf_int",
+    "LET_threshold",
+    "entropy_dimension"
+]
+SUGGESTED_BOUNDS = {
+    "window_size_centroiding": (0.0, 0.5),
+    "window_size_matching":    (0.0, 0.5),
+    "noise_threshold":         (0.0, 0.25),
+    "wf_mz":                   (0.0, 5.0),
+    "wf_int":                  (0.0, 5.0),
+    "LET_threshold":           (0.0, 5.0),
+    "entropy_dimension":       (1.0, 3.0)
+}
+DEFAULT_PARAMS = {
+    "window_size_centroiding": 0.5,
+    "window_size_matching":    0.5,
+    "noise_threshold":         0.10,
+    "wf_mz":                   0.0,
+    "wf_int":                  1.0,
+    "LET_threshold":           0.0,
+    "entropy_dimension":       1.1
+}
+# ---------- Utilities ----------
+def parse_bound(s: str) -> Tuple[str, Tuple[float, float]]:
+    # "name=min:max" → (name, (min, max))
+    if "=" not in s or ":" not in s:
+        raise argparse.ArgumentTypeError(f"Bad --bound format '{s}'. Use name=min:max")
+    name, rng = s.split("=", 1)
+    lo, hi = rng.split(":", 1)
+    try:
+        lo_f, hi_f = float(lo), float(hi)
+    except ValueError as e:
+        raise argparse.ArgumentTypeError(f"Non-numeric bound in '{s}': {e}")
+    if lo_f > hi_f:
+        raise argparse.ArgumentTypeError(f"Lower bound > upper bound in '{s}'")
+    return name.strip(), (lo_f, hi_f)
+def parse_default(s: str) -> Tuple[str, float]:
+    # "name=value" → (name, value)
+    if "=" not in s:
+        raise argparse.ArgumentTypeError(f"Bad --default format '{s}'. Use name=value")
+    name, val = s.split("=", 1)
+    try:
+        v = float(val)
+    except ValueError as e:
+        raise argparse.ArgumentTypeError(f"Non-numeric default in '{s}': {e}")
+    return name.strip(), v
+def _vector_to_full_params(X: np.ndarray, default_params: Dict[str, float], optimize_params: List[str]) -> Dict[str, float]:
+    params = dict(default_params)
+    for name, val in zip(optimize_params, X):
+        params[name] = float(val)
+    return params
+# ---------- Objective wrappers (top-level, pickle-friendly) ----------
+def objective_HRMS(X: np.ndarray, ctx: dict) -> float:
+    p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
+    acc = get_acc_HRMS(
+        ctx["df_query"], ctx["df_reference"],
+        ctx["uq"], ctx["ur"],
+        ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
+        ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
+        p["window_size_centroiding"], p["window_size_matching"], p["noise_threshold"],
+        p["wf_mz"], p["wf_int"], p["LET_threshold"],
+        p["entropy_dimension"],
+        ctx["high_quality_reference_library"],
+        verbose=False
+    )
+    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {acc*100}%")
+    return 1.0 - acc
+def objective_NRMS(X: np.ndarray, ctx: dict) -> float:
+    p = _vector_to_full_params(X, ctx["default_params"], ctx["optimize_params"])
+    acc = get_acc_NRMS(
+        ctx["df_query"], ctx["df_reference"],
+        ctx["uq"], ctx["ur"],
+        ctx["similarity_measure"], ctx["weights"], ctx["spectrum_preprocessing_order"],
+        ctx["mz_min"], ctx["mz_max"], ctx["int_min"], ctx["int_max"],
+        p["noise_threshold"], p["wf_mz"], p["wf_int"], p["LET_threshold"], p["entropy_dimension"],
+        ctx["high_quality_reference_library"],
+        verbose=False
+    )
+    print(f"\n{ctx['optimize_params']} = {np.array(X)}\naccuracy: {acc*100}%")
+    return 1.0 - acc
+# ---------- Main CLI ----------
+def main():
+    p = argparse.ArgumentParser(
+        description="Parameter tuning via Differential Evolution for HRMS/NRMS using pycompound."
+    )
+    p.add_argument("--chromatography_platform", choices=["HRMS", "NRMS"], default="HRMS", help="Chromatography Platform.")
+    p.add_argument("--query_data", required=True, help="Path to query CSV (must contain 'id' column).")
+    p.add_argument("--reference_data", required=True, nargs="+", help="Path(s) to reference CSV(s) (must contain 'id').")
+    p.add_argument("--similarity_measure", default="cosine", choices=["cosine", "renyi", "tsallis"], help="Similarity measure.")
+    p.add_argument("--weights", default="", help="Weights spec; empty means None.")
+    p.add_argument("--spectrum-order", default="CNMWL", help="Spectrum preprocessing order string.")
+    p.add_argument("--mz-min", type=float, default=0.0)
+    p.add_argument("--mz-max", type=float, default=999_999_999.0)
+    p.add_argument("--int-min", type=float, default=0.0)
+    p.add_argument("--int-max", type=float, default=999_999_999.0)
+    p.add_argument("--hq-ref-lib", action="store_true", help="Use high-quality reference library flag.")
+    p.add_argument("--opt", nargs="+", default=["window_size_centroiding", "noise_threshold", "wf_mz", "wf_int"],
+                   help=f"Parameters to optimize (subset of {ALL_PARAMS}).")
+    p.add_argument("--bound", action="append", default=[], type=parse_bound,
+                   help="Bound spec 'name=min:max'. Repeatable.")
+    p.add_argument("--default", dest="defaults", action="append", default=[], type=parse_default,
+                   help="Override a default 'name=value' for non-optimized params or initial values.")
+    p.add_argument("--maxiter", type=int, default=15)
+    p.add_argument("--seed", type=int, default=1)
+    p.add_argument("--workers", type=int, default=-1, help="Use -1 for all cores; 1 to disable parallelism.")
+    args = p.parse_args()
+    unknown = [x for x in args.opt if x not in ALL_PARAMS]
+    if unknown:
+        sys.exit(f"Error: unknown --opt params: {unknown}")
+    qpath = Path(args.query_data)
+    if not qpath.exists():
+        sys.exit(f"Query CSV not found: {qpath}")
+    df_query = pd.read_csv(qpath)
+    if "id" not in df_query.columns:
+        sys.exit("Query CSV must contain an 'id' column.")
+    ref_paths = [Path(pth) for pth in args.reference_data]
+    for r in ref_paths:
+        if not r.exists():
+            sys.exit(f"Reference CSV not found: {r}")
+    df_reference = pd.concat([pd.read_csv(r) for r in ref_paths], axis=0, ignore_index=True)
+    if "id" not in df_reference.columns:
+        sys.exit("Reference CSV must contain an 'id' column.")
+    uq = df_query["id"].unique().tolist()
+    ur = df_reference["id"].unique().tolist()
+    default_params = dict(DEFAULT_PARAMS)
+    for name, val in args.defaults:
+        if name not in DEFAULT_PARAMS:
+            sys.exit(f"--default refers to unknown parameter '{name}'. Allowed: {list(DEFAULT_PARAMS)}")
+        default_params[name] = val
+    param_bounds: Dict[str, Tuple[float, float]] = dict(SUGGESTED_BOUNDS)
+    for name, (lo, hi) in args.bound:
+        if name not in SUGGESTED_BOUNDS:
+            sys.exit(f"--bound refers to unknown parameter '{name}'. Allowed: {list(SUGGESTED_BOUNDS)}")
+        param_bounds[name] = (lo, hi)
+    bounds = [param_bounds[p] for p in args.opt]
+    ctx = dict(
+        df_query=df_query,
+        df_reference=df_reference,
+        uq=uq,
+        ur=ur,
+        similarity_measure=args.similarity_measure,
+        weights=(None if args.weights.strip() == "" else args.weights),
+        spectrum_preprocessing_order=args.spectrum_order,
+        mz_min=float(args.mz_min),
+        mz_max=float(args.mz_max),
+        int_min=float(args.int_min),
+        int_max=float(args.int_max),
+        high_quality_reference_library=bool(args.hq_ref_lib),
+        default_params=default_params,
+        optimize_params=args.opt,
+    )
+    history_acc: List[float] = []
+    def _cb(xk, convergence):
+        if args.chromatography_platform == "HRMS":
+            acc_pct = (1.0 - objective_HRMS(xk, ctx)) * 100.0
+        else:
+            acc_pct = (1.0 - objective_NRMS(xk, ctx)) * 100.0
+        history_acc.append(acc_pct)
+    objective = objective_HRMS if args.chromatography_platform == "HRMS" else objective_NRMS
+    result = differential_evolution(
+        objective,
+        bounds=bounds,
+        args=(ctx,),
+        maxiter=int(args.maxiter),
+        tol=0.0,
+        seed=int(args.seed),
+        workers=int(args.workers),
+        callback=_cb,
+    )
+    best_params = _vector_to_full_params(result.x, default_params, args.opt)
+    best_acc_pct = (1.0 - result.fun) * 100.0
+    print("\n=== Differential Evolution Result ===")
+    print(f"Mode: {args.chromatography_platform}")
+    print(f"Optimized over: {args.opt}")
+    print("Best values (selected params):")
+    for name in args.opt:
+        print(f"  {name}: {best_params[name]}")
+    print("\nFull parameter set used in final evaluation:")
+    for k in ALL_PARAMS:
+        print(f"  {k}: {best_params[k]}")
+    print(f"\nBest accuracy: {best_acc_pct:.3f}%")
+    print(f"DE raw: success={result.success}, nfev={result.nfev}, nit={result.nit}, message='{result.message}'")
+if __name__ == "__main__":
+    main()

pycompound-0.1.2/src/pycompound/tuning_CLI.py → pycompound-0.1.4/src/pycompound/tuning_CLI_grid.py RENAMED Viewed

@@ -1,6 +1,6 @@
-from pycompound.spec_lib_matching import tune_params_on_HRMS_data
-from pycompound.spec_lib_matching import tune_params_on_NRMS_data
+from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
+from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
 import argparse
 import json
 from pathlib import Path
@@ -61,9 +61,9 @@ grid['entropy_dimension'] = [float(x) for x in grid['entropy_dimension']]
 if args.chromatography_platform == 'HRMS':
-    tune_params_on_HRMS_data(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
+    tune_params_on_HRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
 if args.chromatography_platform == 'NRMS':
-    tune_params_on_NRMS_data(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)
+    tune_params_on_NRMS_data_grid(query_data=args.query_data, reference_data=args.reference_data, grid=grid, output_path=args.output_path)

{pycompound-0.1.2 → pycompound-0.1.4/src/pycompound.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycompound
-Version: 0.1.2
+Version: 0.1.4
 Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
 Author-email: Hunter Dlugas <fy7392@wayne.edu>
 License-Expression: MIT

{pycompound-0.1.2 → pycompound-0.1.4}/src/pycompound.egg-info/SOURCES.txt RENAMED Viewed

@@ -9,7 +9,8 @@ src/pycompound/processing.py
 src/pycompound/similarity_measures.py
 src/pycompound/spec_lib_matching.py
 src/pycompound/spec_lib_matching_CLI.py
-src/pycompound/tuning_CLI.py
+src/pycompound/tuning_CLI_DE.py
+src/pycompound/tuning_CLI_grid.py
 src/pycompound.egg-info/PKG-INFO
 src/pycompound.egg-info/SOURCES.txt
 src/pycompound.egg-info/dependency_links.txt

{pycompound-0.1.2 → pycompound-0.1.4}/tests/test_spec_lib_matching.py RENAMED Viewed

@@ -5,6 +5,7 @@ from pathlib import Path
 import os
+'''
 print('\n\ntest #1:')
 run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='hello')
@@ -34,6 +35,7 @@ run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_lib
 print('\n\ntest #10:')
 run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', normalization_method='tanh')
+'''
 print('\n\ntest #11:')
 run_spec_lib_matching_on_HRMS_data(query_data=f'{Path.cwd()}/data/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', similarity_measure='tsallis', wf_mz=2, wf_intensity=0.5, entropy_dimension=2, n_top_matches_to_save=3, print_id_results=True)

pycompound-0.1.4/tests/test_tuning.py ADDED Viewed

@@ -0,0 +1,52 @@
+from pycompound.spec_lib_matching import tune_params_on_HRMS_data_grid
+from pycompound.spec_lib_matching import tune_params_on_NRMS_data_grid
+from pycompound.spec_lib_matching import tune_params_DE
+from pathlib import Path
+import os
+print('\n\ntest #1:')
+tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
+                              reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
+                              output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
+print('\n\ntest #2:')
+tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
+                              reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
+                              grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]},
+                              output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
+print('\n\ntest #3:')
+tune_params_on_NRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv',
+                              reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv',
+                              output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
+print('\n\ntest #4:')
+tune_params_on_NRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv',
+                              reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv',
+                              grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]},
+                              output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
+print('\n\ntest #5:')
+tune_params_on_HRMS_data_grid(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv',
+                              reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv',
+                              grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]},
+                              output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')
+print('\n\ntest #6:')
+tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_query_data.csv',
+               reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/lcms_reference_data.csv',
+               similarity_measure='shannon',
+               optimize_params=["window_size_matching","noise_threshold","wf_mz","wf_int"],
+               param_bounds={"window_size_matching":(0.0,0.5),"noise_threshold":(0.0,0.25),"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0)},
+               default_params={"window_size_centroiding": 0.5, "window_size_matching":0.5, "noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})
+print('\n\ntest #7:')
+tune_params_DE(query_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/gcms_query_data.csv',
+               reference_data=f'{Path.cwd()}/data/tuning/tuning_data/filtered/gcms_reference_data.csv',
+               similarity_measure='renyi',
+               optimize_params=["wf_mz","wf_int","LET_threshold","entropy_dimension"],
+               param_bounds={"wf_mz":(0.0,5.0),"wf_int":(0.0,5.0),"LET_threshold":(0,5),"entropy_dimension":(1.01,3)},
+               default_params={"noise_threshold":0.10, "wf_mz":0.0, "wf_int":1.0, "LET_threshold":0.0, "entropy_dimension":1.1})

pycompound-0.1.2/tests/test_tuning.py DELETED Viewed

@@ -1,21 +0,0 @@
-from pycompound.spec_lib_matching import tune_params_on_HRMS_data
-from pycompound.spec_lib_matching import tune_params_on_NRMS_data
-from pathlib import Path
-import os
-print('\n\ntest #1:')
-tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test1.txt')
-print('\n\ntest #2:')
-tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.1,0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test2.txt')
-print('\n\ntest #3:')
-tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', output_path=f'{Path.cwd()}/tuning_param_output_test3.txt')
-print('\n\ntest #4:')
-tune_params_on_NRMS_data(query_data=f'{Path.cwd()}/data/tuning/gcms_query_library.csv', reference_data=f'{Path.cwd()}/data/gcms_reference_library.csv', grid={'similarity_measure':['cosine','shannon'], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0,0.1], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=f'{Path.cwd()}/tuning_param_output_test4.txt')
-print('\n\ntest #5:')
-tune_params_on_HRMS_data(query_data=f'{Path.cwd()}/data/tuning/lcms_query_library.csv', reference_data=f'{Path.cwd()}/data/lcms_reference_library.csv', grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.2, 'Shannon':0.2, 'Renyi':0.3, 'Tsallis':0.3},{'Cosine':0.25, 'Shannon':0.25, 'Renyi':0.25, 'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0,3], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False,True]}, output_path=f'{Path.cwd()}/tuning_param_output_test5.txt')