PyPI - pycompound - Versions diffs - 0.0.55__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

pycompound 0.0.55__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

{pycompound_fy7392 → pycompound}/plot_spectra.py RENAMED Viewed

@@ -45,7 +45,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
         extension = extension[(len(extension)-1)]
         if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
             output_path_tmp = query_data[:-3] + 'csv'
-            build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
+            build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
             df_query = pd.read_csv(output_path_tmp)
         if extension == 'csv' or extension == 'CSV':
             df_query = pd.read_csv(query_data)
@@ -309,7 +309,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
     plt.figlegend(loc = 'upper center')
     fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
     fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
-    fig.text(0.05, 0.12, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
+    fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
     fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
     fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
     fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
@@ -318,6 +318,9 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
     fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
     fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
     fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
+    if similarity_measure == 'mixture':
+        fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
     plt.savefig(output_path, format='pdf')
     if return_plot == True:
@@ -604,13 +607,15 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
     plt.figlegend(loc = 'upper center')
     fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
     fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
-    fig.text(0.05, 0.09, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
+    fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
     fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
     fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
     fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
     fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
     fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
     fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
+    if similarity_measure=='mixture':
+        fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
     plt.savefig(output_path, format='pdf')
     if return_plot == True:

{pycompound_fy7392 → pycompound}/plot_spectra_CLI.py RENAMED Viewed

@@ -1,6 +1,6 @@
-from pycompound_fy7392.plot_spectra import generate_plots_on_HRMS_data
-from pycompound_fy7392.plot_spectra import generate_plots_on_NRMS_data
+from pycompound.plot_spectra import generate_plots_on_HRMS_data
+from pycompound.plot_spectra import generate_plots_on_NRMS_data
 import pandas as pd
 import argparse
 import json

{pycompound_fy7392 → pycompound}/processing.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # This script contains the functions used to transform spectra prior to computing similarity scores
-from pycompound_fy7392.build_library import build_library_from_raw_data
+from pycompound.build_library import build_library_from_raw_data
 import scipy.stats
 import numpy as np
 import pandas as pd

{pycompound_fy7392 → pycompound}/spec_lib_matching.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # this script's function runs spectral library matching to identify unknown query compound(s)
-from pycompound_fy7392.build_library import build_library_from_raw_data
+from pycompound.build_library import build_library_from_raw_data
 from .processing import *
 from .similarity_measures import *
 import pandas as pd
@@ -9,6 +9,12 @@ from pathlib import Path
 import json
 from itertools import product
 from joblib import Parallel, delayed
+import csv
+default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
+default_NRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
 def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
               similarity_measure_tmp, weight,
@@ -71,7 +77,8 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
     )
-def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
+def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
     """
     runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
@@ -81,6 +88,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
     --output_path: accuracy from each choice of parameter set is saved to a CSV file here.
     """
+    grid = {**default_HRMS_grid, **(grid or {})}
     for key, value in grid.items():
         globals()[key] = value
@@ -118,24 +126,35 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
     print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
     if output_path is None:
-        output_path = f'{Path.cwd()}/tuning_param_output.csv'
+        output_path = f'{Path.cwd()}/tuning_param_output.txt'
         print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
-    # build parameter grid out of the lists you already set
     param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold,
                          window_size_centroiding, window_size_matching, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
-    # run in parallel on all CPUs
     results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_HRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
     df_out = pd.DataFrame(results, columns=[
         'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX','NOISE.THRESHOLD',
         'WINDOW.SIZE.CENTROIDING','WINDOW.SIZE.MATCHING', 'WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
     ])
-    df_out = df_out.drop(columns=['WEIGHT'])
-    df_out.to_csv(output_path, index=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
+    df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
+    if return_output is False:
+        df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
+    else:
+        return df_out
-def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
+def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
     """
     runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
@@ -145,10 +164,10 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
     --output_path: accuracy from each choice of parameter set is saved to a CSV file here
     """
+    grid = {**default_NRMS_grid, **(grid or {})}
     for key, value in grid.items():
         globals()[key] = value
-    # load query and reference libraries
     if query_data is None:
         print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
         sys.exit()
@@ -182,21 +201,30 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
     print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
     if output_path is None:
-        output_path = f'{Path.cwd()}/tuning_param_output.csv'
+        output_path = f'{Path.cwd()}/tuning_param_output.txt'
         print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
-    # build parameter grid out of the lists you already set
     param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max,
                          noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
-    # run in parallel on all CPUs
     results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_NRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
     df_out = pd.DataFrame(results, columns=[
         'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX',
         'NOISE.THRESHOLD','WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
     ])
-    df_out = df_out.drop(columns=['WEIGHT'])
-    df_out.to_csv(output_path, index=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
+    df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
+    if return_output is False:
+        df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
+    else:
+        return df_out
@@ -389,7 +417,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
-def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
+def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
     '''
     runs spectral library matching on high-resolution mass spectrometry (HRMS) data
@@ -413,8 +441,8 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
     --entropy_dimension: Entropy dimension parameter. Must have positive value other than 1. When the entropy dimension is 1, then Renyi and Tsallis entropy are equivalent to Shannon entropy. Therefore, this parameter only applies to the renyi and tsallis similarity measures. This parameter will be ignored if similarity measure cosine or shannon is chosen. Default: 1.1
     --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
     --print_id_results: Flag that prints identification results if True. Default: False
-    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
-    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
+    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
+    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
     '''
     # load query and reference libraries
@@ -528,11 +556,11 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
         sys.exit()
     if output_identification is None:
-        output_identification = f'{Path.cwd()}/output_identification.csv'
+        output_identification = f'{Path.cwd()}/output_identification.txt'
         print(f'Warning: writing identification output to {output_identification}')
     if output_similarity_scores is None:
-        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
+        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
         print(f'Warning: writing similarity scores to {output_similarity_scores}')
@@ -636,22 +664,26 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
     df_top_ref_specs.index = unique_query_ids
     df_top_ref_specs.index.names = ['Query Spectrum ID']
+    df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
     # print the identification results if the user desires
     if print_id_results == True:
         print(df_top_ref_specs.to_string())
-    # write spectral library matching results to disk
-    df_top_ref_specs.to_csv(output_identification)
+    if return_ID_output is False:
+        # write spectral library matching results to disk
+        df_top_ref_specs.to_csv(output_identification, sep='\t')
-    # write all similarity scores to disk
-    df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
-    df_scores.to_csv(output_similarity_scores)
+        # write all similarity scores to disk
+        df_scores.to_csv(output_similarity_scores, sep='\t')
+    else:
+        return df_top_ref_specs
-def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
+def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
     '''
     runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data
@@ -674,8 +706,8 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
     --normalization_method: Method used to normalize the intensities of each spectrum so that the intensities sum to 1. Since the objects entropy quantifies the uncertainy of must be probability distributions, the intensities of a given spectrum must sum to 1 prior to computing the entropy of the given spectrum intensities. Options: \'standard\' and \'softmax\'. Default: standard.
     --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
     --print_id_results: Flag that prints identification results if True. Default: False
-    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
-    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
+    --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
+    --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
     '''
     # load query and reference libraries
@@ -774,11 +806,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
         sys.exit()
     if output_identification is None:
-        output_identification = f'{Path.cwd()}/output_identification.csv'
+        output_identification = f'{Path.cwd()}/output_identification.txt'
         print(f'Warning: writing identification output to {output_identification}')
     if output_similarity_scores is None:
-        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
+        output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
         print(f'Warning: writing similarity scores to {output_similarity_scores}')
@@ -886,11 +918,15 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
     if print_id_results == True:
         print(df_top_ref_specs.to_string())
-    # write spectral library matching results to disk
-    df_top_ref_specs.to_csv(output_identification)
-    # write all similarity scores to disk
     df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
-    df_scores.to_csv(output_similarity_scores)
+    if return_ID_output is False:
+        # write spectral library matching results to disk
+        df_top_ref_specs.to_csv(output_identification, sep='\t')
+        # write all similarity scores to disk
+        df_scores.columns = ['Reference Spectrum ID: ' + col for col in  list(map(str,df_scores.columns.tolist()))]
+        df_scores.to_csv(output_similarity_scores, sep='\t')
+    else:
+        return df_top_ref_specs

{pycompound_fy7392 → pycompound}/spec_lib_matching_CLI.py RENAMED Viewed

@@ -2,8 +2,8 @@
 # this script performs spectral library matching to identify unknown query compound(s) from GC-MS data
 # load libraries
-from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
-from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
+from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
+from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
 from pathlib import Path
 import pandas as pd
 import argparse

{pycompound_fy7392 → pycompound}/tuning_CLI.py RENAMED Viewed

@@ -1,6 +1,6 @@
-from pycompound_fy7392.spec_lib_matching import tune_params_on_HRMS_data
-from pycompound_fy7392.spec_lib_matching import tune_params_on_NRMS_data
+from pycompound.spec_lib_matching import tune_params_on_HRMS_data
+from pycompound.spec_lib_matching import tune_params_on_NRMS_data
 import argparse
 import json
 from pathlib import Path
@@ -40,8 +40,7 @@ else:
     sys.exit()
-grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':args.weights, 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
+grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':[args.weights], 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
 if args.chromatography_platform == 'HRMS':
     grid['mz_min'] = [float(x) for x in grid['mz_min']]

{pycompound-0.0.55.dist-info → pycompound-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycompound
-Version: 0.0.55
+Version: 0.1.1
 Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
 Author-email: Hunter Dlugas <fy7392@wayne.edu>
 License-Expression: MIT
@@ -19,6 +19,7 @@ Requires-Dist: pyteomics==4.7.2
 Requires-Dist: netCDF4==1.6.5
 Requires-Dist: lxml>=5.1.0
 Requires-Dist: orjson==3.11.0
+Requires-Dist: shiny==1.4.0
 Requires-Dist: joblib==1.5.2
 Dynamic: license-file

pycompound-0.1.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+app.py,sha256=k5mPyctA1eWkGjtnKrJb7STuweh_aH4HmPUH07jO92Y,53841
+pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
+pycompound/plot_spectra.py,sha256=Q7nDSW3Y5pR_Ql4JeEmyd6KRRyzvxk9j0yaUR0hfjJc,42275
+pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
+pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
+pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
+pycompound/spec_lib_matching.py,sha256=AAMxWqi6LXWo-tJ-uqJ4QxfHSg8bX3G_DJVt2bLLMcM,61860
+pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
+pycompound/tuning_CLI.py,sha256=dSFLwMiI0_6G4YDZR5ubqn9-75ixOvDPZMOoGS-_B6w,8540
+pycompound-0.1.1.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
+pycompound-0.1.1.dist-info/METADATA,sha256=XZtkvSau_Z723iCgy_LTR1CkYryDxXBdIFtb_D_E9u0,1732
+pycompound-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pycompound-0.1.1.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
+pycompound-0.1.1.dist-info/RECORD,,

pycompound-0.1.1.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1,2 @@
+app
+pycompound

pycompound-0.0.55.dist-info/RECORD DELETED Viewed

@@ -1,15 +0,0 @@
-app.py,sha256=PKiCJe_18EJIHvs0R7pl_Yf-XakZn5J0AAfI-AnGsX0,21535
-pycompound-0.0.55.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
-pycompound_fy7392/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
-pycompound_fy7392/plot_spectra.py,sha256=wOnf2oOAfifj7FYkTZAcIeD7dHW1aRHzmsspPpySDcY,42023
-pycompound_fy7392/plot_spectra_CLI.py,sha256=fo0nUmbuy2qE6d9HgVdASn2CNUG8seg2mUCPrUU-rao,8409
-pycompound_fy7392/processing.py,sha256=7cKMX7PQ4Q-I4c8lRo5qXbOVGr8CeRdgNPURJx8DBV0,11075
-pycompound_fy7392/pycompound_shiny.py,sha256=uYfeIuR5j1UK_KE8RbDPaQxqMIU1qykVJ2L-zgaSkY0,30154
-pycompound_fy7392/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
-pycompound_fy7392/spec_lib_matching.py,sha256=jtUpG5OBDtIaHIpCNc62a3y-wQ_SmIgXZ9Q_p8xKZu4,59969
-pycompound_fy7392/spec_lib_matching_CLI.py,sha256=TAafJ3DGPorBTDzmXLQaaSH3giKn6q3GrRJPWh03yyo,9929
-pycompound_fy7392/tuning_CLI.py,sha256=qLglxqq-y6EXCDk0P3CkWn6cTFCmWDeKz0-SZBXcwCA,8553
-pycompound-0.0.55.dist-info/METADATA,sha256=3i67ba8TVHHSK-toc2-OI9XJYdQRkrCKGXOrqHyV5e4,1705
-pycompound-0.0.55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pycompound-0.0.55.dist-info/top_level.txt,sha256=h_c9lBkHcABTURy4sDAmgRzZdFHYWX9MDdsaiftT-Yw,22
-pycompound-0.0.55.dist-info/RECORD,,

pycompound-0.0.55.dist-info/top_level.txt DELETED Viewed

@@ -1,2 +0,0 @@
-app
-pycompound_fy7392

pycompound 0.0.55__py3-none-any.whl → 0.1.1__py3-none-any.whl

pycompound 0.0.55__py3-none-any.whl → 0.1.1__py3-none-any.whl