pycompound 0.0.55__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
45
45
  extension = extension[(len(extension)-1)]
46
46
  if extension == 'mgf' or extension == 'MGF' or extension == 'mzML' or extension == 'mzml' or extension == 'MZML' or extension == 'cdf' or extension == 'CDF':
47
47
  output_path_tmp = query_data[:-3] + 'csv'
48
- build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=False)
48
+ build_library_from_raw_data(input_path=query_data, output_path=output_path_tmp, is_reference=True)
49
49
  df_query = pd.read_csv(output_path_tmp)
50
50
  if extension == 'csv' or extension == 'CSV':
51
51
  df_query = pd.read_csv(query_data)
@@ -309,7 +309,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
309
309
  plt.figlegend(loc = 'upper center')
310
310
  fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
311
311
  fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
312
- fig.text(0.05, 0.12, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
312
+ fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
313
313
  fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
314
314
  fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
315
315
  fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
@@ -318,6 +318,9 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
318
318
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
319
319
  fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
320
320
  fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
321
+ if similarity_measure == 'mixture':
322
+ fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
323
+
321
324
  plt.savefig(output_path, format='pdf')
322
325
 
323
326
  if return_plot == True:
@@ -604,13 +607,15 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
604
607
  plt.figlegend(loc = 'upper center')
605
608
  fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
606
609
  fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
607
- fig.text(0.05, 0.09, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
610
+ fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
608
611
  fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
609
612
  fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
610
613
  fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
611
614
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
612
615
  fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
613
616
  fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
617
+ if similarity_measure=='mixture':
618
+ fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
614
619
  plt.savefig(output_path, format='pdf')
615
620
 
616
621
  if return_plot == True:
@@ -1,6 +1,6 @@
1
1
 
2
- from pycompound_fy7392.plot_spectra import generate_plots_on_HRMS_data
3
- from pycompound_fy7392.plot_spectra import generate_plots_on_NRMS_data
2
+ from pycompound.plot_spectra import generate_plots_on_HRMS_data
3
+ from pycompound.plot_spectra import generate_plots_on_NRMS_data
4
4
  import pandas as pd
5
5
  import argparse
6
6
  import json
@@ -1,7 +1,7 @@
1
1
 
2
2
  # This script contains the functions used to transform spectra prior to computing similarity scores
3
3
 
4
- from pycompound_fy7392.build_library import build_library_from_raw_data
4
+ from pycompound.build_library import build_library_from_raw_data
5
5
  import scipy.stats
6
6
  import numpy as np
7
7
  import pandas as pd
@@ -1,7 +1,7 @@
1
1
 
2
2
  # this script's function runs spectral library matching to identify unknown query compound(s)
3
3
 
4
- from pycompound_fy7392.build_library import build_library_from_raw_data
4
+ from pycompound.build_library import build_library_from_raw_data
5
5
  from .processing import *
6
6
  from .similarity_measures import *
7
7
  import pandas as pd
@@ -9,6 +9,12 @@ from pathlib import Path
9
9
  import json
10
10
  from itertools import product
11
11
  from joblib import Parallel, delayed
12
+ import csv
13
+
14
+
15
+ default_HRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
16
+ default_NRMS_grid = {'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}
17
+
12
18
 
13
19
  def _eval_one_HRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
14
20
  similarity_measure_tmp, weight,
@@ -71,7 +77,8 @@ def _eval_one_NRMS(df_query, df_reference, unique_query_ids, unique_reference_id
71
77
  )
72
78
 
73
79
 
74
- def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FCNMWL'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'window_size_centroiding':[0.5], 'window_size_matching':[0.5], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
80
+
81
+ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
75
82
  """
76
83
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
77
84
 
@@ -81,6 +88,7 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
81
88
  --output_path: accuracy from each choice of parameter set is saved to a CSV file here.
82
89
  """
83
90
 
91
+ grid = {**default_HRMS_grid, **(grid or {})}
84
92
  for key, value in grid.items():
85
93
  globals()[key] = value
86
94
 
@@ -118,24 +126,35 @@ def tune_params_on_HRMS_data(query_data=None, reference_data=None, grid={'simila
118
126
  print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
119
127
 
120
128
  if output_path is None:
121
- output_path = f'{Path.cwd()}/tuning_param_output.csv'
129
+ output_path = f'{Path.cwd()}/tuning_param_output.txt'
122
130
  print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
123
131
 
124
- # build parameter grid out of the lists you already set
125
132
  param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max, noise_threshold,
126
133
  window_size_centroiding, window_size_matching, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
127
- # run in parallel on all CPUs
128
134
  results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_HRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
129
135
 
130
136
  df_out = pd.DataFrame(results, columns=[
131
137
  'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX','NOISE.THRESHOLD',
132
138
  'WINDOW.SIZE.CENTROIDING','WINDOW.SIZE.MATCHING', 'WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
133
139
  ])
134
- df_out = df_out.drop(columns=['WEIGHT'])
135
- df_out.to_csv(output_path, index=False)
140
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
141
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
142
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
143
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
144
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
145
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
146
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
147
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
148
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
149
+ df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
150
+
151
+ if return_output is False:
152
+ df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
153
+ else:
154
+ return df_out
136
155
 
137
156
 
138
- def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'similarity_measure':['cosine'], 'weight':[{'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}], 'spectrum_preprocessing_order':['FNLW'], 'mz_min':[0], 'mz_max':[9999999], 'int_min':[0], 'int_max':[99999999], 'noise_threshold':[0.0], 'wf_mz':[0.0], 'wf_int':[1.0], 'LET_threshold':[0.0], 'entropy_dimension':[1.1], 'high_quality_reference_library':[False]}, output_path=None):
157
+ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid=None, output_path=None, return_output=False):
139
158
  """
140
159
  runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data with all possible combinations of parameters in the grid dict, saves results from each choice of parameters to a CSV file, and prints top-performing parameters
141
160
 
@@ -145,10 +164,10 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
145
164
  --output_path: accuracy from each choice of parameter set is saved to a CSV file here
146
165
  """
147
166
 
167
+ grid = {**default_NRMS_grid, **(grid or {})}
148
168
  for key, value in grid.items():
149
169
  globals()[key] = value
150
170
 
151
- # load query and reference libraries
152
171
  if query_data is None:
153
172
  print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
154
173
  sys.exit()
@@ -182,21 +201,30 @@ def tune_params_on_NRMS_data(query_data=None, reference_data=None, grid={'simila
182
201
  print(f'\nNote that there are {len(unique_query_ids)} unique query spectra, {len(unique_reference_ids)} unique reference spectra, and {len(set(unique_query_ids) & set(unique_reference_ids))} of the query and reference spectra IDs are in common.\n')
183
202
 
184
203
  if output_path is None:
185
- output_path = f'{Path.cwd()}/tuning_param_output.csv'
204
+ output_path = f'{Path.cwd()}/tuning_param_output.txt'
186
205
  print(f'Warning: since output_path=None, the output will be written to the current working directory: {output_path}')
187
206
 
188
- # build parameter grid out of the lists you already set
189
207
  param_grid = product(similarity_measure, weight, spectrum_preprocessing_order, mz_min, mz_max, int_min, int_max,
190
208
  noise_threshold, wf_mz, wf_int, LET_threshold, entropy_dimension, high_quality_reference_library)
191
- # run in parallel on all CPUs
192
209
  results = Parallel(n_jobs=-1, verbose=10)(delayed(_eval_one_NRMS)(df_query, df_reference, unique_query_ids, unique_reference_ids, *params) for params in param_grid)
193
210
 
194
211
  df_out = pd.DataFrame(results, columns=[
195
212
  'ACC','SIMILARITY.MEASURE','WEIGHT','SPECTRUM.PROCESSING.ORDER', 'MZ.MIN','MZ.MAX','INT.MIN','INT.MAX',
196
213
  'NOISE.THRESHOLD','WF.MZ','WF.INT','LET.THRESHOLD','ENTROPY.DIMENSION', 'HIGH.QUALITY.REFERENCE.LIBRARY'
197
214
  ])
198
- df_out = df_out.drop(columns=['WEIGHT'])
199
- df_out.to_csv(output_path, index=False)
215
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("\"","",regex=False)
216
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("{","",regex=False)
217
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("}","",regex=False)
218
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(":","",regex=False)
219
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Cosine","",regex=False)
220
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Shannon","",regex=False)
221
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Renyi","",regex=False)
222
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace("Tsallis","",regex=False)
223
+ df_out['WEIGHT'] = df_out['WEIGHT'].str.replace(" ","",regex=False)
224
+ if return_output is False:
225
+ df_out.to_csv(output_path, index=False, sep='\t', quoting=csv.QUOTE_NONE)
226
+ else:
227
+ return df_out
200
228
 
201
229
 
202
230
 
@@ -389,7 +417,7 @@ def get_acc_NRMS(df_query, df_reference, unique_query_ids, unique_reference_ids,
389
417
 
390
418
 
391
419
 
392
- def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
420
+ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, spectrum_preprocessing_order='FCNMWL', high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, window_size_centroiding=0.5, window_size_matching=0.5, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
393
421
  '''
394
422
  runs spectral library matching on high-resolution mass spectrometry (HRMS) data
395
423
 
@@ -413,8 +441,8 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
413
441
  --entropy_dimension: Entropy dimension parameter. Must have positive value other than 1. When the entropy dimension is 1, then Renyi and Tsallis entropy are equivalent to Shannon entropy. Therefore, this parameter only applies to the renyi and tsallis similarity measures. This parameter will be ignored if similarity measure cosine or shannon is chosen. Default: 1.1
414
442
  --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
415
443
  --print_id_results: Flag that prints identification results if True. Default: False
416
- --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
417
- --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
444
+ --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
445
+ --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
418
446
  '''
419
447
 
420
448
  # load query and reference libraries
@@ -528,11 +556,11 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
528
556
  sys.exit()
529
557
 
530
558
  if output_identification is None:
531
- output_identification = f'{Path.cwd()}/output_identification.csv'
559
+ output_identification = f'{Path.cwd()}/output_identification.txt'
532
560
  print(f'Warning: writing identification output to {output_identification}')
533
561
 
534
562
  if output_similarity_scores is None:
535
- output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
563
+ output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
536
564
  print(f'Warning: writing similarity scores to {output_similarity_scores}')
537
565
 
538
566
 
@@ -636,22 +664,26 @@ def run_spec_lib_matching_on_HRMS_data(query_data=None, reference_data=None, lik
636
664
  df_top_ref_specs.index = unique_query_ids
637
665
  df_top_ref_specs.index.names = ['Query Spectrum ID']
638
666
 
667
+ df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
668
+
639
669
  # print the identification results if the user desires
640
670
  if print_id_results == True:
641
671
  print(df_top_ref_specs.to_string())
642
672
 
643
- # write spectral library matching results to disk
644
- df_top_ref_specs.to_csv(output_identification)
673
+ if return_ID_output is False:
674
+ # write spectral library matching results to disk
675
+ df_top_ref_specs.to_csv(output_identification, sep='\t')
645
676
 
646
- # write all similarity scores to disk
647
- df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
648
- df_scores.to_csv(output_similarity_scores)
677
+ # write all similarity scores to disk
678
+ df_scores.to_csv(output_similarity_scores, sep='\t')
679
+ else:
680
+ return df_top_ref_specs
649
681
 
650
682
 
651
683
 
652
684
 
653
685
 
654
- def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None):
686
+ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, likely_reference_ids=None, spectrum_preprocessing_order='FNLW', similarity_measure='cosine', weights={'Cosine':0.25,'Shannon':0.25,'Renyi':0.25,'Tsallis':0.25}, high_quality_reference_library=False, mz_min=0, mz_max=9999999, int_min=0, int_max=9999999, noise_threshold=0.0, wf_mz=0.0, wf_intensity=1.0, LET_threshold=0.0, entropy_dimension=1.1, n_top_matches_to_save=1, print_id_results=False, output_identification=None, output_similarity_scores=None, return_ID_output=False):
655
687
  '''
656
688
  runs spectral library matching on nominal-resolution mass spectrometry (NRMS) data
657
689
 
@@ -674,8 +706,8 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
674
706
  --normalization_method: Method used to normalize the intensities of each spectrum so that the intensities sum to 1. Since the objects entropy quantifies the uncertainy of must be probability distributions, the intensities of a given spectrum must sum to 1 prior to computing the entropy of the given spectrum intensities. Options: \'standard\' and \'softmax\'. Default: standard.
675
707
  --n_top_matches_to_save: The number of top matches to report. For example, if n_top_matches_to_save=5, then for each query spectrum, the five reference spectra with the largest similarity with the given query spectrum will be reported. Default: 1
676
708
  --print_id_results: Flag that prints identification results if True. Default: False
677
- --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.csv\'.
678
- --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.csv.')
709
+ --output_identification: Output CSV file containing the most-similar reference spectra for each query spectrum along with the corresponding similarity scores. Default is to save identification output in current working directory with filename \'output_identification.txt\'.
710
+ --output_similarity_scores: Output CSV file containing similarity scores between all query spectrum/spectra and all reference spectra. Each row corresponds to a query spectrum, the left-most column contains the query spectrum/spectra identifier, and the remaining column contain the similarity scores with respect to all reference library spectra. If no argument passed, then this CSV file is written to the current working directory with filename \'output_all_similarity_scores\'.txt.')
679
711
  '''
680
712
 
681
713
  # load query and reference libraries
@@ -774,11 +806,11 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
774
806
  sys.exit()
775
807
 
776
808
  if output_identification is None:
777
- output_identification = f'{Path.cwd()}/output_identification.csv'
809
+ output_identification = f'{Path.cwd()}/output_identification.txt'
778
810
  print(f'Warning: writing identification output to {output_identification}')
779
811
 
780
812
  if output_similarity_scores is None:
781
- output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.csv'
813
+ output_similarity_scores = f'{Path.cwd()}/output_all_similarity_scores.txt'
782
814
  print(f'Warning: writing similarity scores to {output_similarity_scores}')
783
815
 
784
816
 
@@ -886,11 +918,15 @@ def run_spec_lib_matching_on_NRMS_data(query_data=None, reference_data=None, lik
886
918
  if print_id_results == True:
887
919
  print(df_top_ref_specs.to_string())
888
920
 
889
- # write spectral library matching results to disk
890
- df_top_ref_specs.to_csv(output_identification)
891
-
892
- # write all similarity scores to disk
893
921
  df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
894
- df_scores.to_csv(output_similarity_scores)
895
922
 
923
+ if return_ID_output is False:
924
+ # write spectral library matching results to disk
925
+ df_top_ref_specs.to_csv(output_identification, sep='\t')
926
+
927
+ # write all similarity scores to disk
928
+ df_scores.columns = ['Reference Spectrum ID: ' + col for col in list(map(str,df_scores.columns.tolist()))]
929
+ df_scores.to_csv(output_similarity_scores, sep='\t')
930
+ else:
931
+ return df_top_ref_specs
896
932
 
@@ -2,8 +2,8 @@
2
2
  # this script performs spectral library matching to identify unknown query compound(s) from GC-MS data
3
3
 
4
4
  # load libraries
5
- from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
6
- from pycompound_fy7392.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
5
+ from pycompound.spec_lib_matching import run_spec_lib_matching_on_HRMS_data
6
+ from pycompound.spec_lib_matching import run_spec_lib_matching_on_NRMS_data
7
7
  from pathlib import Path
8
8
  import pandas as pd
9
9
  import argparse
@@ -1,6 +1,6 @@
1
1
 
2
- from pycompound_fy7392.spec_lib_matching import tune_params_on_HRMS_data
3
- from pycompound_fy7392.spec_lib_matching import tune_params_on_NRMS_data
2
+ from pycompound.spec_lib_matching import tune_params_on_HRMS_data
3
+ from pycompound.spec_lib_matching import tune_params_on_NRMS_data
4
4
  import argparse
5
5
  import json
6
6
  from pathlib import Path
@@ -40,8 +40,7 @@ else:
40
40
  sys.exit()
41
41
 
42
42
 
43
- grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':args.weights, 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
44
-
43
+ grid = {'similarity_measure':args.similarity_measure.split(','), 'weight':[args.weights], 'spectrum_preprocessing_order':spectrum_preprocessing_order.split(','), 'mz_min':args.mz_min.split(','), 'mz_max':args.mz_max.split(','), 'int_min':args.int_min.split(','), 'int_max':args.int_max.split(','), 'window_size_centroiding':args.window_size_centroiding.split(','), 'window_size_matching':args.window_size_matching.split(','), 'noise_threshold':args.noise_threshold.split(','), 'wf_mz':args.wf_mz.split(','), 'wf_int':args.wf_intensity.split(','), 'LET_threshold':args.LET_threshold.split(','), 'entropy_dimension':args.entropy_dimension.split(','), 'high_quality_reference_library':args.high_quality_reference_library.split(',')}
45
44
 
46
45
  if args.chromatography_platform == 'HRMS':
47
46
  grid['mz_min'] = [float(x) for x in grid['mz_min']]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycompound
3
- Version: 0.0.55
3
+ Version: 0.1.1
4
4
  Summary: Python package to perform compound identification in mass spectrometry via spectral library matching.
5
5
  Author-email: Hunter Dlugas <fy7392@wayne.edu>
6
6
  License-Expression: MIT
@@ -19,6 +19,7 @@ Requires-Dist: pyteomics==4.7.2
19
19
  Requires-Dist: netCDF4==1.6.5
20
20
  Requires-Dist: lxml>=5.1.0
21
21
  Requires-Dist: orjson==3.11.0
22
+ Requires-Dist: shiny==1.4.0
22
23
  Requires-Dist: joblib==1.5.2
23
24
  Dynamic: license-file
24
25
 
@@ -0,0 +1,14 @@
1
+ app.py,sha256=k5mPyctA1eWkGjtnKrJb7STuweh_aH4HmPUH07jO92Y,53841
2
+ pycompound/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
3
+ pycompound/plot_spectra.py,sha256=Q7nDSW3Y5pR_Ql4JeEmyd6KRRyzvxk9j0yaUR0hfjJc,42275
4
+ pycompound/plot_spectra_CLI.py,sha256=ObaLad5Z5DmfQB-j0HSCg1mLORbYj2BM3hb5Yd0ZdDI,8395
5
+ pycompound/processing.py,sha256=vqtKaZ6vot6wlnKNTYUQFX7ccPpnCAl0L6bN289vZoM,11068
6
+ pycompound/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
7
+ pycompound/spec_lib_matching.py,sha256=AAMxWqi6LXWo-tJ-uqJ4QxfHSg8bX3G_DJVt2bLLMcM,61860
8
+ pycompound/spec_lib_matching_CLI.py,sha256=EdXM0dRQfwGQAK4OKxhcVytuUnX9pRyJROwC6rloZ9s,9915
9
+ pycompound/tuning_CLI.py,sha256=dSFLwMiI0_6G4YDZR5ubqn9-75ixOvDPZMOoGS-_B6w,8540
10
+ pycompound-0.1.1.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
11
+ pycompound-0.1.1.dist-info/METADATA,sha256=XZtkvSau_Z723iCgy_LTR1CkYryDxXBdIFtb_D_E9u0,1732
12
+ pycompound-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ pycompound-0.1.1.dist-info/top_level.txt,sha256=wFBLVrqpC07HghIU8tsEdgdvgkdOE3GN_1Gfjk-uEUc,15
14
+ pycompound-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ app
2
+ pycompound
@@ -1,15 +0,0 @@
1
- app.py,sha256=PKiCJe_18EJIHvs0R7pl_Yf-XakZn5J0AAfI-AnGsX0,21535
2
- pycompound-0.0.55.dist-info/licenses/LICENSE,sha256=fPFFlkSGg60VQWyWqTSv8yoJnpLzppzdihVWY5NKom8,1064
3
- pycompound_fy7392/build_library.py,sha256=8ghpX8wfj6u-3V5X2IdJ-e8G_FRSla1lO0pzLj7hOtI,5373
4
- pycompound_fy7392/plot_spectra.py,sha256=wOnf2oOAfifj7FYkTZAcIeD7dHW1aRHzmsspPpySDcY,42023
5
- pycompound_fy7392/plot_spectra_CLI.py,sha256=fo0nUmbuy2qE6d9HgVdASn2CNUG8seg2mUCPrUU-rao,8409
6
- pycompound_fy7392/processing.py,sha256=7cKMX7PQ4Q-I4c8lRo5qXbOVGr8CeRdgNPURJx8DBV0,11075
7
- pycompound_fy7392/pycompound_shiny.py,sha256=uYfeIuR5j1UK_KE8RbDPaQxqMIU1qykVJ2L-zgaSkY0,30154
8
- pycompound_fy7392/similarity_measures.py,sha256=TuvtEXWwyxE6dfpmuAqRC6gOHvHg3Jf21099pVaNBAs,10702
9
- pycompound_fy7392/spec_lib_matching.py,sha256=jtUpG5OBDtIaHIpCNc62a3y-wQ_SmIgXZ9Q_p8xKZu4,59969
10
- pycompound_fy7392/spec_lib_matching_CLI.py,sha256=TAafJ3DGPorBTDzmXLQaaSH3giKn6q3GrRJPWh03yyo,9929
11
- pycompound_fy7392/tuning_CLI.py,sha256=qLglxqq-y6EXCDk0P3CkWn6cTFCmWDeKz0-SZBXcwCA,8553
12
- pycompound-0.0.55.dist-info/METADATA,sha256=3i67ba8TVHHSK-toc2-OI9XJYdQRkrCKGXOrqHyV5e4,1705
13
- pycompound-0.0.55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- pycompound-0.0.55.dist-info/top_level.txt,sha256=h_c9lBkHcABTURy4sDAmgRzZdFHYWX9MDdsaiftT-Yw,22
15
- pycompound-0.0.55.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- app
2
- pycompound_fy7392