pycompound 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,4 @@
 
- # this script has a function to extract the mass spectra from an mgf, mzML, or cdf file and write them in the necessary format for use in spectral library matching
-
  import netCDF4 as nc
  import numpy as np
  import pandas as pd
@@ -14,7 +12,7 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
  Converts mgf, mzML, cdf, or msp file to the necessary format for spectral library matching.
 
  --input_path: Path to input file (must be mgf, mzML, cdf, or msp file). Mandatory argument.
- --output_path: Path to output CSV file. Default: current working directory.
+ --output_path: Path to output TXT file. Default: current working directory.
  --is_reference: Boolean flag indicating whether IDs of spectra should be written to output. Only pass true if building a reference library with known compound IDs. Only applicable to mgf and msp files. Options: \'True\', \'False\'. Optional argument. Default: False.
  '''
 
@@ -23,7 +21,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
  sys.exit()
 
  if output_path is None:
- #print('Warning: no output_path specified, so library is written to {Path.cwd()}/build_library.csv')
  tmp = input_path.split('/')
  tmp = tmp[(len(tmp)-1)]
  basename = tmp.split('.')[0]
@@ -34,7 +31,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
  print('Error: is_reference must be either \'True\' or \'False\'.')
  sys.exit()
 
- # determine whether an mgf or a mzML file was passed to --input_path
  last_three_chars = input_path[(len(input_path)-3):len(input_path)]
  last_four_chars = input_path[(len(input_path)-4):len(input_path)]
  if last_three_chars == 'mgf' or last_three_chars == 'MGF':
@@ -50,7 +46,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
  sys.exit()
 
 
- # obtain a list of spectra from the input file
  spectra = []
  if input_file_type == 'mgf':
  with mgf.read(input_path, index_by_scans = True) as reader:
@@ -62,7 +57,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
  spectra.append(spec)
 
 
- # extract the relevant information from each spectra (i.e m/z ratios and intensities)
  if input_file_type == 'mgf' or input_file_type == 'mzML':
  ids = []
  mzs = []
@@ -128,8 +122,7 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
  continue
 
 
- # write CSV file of spectra for use in spectral library matching
  df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
- df.to_csv(output_path, index=False)
+ df.to_csv(output_path, index=False, sep='\t')
 
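As of 0.1.3, build_library_from_raw_data writes a tab-separated file (df.to_csv(..., sep='\t')) instead of a comma-separated one, and the docstring now advertises a TXT output. A minimal usage sketch under that change; the input path is a placeholder, and the import path matches the one shown later in pycompound/processing.py:

```python
# Sketch of building a library and reading back the 0.1.3 tab-separated output.
# 'spectra.mgf' is a placeholder input path (mgf, mzML, cdf, or msp).
import pandas as pd
from pycompound.build_library import build_library_from_raw_data

build_library_from_raw_data(
    input_path='spectra.mgf',           # placeholder input file
    output_path='spectra_library.txt',  # 0.1.3 writes tab-separated output
    is_reference='False',               # docstring options: 'True' or 'False'
)

# Read the library back with the matching separator; per the hunk above the
# columns are 'id', 'mz_ratio', and 'intensity'.
library = pd.read_csv('spectra_library.txt', sep='\t')
print(library.head())
```

Any downstream reader that still assumes a comma separator will need the matching sep='\t' shown here.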
 
@@ -1,6 +1,4 @@
 
- # this script's functions plot a given query spectrum against a given reference spectrum before and after spectrum preprocessing transformations
-
  from .processing import *
  from .similarity_measures import *
  import pandas as pd
@@ -36,7 +34,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  --output_path: path to output PDF file containing the plots of the spectra before and after preprocessing transformations. If no argument is passed, then the plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.
  '''
 
- # load query and reference libraries
  if query_data is None:
  print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
  sys.exit()
@@ -68,7 +65,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  unique_reference_ids = [str(tmp) for tmp in unique_reference_ids]
 
 
- ##### process input parameters and ensure they are in a valid format #####
  if spectrum_ID1 is not None:
  spectrum_ID1 = str(spectrum_ID1)
  else:
@@ -96,8 +92,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  print(f'Error: spectrum_preprocessing_order must contain only \'C\', \'F\', \'M\', \'N\', \'L\', \'W\'.')
  sys.exit()
 
- if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
- print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
+ if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
+ print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
  sys.exit()
 
  if isinstance(int_min,int) is True:
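The hunk above fixes the misspelled 'interection' entry, so 'intersection' is now an accepted similarity_measure. A hypothetical call sketch using only parameters visible in this diff; the defining module's filename is not shown here, so the import path is an assumption, and the file paths, IDs, and output name are placeholders:

```python
# Hypothetical call sketch for the function patched above. The import path is
# assumed (the plotting module's name is not shown in this diff); all paths and
# IDs are placeholders, and every other parameter is left at its default.
from pycompound.plotting import generate_plots_on_HRMS_data  # assumed module path

generate_plots_on_HRMS_data(
    query_data='query_library.txt',          # placeholder query library path
    reference_data='reference_library.txt',  # placeholder reference library path
    spectrum_ID1='Q001',                     # placeholder spectrum IDs (cast to str internally)
    spectrum_ID2='R001',
    similarity_measure='intersection',       # accepted now that the typo is fixed
    spectrum_preprocessing_order=['C', 'M', 'W', 'L', 'N', 'F'],  # letters validated above
    high_quality_reference_library=False,
    output_path='spectrum_comparison.pdf',   # placeholder output PDF
)
```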
@@ -190,7 +186,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1].astype(float)
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1].astype(float)
 
- # apply transformation to y-axis if relevant
  if y_axis_transformation == 'normalized':
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1] / np.max(q_spec_pre_trans[:,1])
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1] / np.max(r_spec_pre_trans[:,1])
@@ -206,10 +201,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  else:
  ylab = 'Raw Intensity'
 
- # create the figure
  fig, axes = plt.subplots(nrows=2, ncols=1)
 
- # plot the untransformed spectra
  plt.subplot(2,1,1)
  plt.vlines(x=q_spec_pre_trans[:,0], ymin=[0]*q_spec_pre_trans.shape[0], ymax=q_spec_pre_trans[:,1], linewidth=3, color='blue', label=f'Spectrum ID 1: {spectrum_ID1}')
  plt.vlines(x=r_spec_pre_trans[:,0], ymin=[0]*r_spec_pre_trans.shape[0], ymax=-r_spec_pre_trans[:,1], linewidth=3, color='red', label=f'Spectrum ID 2: {spectrum_ID2}')
@@ -219,7 +212,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  plt.yticks(fontsize=7)
  plt.title('Untransformed Spectra', fontsize=10)
 
- # get the ranges of m/z and intensity values to display at the bottom of the two plots
  mz_min_tmp_q = round(q_spec[:,0].min(),1)
  mz_min_tmp_r = round(r_spec[:,0].min(),1)
  int_min_tmp_q = round(q_spec[:,1].min(),1)
@@ -233,51 +225,47 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  int_min_tmp = min([int_min_tmp_q,int_min_tmp_r])
  int_max_tmp = max([int_max_tmp_q,int_max_tmp_r])
 
- # perform the spectrum preprocessing transformations in the order specified
  is_matched = False
  for transformation in spectrum_preprocessing_order:
- if transformation == 'C' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # centroiding
+ if transformation == 'C' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
  q_spec = centroid_spectrum(q_spec, window_size=window_size_centroiding)
  r_spec = centroid_spectrum(r_spec, window_size=window_size_centroiding)
- if transformation == 'M' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # matching
+ if transformation == 'M' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
  m_spec = match_peaks_in_spectra(spec_a=q_spec, spec_b=r_spec, window_size=window_size_matching)
  q_spec = m_spec[:,0:2]
  r_spec = m_spec[:,[0,2]]
  is_matched = True
- if transformation == 'W' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # weight factor transformation
+ if transformation == 'W' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
  q_spec[:,1] = wf_transform(q_spec[:,0], q_spec[:,1], wf_mz, wf_intensity)
  r_spec[:,1] = wf_transform(r_spec[:,0], r_spec[:,1], wf_mz, wf_intensity)
- if transformation == 'L' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # low-entropy transformation
+ if transformation == 'L' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
  q_spec[:,1] = LE_transform(q_spec[:,1], LET_threshold, normalization_method=normalization_method)
  r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method=normalization_method)
- if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # noise removal
+ if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
  q_spec = remove_noise(q_spec, nr = noise_threshold)
- r_spec = remove_noise(r_spec, nr = noise_threshold)
- if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # filtering
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
+ r_spec = remove_noise(r_spec, nr = noise_threshold)
+ if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
  q_spec = filter_spec_lcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
- r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
+ r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
 
- # intensities of query and reference library
  q_ints = q_spec[:,1]
  r_ints = r_spec[:,1]
 
- # if there is at least one non-zero intensity ion fragment in either spectra, compute their similarity
  if np.sum(q_ints) != 0 and np.sum(r_ints) != 0 and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
  similarity_score = get_similarity(similarity_measure, q_ints, r_ints, weights, entropy_dimension)
  else:
  similarity_score = 0
 
- # plot the transformed spectra
  plt.subplot(2,1,2)
 
- # display warning message if either spectra are empty or have no non-zero intensity ion fragments
  if q_spec.shape[0] > 1:
  if np.max(q_spec[:,1]) == 0 or np.max(r_spec[:,1]) == 0:
  plt.text(0.5, 0.5, 'The query and/or reference spectrum has no non-zero intensities after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
  plt.xticks([])
  plt.yticks([])
  else:
- # apply transformation to y-axis if relevant
  if y_axis_transformation == 'normalized':
  q_spec[:,1] = q_spec[:,1] / np.max(q_spec[:,1])
  r_spec[:,1] = r_spec[:,1] / np.max(r_spec[:,1])
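The reworked 'N' and 'F' branches above now skip noise removal and filtering on the reference spectrum whenever the library is flagged as high quality, and they accept the flag either as a boolean or as the string 'False'. A small self-contained sketch of that gating rule (an illustration, not the package's code):

```python
# Illustration of the gating introduced above: the reference spectrum is only
# denoised/filtered when the library is NOT flagged as high quality, and the
# flag may arrive as a boolean or as the string 'False'.
def reference_needs_preprocessing(high_quality_reference_library):
    return (high_quality_reference_library == False
            or high_quality_reference_library == 'False')

print(reference_needs_preprocessing(False))    # True  -> denoise/filter the reference
print(reference_needs_preprocessing('False'))  # True  -> denoise/filter the reference
print(reference_needs_preprocessing(True))     # False -> leave the reference spectrum as-is
```

Note that the comparisons are exact, so other spellings such as 'false' would not match either branch.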
@@ -305,12 +293,15 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
  plt.yticks([])
 
 
+ print('\n\n\n')
+ print(high_quality_reference_library)
+ print('\n\n\n')
  plt.subplots_adjust(top=0.8, hspace=0.92, bottom=0.3)
  plt.figlegend(loc = 'upper center')
  fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
  fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
  fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
- fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
+ fig.text(0.05, 0.09, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
  fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
  fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
  fig.text(0.45, 0.18, f'Raw-Scale M/Z Range: [{mz_min_tmp},{mz_max_tmp}]', fontsize=7)
@@ -352,7 +343,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  --output_path: path to output PDF file containing the plots of the spectra before and after preprocessing transformations. If no argument is passed, then the plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.
  '''
 
- # load query and reference libraries
  if query_data is None:
  print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
  sys.exit()
@@ -382,7 +372,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  unique_reference_ids = df_reference.iloc[:,0].unique()
 
 
- ##### process input parameters and ensure they are in a valid format #####
  if spectrum_ID1 is not None:
  spectrum_ID1 = str(spectrum_ID1)
  else:
@@ -403,8 +392,8 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  print(f'Error: spectrum_preprocessing_order must contain only \'F\', \'N\', \'W\', \'L\'.')
  sys.exit()
 
- if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','interection','hamming','hellinger']:
- print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, interection, hamming, or hellinger.')
+ if similarity_measure not in ['cosine','shannon','renyi','tsallis','mixture','jaccard','dice','3w_jaccard','sokal_sneath','binary_cosine','mountford','mcconnaughey','driver_kroeber','simpson','braun_banquet','fager_mcgowan','kulczynski','intersection','hamming','hellinger']:
+ print('\nError: similarity_measure must be either cosine, shannon, renyi, tsallis, mixture, jaccard, dice, 3w_jaccard, sokal_sneath, binary_cosine, mountford, mcconnaughey, driver_kroeber, simpson, braun_banquet, fager_mcgowan, kulczynski, intersection, hamming, or hellinger.')
  sys.exit()
 
  if isinstance(int_min,int) is True:
@@ -457,12 +446,10 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  print(f'Warning: plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.')
  output_path = f'{Path.cwd()}/spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}.pdf'
 
- # get m/z values
  min_mz = np.min([np.min(df_query.iloc[:,1]), np.min(df_reference.iloc[:,1])])
  max_mz = np.max([np.max(df_query.iloc[:,1]), np.max(df_reference.iloc[:,1])])
  mzs = np.linspace(min_mz,max_mz,(max_mz-min_mz+1))
 
- # get unique query/reference library IDs; each query/reference ID corresponds to exactly one query/reference mass spectrum
  unique_query_ids = df_query.iloc[:,0].unique().tolist()
  unique_reference_ids = df_reference.iloc[:,0].unique().tolist()
  unique_query_ids = [str(ID) for ID in unique_query_ids]
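In the NRMS path above, the spectra are later binned onto a unit-spaced nominal-mass grid built with np.linspace(min_mz, max_mz, max_mz - min_mz + 1). A toy illustration of the grid that expression produces (values chosen arbitrarily):

```python
# Toy illustration of the nominal-mass grid built in the hunk above: one grid
# point per integer m/z between the two bounds.
import numpy as np

min_mz, max_mz = 50, 55
mzs = np.linspace(min_mz, max_mz, max_mz - min_mz + 1)
print(mzs)  # [50. 51. 52. 53. 54. 55.]
```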
@@ -494,7 +481,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  q_spec = convert_spec(q_spec,mzs)
  r_spec = convert_spec(r_spec,mzs)
 
- # get the ranges of m/z and intensity values to display at the bottom of the two plots
  int_min_tmp_q = min(q_spec[q_spec[:,1].nonzero(),1][0])
  int_min_tmp_r = min(r_spec[r_spec[:,1].nonzero(),1][0])
  int_max_tmp_q = max(q_spec[q_spec[:,1].nonzero(),1][0])
@@ -502,13 +488,10 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  int_min_tmp = int(min([int_min_tmp_q,int_min_tmp_r]))
  int_max_tmp = int(max([int_max_tmp_q,int_max_tmp_r]))
 
- # create the figure
  fig, axes = plt.subplots(nrows=2, ncols=1)
 
- # plot the untransformed spectra
  plt.subplot(2,1,1)
 
- # display warning message if either spectra have no non-zero ion fragments
  if np.max(q_spec[:,1]) == 0 or np.max(r_spec[:,1]) == 0:
  plt.text(0.5, 0.5, 'The query and/or reference spectrum has no non-zero intensities after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
  plt.xticks([])
@@ -519,7 +502,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1].astype(float)
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1].astype(float)
 
- # apply transformation to y-axis if relevant
  if y_axis_transformation == 'normalized':
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1] / np.max(q_spec_pre_trans[:,1])
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1] / np.max(r_spec_pre_trans[:,1])
@@ -543,32 +525,29 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  plt.title('Untransformed Query and Reference Spectra', fontsize=10)
 
  for transformation in spectrum_preprocessing_order:
- if transformation == 'W': # weight factor transformation
+ if transformation == 'W':
  q_spec[:,1] = wf_transform(q_spec[:,0], q_spec[:,1], wf_mz, wf_intensity)
  r_spec[:,1] = wf_transform(r_spec[:,0], r_spec[:,1], wf_mz, wf_intensity)
- if transformation == 'L': # low-entropy transformation
+ if transformation == 'L':
  q_spec[:,1] = LE_transform(q_spec[:,1], LET_threshold, normalization_method)
  r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method)
- if transformation == 'N': # noise removal
+ if transformation == 'N':
  q_spec = remove_noise(q_spec, nr = noise_threshold)
- if high_quality_reference_library == False:
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
  r_spec = remove_noise(r_spec, nr = noise_threshold)
- if transformation == 'F': # filtering with respect to mz and/or intensity
+ if transformation == 'F':
  q_spec = filter_spec_gcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
- if high_quality_reference_library == False:
+ if high_quality_reference_library == False or high_quality_reference_library == 'False':
  r_spec = filter_spec_gcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
 
- # compute similarity score; if the spectra contain at most one point, their similarity is considered to be 0
  if q_spec.shape[0] > 1:
  similarity_score = get_similarity(similarity_measure, q_spec[:,1], r_spec[:,1], weights, entropy_dimension)
  else:
  similarity_score = 0
 
 
- # plot the transformed spectra
  plt.subplot(2,1,2)
 
- # display warning message if either spectra are empty or have no non-zero intensity ion fragments
  if q_spec.shape[0] == 0 or r_spec.shape[0] == 0:
  plt.text(0.5, 0.5, 'The query and/or reference spectrum has no ion fragments left after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
  plt.xticks([])
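The 'W' branch above applies wf_transform(mz, intensity, wf_mz, wf_intensity) to both spectra. As a point of reference, weight-factor transforms in spectral library matching typically re-weight each peak as (m/z)^wf_mz * intensity^wf_intensity; the sketch below illustrates that general idea and is not necessarily the exact body of the package's wf_transform:

```python
# Generic weight-factor transformation as commonly used in spectral library
# matching: each peak is re-weighted as (m/z)^wf_mz * intensity^wf_intensity.
# Illustration only; not necessarily identical to pycompound's wf_transform.
import numpy as np

def weight_factor_transform(mz, intensity, wf_mz=0.0, wf_intensity=1.0):
    return np.power(mz, wf_mz) * np.power(intensity, wf_intensity)

mz = np.array([50.0, 75.0, 120.0])
intensity = np.array([100.0, 40.0, 10.0])
print(weight_factor_transform(mz, intensity, wf_mz=0.5, wf_intensity=0.5))
```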
@@ -578,7 +557,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  plt.xticks([])
  plt.yticks([])
  else:
- # apply transformation to y-axis if relevant
  if y_axis_transformation == 'normalized':
  q_spec[:,1] = q_spec[:,1] / np.max(q_spec[:,1])
  r_spec[:,1] = r_spec[:,1] / np.max(r_spec[:,1])
@@ -602,13 +580,12 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
  plt.title(f'Transformed Query and Reference Spectra', fontsize=10)
 
 
- #plt.subplots_adjust(top = 0.8, hspace = 0.7)
  plt.subplots_adjust(top=0.8, hspace=0.92, bottom=0.3)
  plt.figlegend(loc = 'upper center')
  fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
  fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
  fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
- fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
+ fig.text(0.05, 0.06, f'High Quality Reference Library: {str(high_quality_reference_library)}', fontsize=7)
  fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
  fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
pycompound/processing.py CHANGED
@@ -1,6 +1,4 @@
 
- # This script contains the functions used to transform spectra prior to computing similarity scores
-
  from pycompound.build_library import build_library_from_raw_data
  import scipy.stats
  import numpy as np
@@ -165,7 +163,6 @@ def centroid_spectrum(spec, window_size):
 
  spec = spec[np.argsort(spec[:,0])]
 
- #Fast check is the spectrum needs centroiding
  mz_array = spec[:, 0]
  need_centroid = 0
  if mz_array.shape[0] > 1:
@@ -180,7 +177,6 @@
  mz_delta_allowed = window_size
 
  if spec[i, 1] > 0:
- #Find left bound for current peak
  i_left = i - 1
  while i_left >= 0:
  mz_delta_left = spec[i, 0] - spec[i_left, 0]
@@ -190,7 +186,6 @@
  break
  i_left += 1
 
- #Find right bound for current peak
  i_right = i + 1
  while i_right < spec.shape[0]:
  mz_delta_right = spec[i_right, 0] - spec[i, 0]
@@ -199,7 +194,6 @@
  else:
  break
 
- #Merge those peaks
  intensity_sum = np.sum(spec[i_left:i_right, 1])
  intensity_weighted_sum = np.sum(spec[i_left:i_right, 0] * spec[i_left:i_right, 1])
 
@@ -246,16 +240,13 @@ def match_peaks_in_spectra(spec_a, spec_b, window_size):
  mass_delta = spec_a[a, 0] - spec_b[b, 0]
 
  if mass_delta < -window_size:
- # Peak only existed in spec a.
  spec_merged.append([spec_a[a, 0], spec_a[a, 1], peak_b_int])
  peak_b_int = 0.
  a += 1
  elif mass_delta > window_size:
- # Peak only existed in spec b.
  spec_merged.append([spec_b[b, 0], 0., spec_b[b, 1]])
  b += 1
  else:
- # Peak existed in both spec.
  peak_b_int += spec_b[b, 1]
  b += 1
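Both helpers patched above live in pycompound/processing.py and operate on Nx2 arrays of [m/z, intensity] (the diff indexes spec[:, 0] and spec[:, 1]). A usage sketch with toy spectra; the window sizes are arbitrary illustration values:

```python
# Usage sketch for the two processing helpers shown above. Toy spectra are
# Nx2 arrays of [m/z, intensity]; window sizes are illustration values only.
import numpy as np
from pycompound.processing import centroid_spectrum, match_peaks_in_spectra

query = np.array([[100.00, 50.0], [100.02, 60.0], [150.00, 20.0]])
reference = np.array([[100.01, 80.0], [149.99, 30.0], [200.00, 10.0]])

# Merge peaks closer than window_size into intensity-weighted centroids.
query_c = centroid_spectrum(query, window_size=0.05)
reference_c = centroid_spectrum(reference, window_size=0.05)

# Align the two spectra; per the diff, each merged row is
# [m/z, intensity_in_spec_a, intensity_in_spec_b].
merged = match_peaks_in_spectra(spec_a=query_c, spec_b=reference_c, window_size=0.05)
print(merged)
```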
 
@@ -10,7 +10,6 @@ import sys
 
 
  def S_cos(ints_a, ints_b):
- # Cosine Similarity Measure
  if np.sum(ints_a) == 0 or np.sum(ints_b) == 0:
  return(0)
  else:
@@ -18,12 +17,10 @@ def S_cos(ints_a, ints_b):
 
 
  def ent_renyi(ints, q):
- # Computes the Renyi entropy of a probability distribution for a given positive entropy dimension q
  return np.log(sum(np.power(ints,q))) / (1-q)
 
 
  def ent_tsallis(ints, q):
- # Computes the Tsallis entropy of a probability distribution for a given positive entropy dimension q
  return (sum(np.power(ints,q))-1) / (1-q)
 
 
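The two entropy helpers above back the 'renyi' and 'tsallis' similarity measures. A worked example using the exact formulas from the context lines; for a normalized intensity vector p and entropy dimension q, both quantities approach the Shannon entropy as q approaches 1:

```python
# Worked example of the entropy helpers shown above, with the formulas
# reproduced verbatim from the context lines in the hunk.
import numpy as np

def ent_renyi(ints, q):
    return np.log(sum(np.power(ints, q))) / (1 - q)

def ent_tsallis(ints, q):
    return (sum(np.power(ints, q)) - 1) / (1 - q)

p = np.array([0.5, 0.3, 0.2])      # toy normalized intensities
shannon = -np.sum(p * np.log(p))   # ~1.0297

print(ent_renyi(p, 2))             # -ln(0.38) ~ 0.9676
print(ent_tsallis(p, 2))           # (0.38 - 1) / (1 - 2) = 0.62
print(ent_renyi(p, 1.001), ent_tsallis(p, 1.001), shannon)  # all ~1.03
```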