pycompound 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,4 @@
1
1
 
2
- # this script has a function to extract the mass spectra from an mgf, mzML, or cdf file and write them in the necessary format for use in spectral library matching
3
-
4
2
  import netCDF4 as nc
5
3
  import numpy as np
6
4
  import pandas as pd
@@ -14,7 +12,7 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
14
12
  Converts mgf, mzML, cdf, or msp file to the necessary format for spectral library matching.
15
13
 
16
14
  --input_path: Path to input file (must be mgf, mzML, cdf, or msp file). Mandatory argument.
17
- --output_path: Path to output CSV file. Default: current working directory.
15
+ --output_path: Path to output TXT file. Default: current working directory.
18
16
  --is_reference: Boolean flag indicating whether IDs of spectra should be written to output. Only pass true if building a reference library with known compound IDs. Only applicable to mgf and msp files. Options: \'True\', \'False\'. Optional argument. Default: False.
19
17
  '''
20
18
 
@@ -23,7 +21,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
23
21
  sys.exit()
24
22
 
25
23
  if output_path is None:
26
- #print('Warning: no output_path specified, so library is written to {Path.cwd()}/build_library.csv')
27
24
  tmp = input_path.split('/')
28
25
  tmp = tmp[(len(tmp)-1)]
29
26
  basename = tmp.split('.')[0]
@@ -34,7 +31,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
34
31
  print('Error: is_reference must be either \'True\' or \'False\'.')
35
32
  sys.exit()
36
33
 
37
- # determine whether an mgf or a mzML file was passed to --input_path
38
34
  last_three_chars = input_path[(len(input_path)-3):len(input_path)]
39
35
  last_four_chars = input_path[(len(input_path)-4):len(input_path)]
40
36
  if last_three_chars == 'mgf' or last_three_chars == 'MGF':
@@ -50,7 +46,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
50
46
  sys.exit()
51
47
 
52
48
 
53
- # obtain a list of spectra from the input file
54
49
  spectra = []
55
50
  if input_file_type == 'mgf':
56
51
  with mgf.read(input_path, index_by_scans = True) as reader:
@@ -62,7 +57,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
62
57
  spectra.append(spec)
63
58
 
64
59
 
65
- # extract the relevant information from each spectra (i.e m/z ratios and intensities)
66
60
  if input_file_type == 'mgf' or input_file_type == 'mzML':
67
61
  ids = []
68
62
  mzs = []
@@ -128,8 +122,7 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
128
122
  continue
129
123
 
130
124
 
131
- # write CSV file of spectra for use in spectral library matching
132
125
  df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
133
- df.to_csv(output_path, index=False)
126
+ df.to_csv(output_path, index=False, sep='\t')
134
127
 
135
128
 
@@ -1,6 +1,4 @@
1
1
 
2
- # this script's functions plot a given query spectrum against a given reference spectrum before and after spectrum preprocessing transformations
3
-
4
2
  from .processing import *
5
3
  from .similarity_measures import *
6
4
  import pandas as pd
@@ -36,7 +34,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
36
34
  --output_path: path to output PDF file containing the plots of the spectra before and after preprocessing transformations. If no argument is passed, then the plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.
37
35
  '''
38
36
 
39
- # load query and reference libraries
40
37
  if query_data is None:
41
38
  print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
42
39
  sys.exit()
@@ -68,7 +65,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
68
65
  unique_reference_ids = [str(tmp) for tmp in unique_reference_ids]
69
66
 
70
67
 
71
- ##### process input parameters and ensure they are in a valid format #####
72
68
  if spectrum_ID1 is not None:
73
69
  spectrum_ID1 = str(spectrum_ID1)
74
70
  else:
@@ -177,8 +173,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
177
173
  spec_tmp = spectrum_ID1
178
174
  spectrum_ID1 = spectrum_ID2
179
175
  spectrum_ID2 = spec_tmp
180
- print(unique_query_ids)
181
- print(spectrum_ID1)
182
176
  query_idx = unique_query_ids.index(spectrum_ID1)
183
177
  reference_idx = unique_reference_ids.index(spectrum_ID2)
184
178
  q_idxs_tmp = np.where(df_query.iloc[:,0].astype(str) == unique_query_ids[query_idx])[0]
@@ -192,7 +186,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
192
186
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1].astype(float)
193
187
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1].astype(float)
194
188
 
195
- # apply transformation to y-axis if relevant
196
189
  if y_axis_transformation == 'normalized':
197
190
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1] / np.max(q_spec_pre_trans[:,1])
198
191
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1] / np.max(r_spec_pre_trans[:,1])
@@ -208,10 +201,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
208
201
  else:
209
202
  ylab = 'Raw Intensity'
210
203
 
211
- # create the figure
212
204
  fig, axes = plt.subplots(nrows=2, ncols=1)
213
205
 
214
- # plot the untransformed spectra
215
206
  plt.subplot(2,1,1)
216
207
  plt.vlines(x=q_spec_pre_trans[:,0], ymin=[0]*q_spec_pre_trans.shape[0], ymax=q_spec_pre_trans[:,1], linewidth=3, color='blue', label=f'Spectrum ID 1: {spectrum_ID1}')
217
208
  plt.vlines(x=r_spec_pre_trans[:,0], ymin=[0]*r_spec_pre_trans.shape[0], ymax=-r_spec_pre_trans[:,1], linewidth=3, color='red', label=f'Spectrum ID 2: {spectrum_ID2}')
@@ -221,7 +212,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
221
212
  plt.yticks(fontsize=7)
222
213
  plt.title('Untransformed Spectra', fontsize=10)
223
214
 
224
- # get the ranges of m/z and intensity values to display at the bottom of the two plots
225
215
  mz_min_tmp_q = round(q_spec[:,0].min(),1)
226
216
  mz_min_tmp_r = round(r_spec[:,0].min(),1)
227
217
  int_min_tmp_q = round(q_spec[:,1].min(),1)
@@ -235,51 +225,45 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
235
225
  int_min_tmp = min([int_min_tmp_q,int_min_tmp_r])
236
226
  int_max_tmp = max([int_max_tmp_q,int_max_tmp_r])
237
227
 
238
- # perform the spectrum preprocessing transformations in the order specified
239
228
  is_matched = False
240
229
  for transformation in spectrum_preprocessing_order:
241
- if transformation == 'C' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # centroiding
230
+ if transformation == 'C' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
242
231
  q_spec = centroid_spectrum(q_spec, window_size=window_size_centroiding)
243
232
  r_spec = centroid_spectrum(r_spec, window_size=window_size_centroiding)
244
- if transformation == 'M' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # matching
233
+ if transformation == 'M' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
245
234
  m_spec = match_peaks_in_spectra(spec_a=q_spec, spec_b=r_spec, window_size=window_size_matching)
246
235
  q_spec = m_spec[:,0:2]
247
236
  r_spec = m_spec[:,[0,2]]
248
237
  is_matched = True
249
- if transformation == 'W' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # weight factor transformation
238
+ if transformation == 'W' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
250
239
  q_spec[:,1] = wf_transform(q_spec[:,0], q_spec[:,1], wf_mz, wf_intensity)
251
240
  r_spec[:,1] = wf_transform(r_spec[:,0], r_spec[:,1], wf_mz, wf_intensity)
252
- if transformation == 'L' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # low-entropy transformation
241
+ if transformation == 'L' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
253
242
  q_spec[:,1] = LE_transform(q_spec[:,1], LET_threshold, normalization_method=normalization_method)
254
243
  r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method=normalization_method)
255
- if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # noise removal
244
+ if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
256
245
  q_spec = remove_noise(q_spec, nr = noise_threshold)
257
246
  r_spec = remove_noise(r_spec, nr = noise_threshold)
258
- if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1: # filtering
247
+ if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
259
248
  q_spec = filter_spec_lcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
260
249
  r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
261
250
 
262
- # intensities of query and reference library
263
251
  q_ints = q_spec[:,1]
264
252
  r_ints = r_spec[:,1]
265
253
 
266
- # if there is at least one non-zero intensity ion fragment in either spectra, compute their similarity
267
254
  if np.sum(q_ints) != 0 and np.sum(r_ints) != 0 and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
268
255
  similarity_score = get_similarity(similarity_measure, q_ints, r_ints, weights, entropy_dimension)
269
256
  else:
270
257
  similarity_score = 0
271
258
 
272
- # plot the transformed spectra
273
259
  plt.subplot(2,1,2)
274
260
 
275
- # display warning message if either spectra are empty or have no non-zero intensity ion fragments
276
261
  if q_spec.shape[0] > 1:
277
262
  if np.max(q_spec[:,1]) == 0 or np.max(r_spec[:,1]) == 0:
278
263
  plt.text(0.5, 0.5, 'The query and/or reference spectrum has no non-zero intensities after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
279
264
  plt.xticks([])
280
265
  plt.yticks([])
281
266
  else:
282
- # apply transformation to y-axis if relevant
283
267
  if y_axis_transformation == 'normalized':
284
268
  q_spec[:,1] = q_spec[:,1] / np.max(q_spec[:,1])
285
269
  r_spec[:,1] = r_spec[:,1] / np.max(r_spec[:,1])
@@ -311,7 +295,7 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
311
295
  plt.figlegend(loc = 'upper center')
312
296
  fig.text(0.05, 0.18, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
313
297
  fig.text(0.05, 0.15, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
314
- fig.text(0.05, 0.12, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
298
+ fig.text(0.05, 0.12, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
315
299
  fig.text(0.05, 0.09, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
316
300
  fig.text(0.05, 0.06, f'Window Size (Centroiding): {window_size_centroiding}', fontsize=7)
317
301
  fig.text(0.05, 0.03, f'Window Size (Matching): {window_size_matching}', fontsize=7)
@@ -320,6 +304,9 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
320
304
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
321
305
  fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
322
306
  fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
307
+ if similarity_measure == 'mixture':
308
+ fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
309
+
323
310
  plt.savefig(output_path, format='pdf')
324
311
 
325
312
  if return_plot == True:
@@ -351,7 +338,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
351
338
  --output_path: path to output PDF file containing the plots of the spectra before and after preprocessing transformations. If no argument is passed, then the plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.
352
339
  '''
353
340
 
354
- # load query and reference libraries
355
341
  if query_data is None:
356
342
  print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
357
343
  sys.exit()
@@ -381,7 +367,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
381
367
  unique_reference_ids = df_reference.iloc[:,0].unique()
382
368
 
383
369
 
384
- ##### process input parameters and ensure they are in a valid format #####
385
370
  if spectrum_ID1 is not None:
386
371
  spectrum_ID1 = str(spectrum_ID1)
387
372
  else:
@@ -456,12 +441,10 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
456
441
  print(f'Warning: plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.')
457
442
  output_path = f'{Path.cwd()}/spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}.pdf'
458
443
 
459
- # get m/z values
460
444
  min_mz = np.min([np.min(df_query.iloc[:,1]), np.min(df_reference.iloc[:,1])])
461
445
  max_mz = np.max([np.max(df_query.iloc[:,1]), np.max(df_reference.iloc[:,1])])
462
446
  mzs = np.linspace(min_mz,max_mz,(max_mz-min_mz+1))
463
447
 
464
- # get unique query/reference library IDs; each query/reference ID corresponds to exactly one query/reference mass spectrum
465
448
  unique_query_ids = df_query.iloc[:,0].unique().tolist()
466
449
  unique_reference_ids = df_reference.iloc[:,0].unique().tolist()
467
450
  unique_query_ids = [str(ID) for ID in unique_query_ids]
@@ -493,7 +476,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
493
476
  q_spec = convert_spec(q_spec,mzs)
494
477
  r_spec = convert_spec(r_spec,mzs)
495
478
 
496
- # get the ranges of m/z and intensity values to display at the bottom of the two plots
497
479
  int_min_tmp_q = min(q_spec[q_spec[:,1].nonzero(),1][0])
498
480
  int_min_tmp_r = min(r_spec[r_spec[:,1].nonzero(),1][0])
499
481
  int_max_tmp_q = max(q_spec[q_spec[:,1].nonzero(),1][0])
@@ -501,13 +483,10 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
501
483
  int_min_tmp = int(min([int_min_tmp_q,int_min_tmp_r]))
502
484
  int_max_tmp = int(max([int_max_tmp_q,int_max_tmp_r]))
503
485
 
504
- # create the figure
505
486
  fig, axes = plt.subplots(nrows=2, ncols=1)
506
487
 
507
- # plot the untransformed spectra
508
488
  plt.subplot(2,1,1)
509
489
 
510
- # display warning message if either spectra have no non-zero ion fragments
511
490
  if np.max(q_spec[:,1]) == 0 or np.max(r_spec[:,1]) == 0:
512
491
  plt.text(0.5, 0.5, 'The query and/or reference spectrum has no non-zero intensities after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
513
492
  plt.xticks([])
@@ -518,7 +497,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
518
497
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1].astype(float)
519
498
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1].astype(float)
520
499
 
521
- # apply transformation to y-axis if relevant
522
500
  if y_axis_transformation == 'normalized':
523
501
  q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1] / np.max(q_spec_pre_trans[:,1])
524
502
  r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1] / np.max(r_spec_pre_trans[:,1])
@@ -542,32 +520,29 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
542
520
  plt.title('Untransformed Query and Reference Spectra', fontsize=10)
543
521
 
544
522
  for transformation in spectrum_preprocessing_order:
545
- if transformation == 'W': # weight factor transformation
523
+ if transformation == 'W':
546
524
  q_spec[:,1] = wf_transform(q_spec[:,0], q_spec[:,1], wf_mz, wf_intensity)
547
525
  r_spec[:,1] = wf_transform(r_spec[:,0], r_spec[:,1], wf_mz, wf_intensity)
548
- if transformation == 'L': # low-entropy transformation
526
+ if transformation == 'L':
549
527
  q_spec[:,1] = LE_transform(q_spec[:,1], LET_threshold, normalization_method)
550
528
  r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method)
551
- if transformation == 'N': # noise removal
529
+ if transformation == 'N':
552
530
  q_spec = remove_noise(q_spec, nr = noise_threshold)
553
531
  if high_quality_reference_library == False:
554
532
  r_spec = remove_noise(r_spec, nr = noise_threshold)
555
- if transformation == 'F': # filtering with respect to mz and/or intensity
533
+ if transformation == 'F':
556
534
  q_spec = filter_spec_gcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
557
535
  if high_quality_reference_library == False:
558
536
  r_spec = filter_spec_gcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
559
537
 
560
- # compute similarity score; if the spectra contain at most one point, their similarity is considered to be 0
561
538
  if q_spec.shape[0] > 1:
562
539
  similarity_score = get_similarity(similarity_measure, q_spec[:,1], r_spec[:,1], weights, entropy_dimension)
563
540
  else:
564
541
  similarity_score = 0
565
542
 
566
543
 
567
- # plot the transformed spectra
568
544
  plt.subplot(2,1,2)
569
545
 
570
- # display warning message if either spectra are empty or have no non-zero intensity ion fragments
571
546
  if q_spec.shape[0] == 0 or r_spec.shape[0] == 0:
572
547
  plt.text(0.5, 0.5, 'The query and/or reference spectrum has no ion fragments left after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
573
548
  plt.xticks([])
@@ -577,7 +552,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
577
552
  plt.xticks([])
578
553
  plt.yticks([])
579
554
  else:
580
- # apply transformation to y-axis if relevant
581
555
  if y_axis_transformation == 'normalized':
582
556
  q_spec[:,1] = q_spec[:,1] / np.max(q_spec[:,1])
583
557
  r_spec[:,1] = r_spec[:,1] / np.max(r_spec[:,1])
@@ -601,18 +575,19 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
601
575
  plt.title(f'Transformed Query and Reference Spectra', fontsize=10)
602
576
 
603
577
 
604
- #plt.subplots_adjust(top = 0.8, hspace = 0.7)
605
578
  plt.subplots_adjust(top=0.8, hspace=0.92, bottom=0.3)
606
579
  plt.figlegend(loc = 'upper center')
607
580
  fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
608
581
  fig.text(0.05, 0.12, f'Similarity Score: {round(similarity_score,4)}', fontsize=7)
609
- fig.text(0.05, 0.09, f'Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}', fontsize=7)
582
+ fig.text(0.05, 0.09, f"Spectrum Preprocessing Order: {''.join(spectrum_preprocessing_order)}", fontsize=7)
610
583
  fig.text(0.05, 0.06, f'High Quality Reference Library: {high_quality_reference_library}', fontsize=7)
611
584
  fig.text(0.05, 0.03, f'Raw-Scale M/Z Range: [{min_mz},{max_mz}]', fontsize=7)
612
585
  fig.text(0.45, 0.15, f'Raw-Scale Intensity Range: [{int_min_tmp},{int_max_tmp}]', fontsize=7)
613
586
  fig.text(0.45, 0.12, f'Noise Threshold: {noise_threshold}', fontsize=7)
614
587
  fig.text(0.45, 0.09, f'Weight Factors (m/z,intensity): ({wf_mz},{wf_intensity})', fontsize=7)
615
588
  fig.text(0.45, 0.06, f'Low-Entropy Threshold: {LET_threshold}', fontsize=7)
589
+ if similarity_measure=='mixture':
590
+ fig.text(0.45, 0.03, f'Weights for mixture similarity: {weights}', fontsize=7)
616
591
  plt.savefig(output_path, format='pdf')
617
592
 
618
593
  if return_plot == True:
pycompound/processing.py CHANGED
@@ -1,6 +1,4 @@
1
1
 
2
- # This script contains the functions used to transform spectra prior to computing similarity scores
3
-
4
2
  from pycompound.build_library import build_library_from_raw_data
5
3
  import scipy.stats
6
4
  import numpy as np
@@ -165,7 +163,6 @@ def centroid_spectrum(spec, window_size):
165
163
 
166
164
  spec = spec[np.argsort(spec[:,0])]
167
165
 
168
- #Fast check is the spectrum needs centroiding
169
166
  mz_array = spec[:, 0]
170
167
  need_centroid = 0
171
168
  if mz_array.shape[0] > 1:
@@ -180,7 +177,6 @@ def centroid_spectrum(spec, window_size):
180
177
  mz_delta_allowed = window_size
181
178
 
182
179
  if spec[i, 1] > 0:
183
- #Find left bound for current peak
184
180
  i_left = i - 1
185
181
  while i_left >= 0:
186
182
  mz_delta_left = spec[i, 0] - spec[i_left, 0]
@@ -190,7 +186,6 @@ def centroid_spectrum(spec, window_size):
190
186
  break
191
187
  i_left += 1
192
188
 
193
- #Find right bound for current peak
194
189
  i_right = i + 1
195
190
  while i_right < spec.shape[0]:
196
191
  mz_delta_right = spec[i_right, 0] - spec[i, 0]
@@ -199,7 +194,6 @@ def centroid_spectrum(spec, window_size):
199
194
  else:
200
195
  break
201
196
 
202
- #Merge those peaks
203
197
  intensity_sum = np.sum(spec[i_left:i_right, 1])
204
198
  intensity_weighted_sum = np.sum(spec[i_left:i_right, 0] * spec[i_left:i_right, 1])
205
199
 
@@ -246,16 +240,13 @@ def match_peaks_in_spectra(spec_a, spec_b, window_size):
246
240
  mass_delta = spec_a[a, 0] - spec_b[b, 0]
247
241
 
248
242
  if mass_delta < -window_size:
249
- # Peak only existed in spec a.
250
243
  spec_merged.append([spec_a[a, 0], spec_a[a, 1], peak_b_int])
251
244
  peak_b_int = 0.
252
245
  a += 1
253
246
  elif mass_delta > window_size:
254
- # Peak only existed in spec b.
255
247
  spec_merged.append([spec_b[b, 0], 0., spec_b[b, 1]])
256
248
  b += 1
257
249
  else:
258
- # Peak existed in both spec.
259
250
  peak_b_int += spec_b[b, 1]
260
251
  b += 1
261
252
 
@@ -10,7 +10,6 @@ import sys
10
10
 
11
11
 
12
12
  def S_cos(ints_a, ints_b):
13
- # Cosine Similarity Measure
14
13
  if np.sum(ints_a) == 0 or np.sum(ints_b) == 0:
15
14
  return(0)
16
15
  else:
@@ -18,12 +17,10 @@ def S_cos(ints_a, ints_b):
18
17
 
19
18
 
20
19
  def ent_renyi(ints, q):
21
- # Computes the Renyi entropy of a probability distribution for a given positive entropy dimension q
22
20
  return np.log(sum(np.power(ints,q))) / (1-q)
23
21
 
24
22
 
25
23
  def ent_tsallis(ints, q):
26
- # Computes the Tsallis entropy of a probability distribution for a given positive entropy dimension q
27
24
  return (sum(np.power(ints,q))-1) / (1-q)
28
25
 
29
26