pycompound 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app.py +155 -194
- pycompound/build_library.py +2 -9
- pycompound/plot_spectra.py +10 -38
- pycompound/processing.py +0 -9
- pycompound/similarity_measures.py +0 -3
- pycompound/spec_lib_matching.py +246 -81
- pycompound/spec_lib_matching_CLI.py +2 -7
- pycompound/tuning_CLI.py +1 -1
- {pycompound-0.1.1.dist-info → pycompound-0.1.2.dist-info}/METADATA +1 -1
- pycompound-0.1.2.dist-info/RECORD +14 -0
- pycompound-0.1.1.dist-info/RECORD +0 -14
- {pycompound-0.1.1.dist-info → pycompound-0.1.2.dist-info}/WHEEL +0 -0
- {pycompound-0.1.1.dist-info → pycompound-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {pycompound-0.1.1.dist-info → pycompound-0.1.2.dist-info}/top_level.txt +0 -0
pycompound/build_library.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
|
|
2
|
-
# this script has a function to extract the mass spectra from an mgf, mzML, or cdf file and write them in the necessary format for use in spectral library matching
|
|
3
|
-
|
|
4
2
|
import netCDF4 as nc
|
|
5
3
|
import numpy as np
|
|
6
4
|
import pandas as pd
|
|
@@ -14,7 +12,7 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
|
|
|
14
12
|
Converts mgf, mzML, cdf, or msp file to the necessary format for spectral library matching.
|
|
15
13
|
|
|
16
14
|
--input_path: Path to input file (must be mgf, mzML, cdf, or msp file). Mandatory argument.
|
|
17
|
-
--output_path: Path to output
|
|
15
|
+
--output_path: Path to output TXT file. Default: current working directory.
|
|
18
16
|
--is_reference: Boolean flag indicating whether IDs of spectra should be written to output. Only pass true if building a reference library with known compound IDs. Only applicable to mgf and msp files. Options: \'True\', \'False\'. Optional argument. Default: False.
|
|
19
17
|
'''
|
|
20
18
|
|
|
@@ -23,7 +21,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
|
|
|
23
21
|
sys.exit()
|
|
24
22
|
|
|
25
23
|
if output_path is None:
|
|
26
|
-
#print('Warning: no output_path specified, so library is written to {Path.cwd()}/build_library.csv')
|
|
27
24
|
tmp = input_path.split('/')
|
|
28
25
|
tmp = tmp[(len(tmp)-1)]
|
|
29
26
|
basename = tmp.split('.')[0]
|
|
@@ -34,7 +31,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
|
|
|
34
31
|
print('Error: is_reference must be either \'True\' or \'False\'.')
|
|
35
32
|
sys.exit()
|
|
36
33
|
|
|
37
|
-
# determine whether an mgf or a mzML file was passed to --input_path
|
|
38
34
|
last_three_chars = input_path[(len(input_path)-3):len(input_path)]
|
|
39
35
|
last_four_chars = input_path[(len(input_path)-4):len(input_path)]
|
|
40
36
|
if last_three_chars == 'mgf' or last_three_chars == 'MGF':
|
|
@@ -50,7 +46,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
|
|
|
50
46
|
sys.exit()
|
|
51
47
|
|
|
52
48
|
|
|
53
|
-
# obtain a list of spectra from the input file
|
|
54
49
|
spectra = []
|
|
55
50
|
if input_file_type == 'mgf':
|
|
56
51
|
with mgf.read(input_path, index_by_scans = True) as reader:
|
|
@@ -62,7 +57,6 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
|
|
|
62
57
|
spectra.append(spec)
|
|
63
58
|
|
|
64
59
|
|
|
65
|
-
# extract the relevant information from each spectra (i.e m/z ratios and intensities)
|
|
66
60
|
if input_file_type == 'mgf' or input_file_type == 'mzML':
|
|
67
61
|
ids = []
|
|
68
62
|
mzs = []
|
|
@@ -128,8 +122,7 @@ def build_library_from_raw_data(input_path=None, output_path=None, is_reference=
|
|
|
128
122
|
continue
|
|
129
123
|
|
|
130
124
|
|
|
131
|
-
# write CSV file of spectra for use in spectral library matching
|
|
132
125
|
df = pd.DataFrame({'id':ids, 'mz_ratio':mzs, 'intensity':ints})
|
|
133
|
-
df.to_csv(output_path, index=False)
|
|
126
|
+
df.to_csv(output_path, index=False, sep='\t')
|
|
134
127
|
|
|
135
128
|
|
pycompound/plot_spectra.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
|
|
2
|
-
# this script's functions plot a given query spectrum against a given reference spectrum before and after spectrum preprocessing transformations
|
|
3
|
-
|
|
4
2
|
from .processing import *
|
|
5
3
|
from .similarity_measures import *
|
|
6
4
|
import pandas as pd
|
|
@@ -36,7 +34,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
36
34
|
--output_path: path to output PDF file containing the plots of the spectra before and after preprocessing transformations. If no argument is passed, then the plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.
|
|
37
35
|
'''
|
|
38
36
|
|
|
39
|
-
# load query and reference libraries
|
|
40
37
|
if query_data is None:
|
|
41
38
|
print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
|
|
42
39
|
sys.exit()
|
|
@@ -68,7 +65,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
68
65
|
unique_reference_ids = [str(tmp) for tmp in unique_reference_ids]
|
|
69
66
|
|
|
70
67
|
|
|
71
|
-
##### process input parameters and ensure they are in a valid format #####
|
|
72
68
|
if spectrum_ID1 is not None:
|
|
73
69
|
spectrum_ID1 = str(spectrum_ID1)
|
|
74
70
|
else:
|
|
@@ -190,7 +186,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
190
186
|
q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1].astype(float)
|
|
191
187
|
r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1].astype(float)
|
|
192
188
|
|
|
193
|
-
# apply transformation to y-axis if relevant
|
|
194
189
|
if y_axis_transformation == 'normalized':
|
|
195
190
|
q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1] / np.max(q_spec_pre_trans[:,1])
|
|
196
191
|
r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1] / np.max(r_spec_pre_trans[:,1])
|
|
@@ -206,10 +201,8 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
206
201
|
else:
|
|
207
202
|
ylab = 'Raw Intensity'
|
|
208
203
|
|
|
209
|
-
# create the figure
|
|
210
204
|
fig, axes = plt.subplots(nrows=2, ncols=1)
|
|
211
205
|
|
|
212
|
-
# plot the untransformed spectra
|
|
213
206
|
plt.subplot(2,1,1)
|
|
214
207
|
plt.vlines(x=q_spec_pre_trans[:,0], ymin=[0]*q_spec_pre_trans.shape[0], ymax=q_spec_pre_trans[:,1], linewidth=3, color='blue', label=f'Spectrum ID 1: {spectrum_ID1}')
|
|
215
208
|
plt.vlines(x=r_spec_pre_trans[:,0], ymin=[0]*r_spec_pre_trans.shape[0], ymax=-r_spec_pre_trans[:,1], linewidth=3, color='red', label=f'Spectrum ID 2: {spectrum_ID2}')
|
|
@@ -219,7 +212,6 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
219
212
|
plt.yticks(fontsize=7)
|
|
220
213
|
plt.title('Untransformed Spectra', fontsize=10)
|
|
221
214
|
|
|
222
|
-
# get the ranges of m/z and intensity values to display at the bottom of the two plots
|
|
223
215
|
mz_min_tmp_q = round(q_spec[:,0].min(),1)
|
|
224
216
|
mz_min_tmp_r = round(r_spec[:,0].min(),1)
|
|
225
217
|
int_min_tmp_q = round(q_spec[:,1].min(),1)
|
|
@@ -233,51 +225,45 @@ def generate_plots_on_HRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
233
225
|
int_min_tmp = min([int_min_tmp_q,int_min_tmp_r])
|
|
234
226
|
int_max_tmp = max([int_max_tmp_q,int_max_tmp_r])
|
|
235
227
|
|
|
236
|
-
# perform the spectrum preprocessing transformations in the order specified
|
|
237
228
|
is_matched = False
|
|
238
229
|
for transformation in spectrum_preprocessing_order:
|
|
239
|
-
if transformation == 'C' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
230
|
+
if transformation == 'C' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
240
231
|
q_spec = centroid_spectrum(q_spec, window_size=window_size_centroiding)
|
|
241
232
|
r_spec = centroid_spectrum(r_spec, window_size=window_size_centroiding)
|
|
242
|
-
if transformation == 'M' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
233
|
+
if transformation == 'M' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
243
234
|
m_spec = match_peaks_in_spectra(spec_a=q_spec, spec_b=r_spec, window_size=window_size_matching)
|
|
244
235
|
q_spec = m_spec[:,0:2]
|
|
245
236
|
r_spec = m_spec[:,[0,2]]
|
|
246
237
|
is_matched = True
|
|
247
|
-
if transformation == 'W' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
238
|
+
if transformation == 'W' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
248
239
|
q_spec[:,1] = wf_transform(q_spec[:,0], q_spec[:,1], wf_mz, wf_intensity)
|
|
249
240
|
r_spec[:,1] = wf_transform(r_spec[:,0], r_spec[:,1], wf_mz, wf_intensity)
|
|
250
|
-
if transformation == 'L' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
241
|
+
if transformation == 'L' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
251
242
|
q_spec[:,1] = LE_transform(q_spec[:,1], LET_threshold, normalization_method=normalization_method)
|
|
252
243
|
r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method=normalization_method)
|
|
253
|
-
if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
244
|
+
if transformation == 'N' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
254
245
|
q_spec = remove_noise(q_spec, nr = noise_threshold)
|
|
255
246
|
r_spec = remove_noise(r_spec, nr = noise_threshold)
|
|
256
|
-
if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
247
|
+
if transformation == 'F' and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
257
248
|
q_spec = filter_spec_lcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
|
|
258
249
|
r_spec = filter_spec_lcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max, is_matched = is_matched)
|
|
259
250
|
|
|
260
|
-
# intensities of query and reference library
|
|
261
251
|
q_ints = q_spec[:,1]
|
|
262
252
|
r_ints = r_spec[:,1]
|
|
263
253
|
|
|
264
|
-
# if there is at least one non-zero intensity ion fragment in either spectra, compute their similarity
|
|
265
254
|
if np.sum(q_ints) != 0 and np.sum(r_ints) != 0 and q_spec.shape[0] > 1 and r_spec.shape[1] > 1:
|
|
266
255
|
similarity_score = get_similarity(similarity_measure, q_ints, r_ints, weights, entropy_dimension)
|
|
267
256
|
else:
|
|
268
257
|
similarity_score = 0
|
|
269
258
|
|
|
270
|
-
# plot the transformed spectra
|
|
271
259
|
plt.subplot(2,1,2)
|
|
272
260
|
|
|
273
|
-
# display warning message if either spectra are empty or have no non-zero intensity ion fragments
|
|
274
261
|
if q_spec.shape[0] > 1:
|
|
275
262
|
if np.max(q_spec[:,1]) == 0 or np.max(r_spec[:,1]) == 0:
|
|
276
263
|
plt.text(0.5, 0.5, 'The query and/or reference spectrum has no non-zero intensities after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
|
|
277
264
|
plt.xticks([])
|
|
278
265
|
plt.yticks([])
|
|
279
266
|
else:
|
|
280
|
-
# apply transformation to y-axis if relevant
|
|
281
267
|
if y_axis_transformation == 'normalized':
|
|
282
268
|
q_spec[:,1] = q_spec[:,1] / np.max(q_spec[:,1])
|
|
283
269
|
r_spec[:,1] = r_spec[:,1] / np.max(r_spec[:,1])
|
|
@@ -352,7 +338,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
352
338
|
--output_path: path to output PDF file containing the plots of the spectra before and after preprocessing transformations. If no argument is passed, then the plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.
|
|
353
339
|
'''
|
|
354
340
|
|
|
355
|
-
# load query and reference libraries
|
|
356
341
|
if query_data is None:
|
|
357
342
|
print('\nError: No argument passed to the mandatory query_data. Please pass the path to the CSV file of the query data.')
|
|
358
343
|
sys.exit()
|
|
@@ -382,7 +367,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
382
367
|
unique_reference_ids = df_reference.iloc[:,0].unique()
|
|
383
368
|
|
|
384
369
|
|
|
385
|
-
##### process input parameters and ensure they are in a valid format #####
|
|
386
370
|
if spectrum_ID1 is not None:
|
|
387
371
|
spectrum_ID1 = str(spectrum_ID1)
|
|
388
372
|
else:
|
|
@@ -457,12 +441,10 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
457
441
|
print(f'Warning: plots will be saved to the PDF ./spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}_plot.pdf in the current working directory.')
|
|
458
442
|
output_path = f'{Path.cwd()}/spectrum1_{spectrum_ID1}_spectrum2_{spectrum_ID2}.pdf'
|
|
459
443
|
|
|
460
|
-
# get m/z values
|
|
461
444
|
min_mz = np.min([np.min(df_query.iloc[:,1]), np.min(df_reference.iloc[:,1])])
|
|
462
445
|
max_mz = np.max([np.max(df_query.iloc[:,1]), np.max(df_reference.iloc[:,1])])
|
|
463
446
|
mzs = np.linspace(min_mz,max_mz,(max_mz-min_mz+1))
|
|
464
447
|
|
|
465
|
-
# get unique query/reference library IDs; each query/reference ID corresponds to exactly one query/reference mass spectrum
|
|
466
448
|
unique_query_ids = df_query.iloc[:,0].unique().tolist()
|
|
467
449
|
unique_reference_ids = df_reference.iloc[:,0].unique().tolist()
|
|
468
450
|
unique_query_ids = [str(ID) for ID in unique_query_ids]
|
|
@@ -494,7 +476,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
494
476
|
q_spec = convert_spec(q_spec,mzs)
|
|
495
477
|
r_spec = convert_spec(r_spec,mzs)
|
|
496
478
|
|
|
497
|
-
# get the ranges of m/z and intensity values to display at the bottom of the two plots
|
|
498
479
|
int_min_tmp_q = min(q_spec[q_spec[:,1].nonzero(),1][0])
|
|
499
480
|
int_min_tmp_r = min(r_spec[r_spec[:,1].nonzero(),1][0])
|
|
500
481
|
int_max_tmp_q = max(q_spec[q_spec[:,1].nonzero(),1][0])
|
|
@@ -502,13 +483,10 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
502
483
|
int_min_tmp = int(min([int_min_tmp_q,int_min_tmp_r]))
|
|
503
484
|
int_max_tmp = int(max([int_max_tmp_q,int_max_tmp_r]))
|
|
504
485
|
|
|
505
|
-
# create the figure
|
|
506
486
|
fig, axes = plt.subplots(nrows=2, ncols=1)
|
|
507
487
|
|
|
508
|
-
# plot the untransformed spectra
|
|
509
488
|
plt.subplot(2,1,1)
|
|
510
489
|
|
|
511
|
-
# display warning message if either spectra have no non-zero ion fragments
|
|
512
490
|
if np.max(q_spec[:,1]) == 0 or np.max(r_spec[:,1]) == 0:
|
|
513
491
|
plt.text(0.5, 0.5, 'The query and/or reference spectrum has no non-zero intensities after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
|
|
514
492
|
plt.xticks([])
|
|
@@ -519,7 +497,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
519
497
|
q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1].astype(float)
|
|
520
498
|
r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1].astype(float)
|
|
521
499
|
|
|
522
|
-
# apply transformation to y-axis if relevant
|
|
523
500
|
if y_axis_transformation == 'normalized':
|
|
524
501
|
q_spec_pre_trans[:,1] = q_spec_pre_trans[:,1] / np.max(q_spec_pre_trans[:,1])
|
|
525
502
|
r_spec_pre_trans[:,1] = r_spec_pre_trans[:,1] / np.max(r_spec_pre_trans[:,1])
|
|
@@ -543,32 +520,29 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
543
520
|
plt.title('Untransformed Query and Reference Spectra', fontsize=10)
|
|
544
521
|
|
|
545
522
|
for transformation in spectrum_preprocessing_order:
|
|
546
|
-
if transformation == 'W':
|
|
523
|
+
if transformation == 'W':
|
|
547
524
|
q_spec[:,1] = wf_transform(q_spec[:,0], q_spec[:,1], wf_mz, wf_intensity)
|
|
548
525
|
r_spec[:,1] = wf_transform(r_spec[:,0], r_spec[:,1], wf_mz, wf_intensity)
|
|
549
|
-
if transformation == 'L':
|
|
526
|
+
if transformation == 'L':
|
|
550
527
|
q_spec[:,1] = LE_transform(q_spec[:,1], LET_threshold, normalization_method)
|
|
551
528
|
r_spec[:,1] = LE_transform(r_spec[:,1], LET_threshold, normalization_method)
|
|
552
|
-
if transformation == 'N':
|
|
529
|
+
if transformation == 'N':
|
|
553
530
|
q_spec = remove_noise(q_spec, nr = noise_threshold)
|
|
554
531
|
if high_quality_reference_library == False:
|
|
555
532
|
r_spec = remove_noise(r_spec, nr = noise_threshold)
|
|
556
|
-
if transformation == 'F':
|
|
533
|
+
if transformation == 'F':
|
|
557
534
|
q_spec = filter_spec_gcms(q_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
|
|
558
535
|
if high_quality_reference_library == False:
|
|
559
536
|
r_spec = filter_spec_gcms(r_spec, mz_min = mz_min, mz_max = mz_max, int_min = int_min, int_max = int_max)
|
|
560
537
|
|
|
561
|
-
# compute similarity score; if the spectra contain at most one point, their similarity is considered to be 0
|
|
562
538
|
if q_spec.shape[0] > 1:
|
|
563
539
|
similarity_score = get_similarity(similarity_measure, q_spec[:,1], r_spec[:,1], weights, entropy_dimension)
|
|
564
540
|
else:
|
|
565
541
|
similarity_score = 0
|
|
566
542
|
|
|
567
543
|
|
|
568
|
-
# plot the transformed spectra
|
|
569
544
|
plt.subplot(2,1,2)
|
|
570
545
|
|
|
571
|
-
# display warning message if either spectra are empty or have no non-zero intensity ion fragments
|
|
572
546
|
if q_spec.shape[0] == 0 or r_spec.shape[0] == 0:
|
|
573
547
|
plt.text(0.5, 0.5, 'The query and/or reference spectrum has no ion fragments left after transformations.\n Change transformation parameters.', ha='center', va='center', fontsize=7, color='black')
|
|
574
548
|
plt.xticks([])
|
|
@@ -578,7 +552,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
578
552
|
plt.xticks([])
|
|
579
553
|
plt.yticks([])
|
|
580
554
|
else:
|
|
581
|
-
# apply transformation to y-axis if relevant
|
|
582
555
|
if y_axis_transformation == 'normalized':
|
|
583
556
|
q_spec[:,1] = q_spec[:,1] / np.max(q_spec[:,1])
|
|
584
557
|
r_spec[:,1] = r_spec[:,1] / np.max(r_spec[:,1])
|
|
@@ -602,7 +575,6 @@ def generate_plots_on_NRMS_data(query_data=None, reference_data=None, spectrum_I
|
|
|
602
575
|
plt.title(f'Transformed Query and Reference Spectra', fontsize=10)
|
|
603
576
|
|
|
604
577
|
|
|
605
|
-
#plt.subplots_adjust(top = 0.8, hspace = 0.7)
|
|
606
578
|
plt.subplots_adjust(top=0.8, hspace=0.92, bottom=0.3)
|
|
607
579
|
plt.figlegend(loc = 'upper center')
|
|
608
580
|
fig.text(0.05, 0.15, f'Similarity Measure: {similarity_measure.capitalize()}', fontsize=7)
|
pycompound/processing.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
|
|
2
|
-
# This script contains the functions used to transform spectra prior to computing similarity scores
|
|
3
|
-
|
|
4
2
|
from pycompound.build_library import build_library_from_raw_data
|
|
5
3
|
import scipy.stats
|
|
6
4
|
import numpy as np
|
|
@@ -165,7 +163,6 @@ def centroid_spectrum(spec, window_size):
|
|
|
165
163
|
|
|
166
164
|
spec = spec[np.argsort(spec[:,0])]
|
|
167
165
|
|
|
168
|
-
#Fast check is the spectrum needs centroiding
|
|
169
166
|
mz_array = spec[:, 0]
|
|
170
167
|
need_centroid = 0
|
|
171
168
|
if mz_array.shape[0] > 1:
|
|
@@ -180,7 +177,6 @@ def centroid_spectrum(spec, window_size):
|
|
|
180
177
|
mz_delta_allowed = window_size
|
|
181
178
|
|
|
182
179
|
if spec[i, 1] > 0:
|
|
183
|
-
#Find left bound for current peak
|
|
184
180
|
i_left = i - 1
|
|
185
181
|
while i_left >= 0:
|
|
186
182
|
mz_delta_left = spec[i, 0] - spec[i_left, 0]
|
|
@@ -190,7 +186,6 @@ def centroid_spectrum(spec, window_size):
|
|
|
190
186
|
break
|
|
191
187
|
i_left += 1
|
|
192
188
|
|
|
193
|
-
#Find right bound for current peak
|
|
194
189
|
i_right = i + 1
|
|
195
190
|
while i_right < spec.shape[0]:
|
|
196
191
|
mz_delta_right = spec[i_right, 0] - spec[i, 0]
|
|
@@ -199,7 +194,6 @@ def centroid_spectrum(spec, window_size):
|
|
|
199
194
|
else:
|
|
200
195
|
break
|
|
201
196
|
|
|
202
|
-
#Merge those peaks
|
|
203
197
|
intensity_sum = np.sum(spec[i_left:i_right, 1])
|
|
204
198
|
intensity_weighted_sum = np.sum(spec[i_left:i_right, 0] * spec[i_left:i_right, 1])
|
|
205
199
|
|
|
@@ -246,16 +240,13 @@ def match_peaks_in_spectra(spec_a, spec_b, window_size):
|
|
|
246
240
|
mass_delta = spec_a[a, 0] - spec_b[b, 0]
|
|
247
241
|
|
|
248
242
|
if mass_delta < -window_size:
|
|
249
|
-
# Peak only existed in spec a.
|
|
250
243
|
spec_merged.append([spec_a[a, 0], spec_a[a, 1], peak_b_int])
|
|
251
244
|
peak_b_int = 0.
|
|
252
245
|
a += 1
|
|
253
246
|
elif mass_delta > window_size:
|
|
254
|
-
# Peak only existed in spec b.
|
|
255
247
|
spec_merged.append([spec_b[b, 0], 0., spec_b[b, 1]])
|
|
256
248
|
b += 1
|
|
257
249
|
else:
|
|
258
|
-
# Peak existed in both spec.
|
|
259
250
|
peak_b_int += spec_b[b, 1]
|
|
260
251
|
b += 1
|
|
261
252
|
|
|
pycompound/similarity_measures.py
CHANGED
|
@@ -10,7 +10,6 @@ import sys
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def S_cos(ints_a, ints_b):
|
|
13
|
-
# Cosine Similarity Measure
|
|
14
13
|
if np.sum(ints_a) == 0 or np.sum(ints_b) == 0:
|
|
15
14
|
return(0)
|
|
16
15
|
else:
|
|
@@ -18,12 +17,10 @@ def S_cos(ints_a, ints_b):
|
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
def ent_renyi(ints, q):
|
|
21
|
-
# Computes the Renyi entropy of a probability distribution for a given positive entropy dimension q
|
|
22
20
|
return np.log(sum(np.power(ints,q))) / (1-q)
|
|
23
21
|
|
|
24
22
|
|
|
25
23
|
def ent_tsallis(ints, q):
|
|
26
|
-
# Computes the Tsallis entropy of a probability distribution for a given positive entropy dimension q
|
|
27
24
|
return (sum(np.power(ints,q))-1) / (1-q)
|
|
28
25
|
|
|
29
26
|
|