ms-spectral_summing 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/spectral_summing.rb +158 -0
- metadata +2 -1
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# Plain record describing one scan. Parser builds it positionally, so the
# member order below must not change.
Spectrum = Struct.new(
  :spectrum,        # the scan object handed to Parser's block
  :scan_num,        # scan.num
  :scan_time,       # scan.time
  :scan_range,      # Range built from scan.start_mz..scan.end_mz
  :precursor_mass,  # scan.precursor.mz (an m/z value, despite the name)
  :charge_states,   # scan.precursor.charge_states
  :intensities,     # scan.spectrum.intensities
  :mz_values        # scan.spectrum.mzs
)
|
|
3
|
+
|
|
4
|
+
# Reads scans from a run file through Ms::Msrun and stores them as Spectrum
# records in #spectra.
class Parser
  # Spectrum records collected by the most recent parse call (nil before the
  # first call).
  attr_accessor :spectra

  # file:: path used by the parse methods when they are not given one.
  def initialize(file)
    @file = file
  end

  # Collects every scan yielded by Ms::Msrun#each(:ms_level => 2).
  #
  # file:: optional path overriding the one given to the constructor.
  # Returns whatever Ms::Msrun.open returns; read the records from #spectra.
  def parse(file = nil)
    collect_scans(file || @file) { |_scan| true }
  end

  # Same as #parse, but keeps only scans whose +num+ is in +scan_nums+.
  #
  # scan_nums:: any object answering include?(scan.num).
  # file::      optional path overriding the one given to the constructor.
  def parse_by_scan_num(scan_nums, file = nil)
    collect_scans(file || @file) { |scan| scan_nums.include?(scan.num) }
  end

  private

  # Resets #spectra, then appends a Spectrum for each scan the block accepts.
  def collect_scans(path)
    # Loaded lazily so the class can be defined without the ms-msrun gem.
    require 'ms/msrun'
    @spectra = []
    Ms::Msrun.open(path) do |run|
      run.each(:ms_level => 2) do |scan|
        @spectra << spectrum_from(scan) if yield(scan)
      end
    end
  end

  # Copies the fields of one scan into a Spectrum, in member order.
  def spectrum_from(scan)
    Spectrum.new(
      scan,
      scan.num,
      scan.time,
      (scan.start_mz..scan.end_mz),
      scan.precursor.mz,
      scan.precursor.charge_states,
      scan.spectrum.intensities,
      scan.spectrum.mzs
    )
  end
end
|
|
32
|
+
|
|
33
|
+
# Sums the peak lists of two spectra on a shared, evenly spaced m/z axis and
# writes spectra as MGF text files.
class Combiner
  # :bin_window::                      spacing of the summed m/z axis.
  # :precursor_mass_tolerance_in_ppm:: window used by #combine to decide
  #                                    whether two precursors match.
  # :tolerant::                        when true, #combine skips that check.
  # :window_size::                     not read by any method in this class.
  Defaults = {:bin_window => 0.1, :window_size => 4, :precursor_mass_tolerance_in_ppm => 10, :tolerant => false}
  attr_accessor :output_spectra

  # spectra:: stored as given; not used by the methods below.
  # opts::    overrides merged over Defaults.
  def initialize(spectra = nil, opts = {})
    @spectra = spectra
    @opts = Defaults.merge(opts)
  end

  # Sums the peaks of two spectra.
  #
  # Returns [mz_values, intensities] from #summer, or nil when
  # spectrum2.precursor_mass lies outside the ppm window around
  # spectrum1.precursor_mass and the :tolerant option is off.
  def combine(spectrum1, spectrum2)
    unless @opts[:tolerant]
      window = calculate_daltons_from_ppm(spectrum1.precursor_mass, @opts[:precursor_mass_tolerance_in_ppm])
      return nil unless window.include?(spectrum2.precursor_mass)
    end
    summer(spectrum1.mz_values, spectrum1.intensities, spectrum2.mz_values, spectrum2.intensities)
  end

  # Bins two peak lists onto one axis and sums the intensities per bin.
  #
  # x1, y1:: m/z values and matching intensities of the first peak list.
  # x2, y2:: the same for the second peak list.
  #
  # Bin centres sit at min_mz + i * :bin_window, where min_mz is the smallest
  # m/z of either list; each peak is added to the bin whose centre is nearest.
  # Enough bins are created to hold the largest m/z, so no peak is dropped,
  # and the input arrays are left untouched.
  #
  # Returns [bin_centres, summed_intensities]; both are empty when neither
  # list has peaks. Raises ArgumentError when :bin_window is not positive.
  def summer(x1, y1, x2, y2)
    peaks = x1.zip(y1) + x2.zip(y2)
    return [[], []] if peaks.empty?

    bin_width = @opts[:bin_window].to_f
    raise ArgumentError, ':bin_window must be positive' unless bin_width > 0

    lowest, highest = (x1 + x2).minmax
    bin_index = lambda { |mz| ((mz - lowest) / bin_width).round }

    # highest maps to the largest index, so every peak has a bin to land in.
    num_bins = bin_index.call(highest) + 1
    data_x = Array.new(num_bins) { |i| lowest + i * bin_width }
    data_y = Array.new(num_bins, 0)
    peaks.each { |mz, intensity| data_y[bin_index.call(mz)] += intensity }
    [data_x, data_y]
  end

  # Combines two spectra into a new Spectrum that can be fed back into
  # another combine call.
  #
  # The result carries the mean of the two precursor masses, the summed peak
  # list, and spectrum1's charge states (needed later by #to_mgf); the other
  # Spectrum members stay nil.
  #
  # Raises ArgumentError when #combine rejects the pair.
  def combine_for_more_combining(spectrum1, spectrum2)
    arr = combine_or_raise(spectrum1, spectrum2)
    joined_spectrum = Spectrum.new
    joined_spectrum.precursor_mass = (spectrum1.precursor_mass + spectrum2.precursor_mass) / 2.0
    joined_spectrum.charge_states = spectrum1.charge_states
    joined_spectrum.mz_values = arr.first
    joined_spectrum.intensities = arr.last
    joined_spectrum
  end

  # Returns the Range mass - d .. mass + d, where d is +ppm+ parts per
  # million of +mass+.
  def calculate_daltons_from_ppm(mass, ppm)
    diff = ppm * mass / 1e6
    (mass - diff)..(mass + diff)
  end

  # Writes one spectrum to +filename+ as a single BEGIN IONS/END IONS block.
  # Peaks whose intensity equals 0 are left out.
  def to_mgf(spectrum, filename)
    title = "TITLE=Spec1:#{spectrum.precursor_mass}_#{Array(spectrum.charge_states).first}"
    write_mgf(filename, title, spectrum.charge_states, spectrum.mz_values, spectrum.intensities)
  end

  # Sums two spectra with #combine and writes the result to +filename+,
  # using spectrum1's charge states for the CHARGE line.
  #
  # Raises ArgumentError when #combine rejects the pair.
  def combine_to_mgf(spectrum1, spectrum2, filename) # Thanks JOHN!!! Ms-Msrun 0.3.6
    results = combine_or_raise(spectrum1, spectrum2)
    title = "TITLE=Spec1:#{spectrum1.precursor_mass}_Spec2:#{spectrum2.precursor_mass}.#{spectrum1.scan_num}_#{spectrum2.scan_num}_#{Array(spectrum1.charge_states).first}"
    write_mgf(filename, title, spectrum1.charge_states, results.first, results.last)
  end

  private

  # Runs #combine and turns its nil "no match" result into an explicit error.
  def combine_or_raise(spectrum1, spectrum2)
    summed = combine(spectrum1, spectrum2)
    return summed unless summed.nil?
    raise ArgumentError, "precursor #{spectrum2.precursor_mass} is not within #{@opts[:precursor_mass_tolerance_in_ppm]} ppm of #{spectrum1.precursor_mass}"
  end

  # Shared MGF writer behind #to_mgf and #combine_to_mgf.
  def write_mgf(filename, title, charge_states, mz_values, intensities)
    # One "N+" entry per charge state, joined with " and "; the CHARGE line is
    # skipped entirely when no charge state is known.
    charges = Array(charge_states).map { |z| "#{z}+" }.join(' and ')
    File.open(filename, 'w') do |out|
      out.puts "BEGIN IONS"
      out.puts title
      out.puts "CHARGE=#{charges}" unless charges.empty?
      # our current mzML parser doesn't have scan.time implemented...
      mz_values.zip(intensities).each do |mz, intensity|
        next if intensity == 0
        out.puts format("%.5f\t%.5f", mz, intensity)
      end
      out.puts "END IONS"
    end
  end
end
|
|
126
|
+
|
|
127
|
+
# Command-line entry point. Arguments come in pairs: a run file followed by a
# text file listing, one per line, the scan numbers to take from that run.
# All selected scans are summed in order and written to one MGF file.
if ARGV.empty? || ARGV.size.odd?
  puts "Usage: #{__FILE__} input_file.mzXML scan_nums.txt input_file2.mzXML scan_nums2.txt ... "
  puts "NOTE: scan_nums.txt files must have a new line break between each integer value."
  puts 'Writes the summed spectrum to combined_multiple_files.mgf'
  exit
else
  mzXMLs = []
  scan_num_files = []
  until ARGV.empty?
    mzXMLs << ARGV.shift
    scan_num_files << ARGV.shift
  end

  # Read each list file into an array of Integers, ignoring blank lines.
  # NOTE(review): assumes Ms::Msrun reports scan.num as an Integer, so the
  # values must be converted before Parser compares them — confirm.
  scan_nums = scan_num_files.map do |path|
    IO.readlines(path).map { |line| line.strip }.reject { |line| line.empty? }.map { |line| Integer(line, 10) }
  end

  # Parse the files and gather the selected scans into one flat list of
  # Spectrum objects to combine.
  spectras = []
  mzXMLs.each_with_index do |file, i|
    parser = Parser.new(file)
    parser.parse_by_scan_num(scan_nums[i])
    spectras.concat(parser.spectra)
  end
  abort 'No scans matched the requested scan numbers; nothing to combine.' if spectras.empty?

  # Fold every remaining spectrum into the running sum, then write it out.
  combined_spectrum = spectras.shift
  combiner = Combiner.new(combined_spectrum)
  spectras.each do |spectrum|
    combined_spectrum = combiner.combine_for_more_combining(combined_spectrum, spectrum)
  end
  combiner.to_mgf(combined_spectrum, 'combined_multiple_files.mgf')
end
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
metadata
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
name: ms-spectral_summing
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease:
|
|
5
|
-
version: 0.0.1
|
|
5
|
+
version: 0.0.2
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
8
8
|
- Ryan M Taylor
|
|
@@ -25,6 +25,7 @@ files:
|
|
|
25
25
|
- README.rdoc
|
|
26
26
|
- LICENSE.txt
|
|
27
27
|
- lib/spectral_summing.rb
|
|
28
|
+
- bin/spectral_summing.rb
|
|
28
29
|
homepage: https://github.com/princelab/spectral_summing
|
|
29
30
|
licenses: []
|
|
30
31
|
|