mspire-simulator 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +46 -3
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/mspire-simulator +8 -0
- data/bin/sim_mail +2 -2
- data/lib/cv_parser.rb +7 -0
- data/lib/ms/curvefit/curve_fit_helper.rb +26 -20
- data/lib/ms/curvefit/mzml_reader.rb +1 -1
- data/lib/ms/curvefit.rb +25 -8
- data/lib/ms/isoelectric_calc.rb +162 -103
- data/lib/ms/merger.rb +46 -33
- data/lib/ms/mzml_wrapper.rb +74 -29
- data/lib/ms/noise.rb +28 -28
- data/lib/ms/rt/rt_helper.rb +3 -3
- data/lib/ms/rt/rtgenerator.rb +63 -51
- data/lib/ms/rt/weka.rb +17 -17
- data/lib/ms/sim_digester.rb +45 -26
- data/lib/ms/sim_feature.rb +180 -122
- data/lib/ms/sim_peptide.rb +58 -55
- data/lib/ms/sim_spectra.rb +22 -23
- data/lib/ms/sim_trollop.rb +36 -32
- data/lib/ms/tr_file_writer.rb +111 -98
- data/lib/progress.rb +21 -20
- data/mspire-simulator.gemspec +5 -5
- data/spec/file_writer_spec.rb +2 -1
- data/spec/merger_spec.rb +2 -1
- data/spec/ms-simulate_spec.rb +1 -1
- data/spec/peptide_spec.rb +2 -1
- data/spec/spec_helper.rb +8 -3
- data/spec/spectra_spec.rb +4 -3
- metadata +5 -5
- data/spec/progress_spec.rb +0 -22
data/lib/ms/sim_digester.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
|
+
# Re-opens String so that a sequence string can carry an abundance value
# alongside its characters.
#
# +abu+ is an ordinary per-instance attribute: it is +nil+ until something
# assigns it. (An earlier `abu = 0` statement in this class body only created
# a throw-away local variable and never acted as a default, so it is gone.)
class String
  attr_accessor :abu
end
|
1
6
|
|
2
7
|
module MS
|
3
8
|
class Sim_Digester
|
4
|
-
|
9
|
+
|
5
10
|
attr_reader :digested_file
|
6
11
|
attr_writer :digested_file
|
7
|
-
|
12
|
+
|
8
13
|
def initialize(digestor,pH)
|
9
14
|
@digestor = digestor
|
10
15
|
@pH = pH
|
@@ -13,38 +18,47 @@ module MS
|
|
13
18
|
system("mkdir .m/A .m/R .m/N .m/D .m/C .m/E .m/Q .m/G .m/H .m/I .m/L .m/K .m/M .m/F .m/P .m/S .m/T .m/W .m/Y .m/V .m/U .m/O")
|
14
19
|
system("mkdir .i/A .i/R .i/N .i/D .i/C .i/E .i/Q .i/G .i/H .i/I .i/L .i/K .i/M .i/F .i/P .i/S .i/T .i/W .i/Y .i/V .i/U .i/O")
|
15
20
|
end
|
16
|
-
|
21
|
+
|
17
22
|
# Deletes the `.m` and `.i` directory trees (the ones the initializer fills
# with one sub-directory per residue letter) by shelling out to `rm`.
#
# Returns whatever Kernel#system reports for the command.
def clean
  removal_command = "rm -r -f .m .i"
  system(removal_command)
end
|
20
|
-
|
25
|
+
|
21
26
|
def create_digested_file(file)
|
27
|
+
abundances = []
|
22
28
|
inFile = File.open(file,"r")
|
23
29
|
seq = ""
|
24
30
|
inFile.each_line do |sequence|
|
25
|
-
if sequence =~ />/
|
26
|
-
|
27
|
-
|
31
|
+
if sequence =~ />/
|
32
|
+
num = sequence.match(/\#.+/).to_s.chomp.gsub('#','')
|
33
|
+
if num != ""
|
34
|
+
abundances<<(num.to_f)*10.0**-2
|
35
|
+
else
|
36
|
+
abundances<<1.0
|
37
|
+
end
|
38
|
+
sequence
|
39
|
+
seq = seq<<";"
|
40
|
+
elsif sequence == "/n"; else
|
28
41
|
seq = seq<<sequence.chomp
|
29
42
|
end
|
30
43
|
end
|
31
44
|
inFile.close
|
32
|
-
|
45
|
+
|
33
46
|
proteins = seq.split(/;/).delete_if{|str| str == ""}
|
34
47
|
|
35
48
|
trypsin = Mspire::Digester[@digestor]
|
36
|
-
|
49
|
+
|
37
50
|
digested = []
|
38
51
|
d_file = File.open(@digested_file, "w")
|
39
|
-
proteins.
|
52
|
+
proteins.each_with_index do |prot,index|
|
40
53
|
dig = trypsin.digest(prot)
|
41
54
|
dig.each do |d|
|
55
|
+
d.abu = abundances[index]
|
42
56
|
digested<<d
|
43
57
|
end
|
44
58
|
end
|
45
59
|
proteins.clear
|
46
60
|
digested.uniq!
|
47
|
-
|
61
|
+
|
48
62
|
trun_digested = []
|
49
63
|
if digested.length > 50000
|
50
64
|
50000.times do
|
@@ -53,9 +67,9 @@ module MS
|
|
53
67
|
digested.clear
|
54
68
|
digested = trun_digested
|
55
69
|
end
|
56
|
-
|
70
|
+
|
57
71
|
digested.each do |dig|
|
58
|
-
d_file.puts(dig)
|
72
|
+
d_file.puts(dig<<"#"<<dig.abu.to_s)
|
59
73
|
end
|
60
74
|
d_file.close
|
61
75
|
num_digested = digested.size
|
@@ -63,36 +77,41 @@ module MS
|
|
63
77
|
puts "Number of peptides: #{num_digested}"
|
64
78
|
return num_digested
|
65
79
|
end
|
66
|
-
|
80
|
+
|
67
81
|
# Digests the proteins found in +file+ and turns every digested sequence into
# charged MS::Peptide objects.
#
# The work is done in two steps: #create_digested_file writes one
# "<sequence>#<abundance>" line per peptide to @digested_file, then each line
# is read back, split into sequence and abundance, and a peptide is built for
# the floor and for the ceiling of the charge computed at @pH (a charge of 0
# is skipped).
#
# file - path of the protein input file, handed to #create_digested_file.
#
# Returns an Array of MS::Peptide.
#
# The intermediate file is closed and deleted even when building a peptide
# raises, so a failed run does not leave an open handle or a stale file.
def digest(file)
  num_digested = create_digested_file(file)
  peptides = []

  prog = Progress.new("Creating peptides '#{file}':")
  num = 0
  step = num_digested / 100.0

  begin
    File.open(@digested_file, "r") do |d_file|
      d_file.each_line.with_index do |peptide_seq, i|
        peptide_seq.chomp!
        # Everything from the '#' on is the abundance; strip it off so that
        # only the residues remain in the sequence string.
        peptide_seq.abu = peptide_seq.match(/#.+/).to_s.chomp.gsub('#', '').to_f
        peptide_seq.gsub!(/#.+/, '')

        # Only touch the progress bar when another percent has been reached.
        if i > step * (num + 1)
          num = ((i / num_digested.to_f) * 100.0).to_i
          prog.update(num)
        end

        charge_ratio = charge_at_pH(identify_potential_charges(peptide_seq), @pH)
        # NOTE(review): when charge_ratio is a whole number, floor and ceil
        # coincide and the same peptide is emitted twice (unchanged from the
        # previous behaviour) -- confirm whether that is intended.
        [charge_ratio.floor, charge_ratio.ceil].each do |charge|
          next if charge == 0
          peptides << MS::Peptide.new(peptide_seq, charge, peptide_seq.abu)
        end
      end
    end
    prog.finish!
  ensure
    File.delete(@digested_file) if File.exist?(@digested_file)
  end

  peptides
end
|
98
117
|
end
|
data/lib/ms/sim_feature.rb
CHANGED
@@ -1,173 +1,231 @@
|
|
1
|
-
|
2
1
|
require 'time'
|
3
2
|
require 'distribution'
|
3
|
+
require 'fragmenter'
|
4
4
|
require 'ms/sim_peptide'
|
5
5
|
require 'ms/rt/rt_helper'
|
6
6
|
require 'ms/tr_file_writer'
|
7
7
|
|
8
|
+
# Re-opens Array so that a spectrum stored as a plain array can carry extra
# metadata as per-instance attributes. All of them are +nil+ until assigned:
#
#   ms2        - the MS2 spectra attached to this spectrum
#   ms_level   - the MS level of the spectrum
#   pre_mz     - precursor m/z
#   pre_int    - precursor intensity
#   pre_charge - precursor charge
class Array
  attr_accessor :ms2, :ms_level, :pre_mz, :pre_int, :pre_charge
end
|
12
|
+
|
8
13
|
module MS
|
9
14
|
class Sim_Feature
|
10
15
|
# Turns peptides into features and collects their data points per retention
# time.
#
# peptides - collection of peptides; each one is passed through #getInts and
#            the result is appended to @features.
# opts     - option hash, stored in @opts (read by #getInts; :sampling_rate
#            is also read by the disabled MS2 branch below).
# one_d    - flag stored in @one_d and consulted by #getInts.
#
# Afterwards @data maps each retention time to `[mzs, ints]`, holding only the
# points whose intensity is above 0.9, and @max_mz holds the largest m/z seen
# (it stays -1 when there is none).
def initialize(peptides,opts,one_d)
  @features = []
  @data = {}
  @max_int = 0.0
  @one_d = one_d
  @max_time = Sim_Spectra.r_times.max
  @opts = opts
  @max_mz = -1

  #------------------Each_Peptide_=>_Feature----------------------
  prog = Progress.new("Generating features:")
  num = 0
  total = peptides.size
  step = total/100.0
  peptides.each_with_index do |pep,ind|
    # Only touch the progress bar when another percent has been reached.
    if ind > step * (num + 1)
      num = (((ind+1)/total.to_f)*100).to_i
      prog.update(num)
    end

    @features << getInts(pep)
  end
  prog.finish!
  #---------------------------------------------------------------

  #-----------------Transform_to_spectra_data_for_mzml------------
  # rt => [[mzs],[ints]]
  prog = Progress.new("Generating MS2 & Populating structure for mzml:")
  num = 0
  total = @features.size
  step = total/100.0
  ms2_count = 0
  seq = nil
  # MS2 generation is switched off: the guarding condition was hard-coded to
  # `false`. The branch is kept so it can be re-enabled by flipping this flag.
  ms2_enabled = false

  @features.each_with_index do |fe,k|
    if k > step * (num + 1)
      num = ((k/total.to_f)*100).to_i
      prog.update(num)
    end

    fe_ints = fe.ints
    fe_mzs = fe.mzs

    ms2_int = fe_ints.flatten.max
    ms2 = false
    pre_mz = nil
    pre_charge = nil

    fe.rts.each_with_index do |rt,i|
      rt_mzs = []
      rt_ints = []

      fe.core_mzs.size.times do |j|
        mz = fe_mzs[j][i]
        int = fe_ints[j][i]

        # A missing m/z must not reach the comparison: `Integer < nil` raises,
        # while the nil filter further down shows such gaps are expected.
        @max_mz = mz if mz && @max_mz < mz
        int = 0.0 if int.nil?
        next unless int > 0.9

        rt_mzs << mz
        rt_ints << int
        # The feature's most intense point is the MS2 precursor candidate.
        if int == ms2_int && fe.sequence.size > 1
          ms2 = true
          pre_mz = mz
          pre_charge = fe.charge
        end
      end

      unless rt_mzs.include?(nil) || rt_mzs.empty?
        if @data.key?(rt)
          # Another feature already produced points at this retention time:
          # append to them and carry the existing metadata over.
          ms1 = @data[rt]
          spec = [ms1[0] + rt_mzs, ms1[1] + rt_ints]
          spec.ms_level = ms1.ms_level
          spec.ms2 = ms1.ms2
        else
          spec = [rt_mzs, rt_ints]
        end

        if ms2_enabled && ms2 && fe.sequence != seq
          #add ms2 spec
          seq = fe.sequence
          spec.ms_level = 2
          ms2_mzs = MS::Fragmenter.new.fragment(seq)
          ms2_ints = Array.new(ms2_mzs.size,500.to_f)
          spec2 = [(rt + RThelper.RandomFloat(0.01,@opts[:sampling_rate] - 0.1)), ms2_mzs, ms2_ints]
          spec2.ms_level = 2
          spec2.pre_mz = pre_mz
          spec2.pre_int = ms2_int
          spec2.pre_charge = pre_charge
          (spec.ms2 ||= []) << spec2
          ms2_count += 1
        end

        @data[rt] = spec
      end

      ms2 = false
    end
  end
  prog.finish!
  puts "MS2s = #{ms2_count}"
  #---------------------------------------------------------------
end
|
76
|
-
|
77
|
-
attr_reader :data, :features
|
78
|
-
attr_writer :data, :features
|
79
|
-
|
133
|
+
|
134
|
+
attr_reader :data, :features, :max_mz
|
135
|
+
attr_writer :data, :features, :max_mz
|
136
|
+
|
80
137
|
# Fills +pep+ with simulated intensities and m/z values: one pair of arrays
# per core m/z, one entry per retention time.
#
# Intensities are shaped in the rt direction by a gaussian with a dynamic
# standard deviation (it grows with the position in the elution profile,
# controlled by @opts[:tail], @opts[:front] and @opts[:mu]). Random
# "jaggedness" is then added to intensities above 0.4, and each m/z value is
# drawn from a normal distribution centred on the core m/z whose spread
# depends on the intensity (@opts[:wobA], @opts[:wobB]). Finally the intensity
# is scaled by the peptide's predicted intensity, the isotope's relative
# abundance and a factor derived from pep.sx and pep.abu.
#
# A gaussian shaping in the m/z direction used to follow the low-intensity
# floor; it is disabled and only kept as a comment in the body.
#
# pep - the peptide to fill; results are handed to pep.insert_ints and
#       pep.insert_mzs once per core m/z.
#
# Returns +pep+.
#
# The m/z array always gets exactly one entry per retention time. When the
# intensity is not positive no wobble can be drawn, so the unshifted core m/z
# is stored instead; skipping the entry (as before) shifted every later m/z
# to the wrong retention time.
def getInts(pep)
  p_int = pep.p_int + RThelper.RandomFloat(-5,2)
  p_int -= 10 if p_int > 10
  predicted_int = (p_int * 10**-1) * 14183000.0
  relative_ints = pep.core_ints

  tail = @opts[:tail].to_f
  front = @opts[:front].to_f
  mu = @opts[:mu].to_f

  sx = pep.sx
  sy = (sx**-1) * Math.sqrt(pep.abu)

  shuff = RThelper.RandomFloat(0.05,1.0)
  pep.core_mzs.each_with_index do |mzmu, index|
    fin_mzs = []
    fin_ints = []

    relative_abundances_int = relative_ints[index]

    pep.rts.each_with_index do |_rt, i|
      if @one_d
        #-----------Random 1d data--------------------
        # NOTE(review): `ints_factor` is not defined anywhere in the visible
        # file, so this branch looks like it raises NameError -- confirm.
        intensity = (relative_abundances_int * ints_factor) * shuff
        #---------------------------------------------
      else
        #-------------Tailing-------------------------
        # 1-based position in the elution profile, scaled by sx.
        t = (i + 1) / sx
        shape = (tail * t) + front
        intensity = RThelper.gaussian(t, mu, shape, 100.0)
        #---------------------------------------------
      end

      # Replace (near-)empty points with a small random floor.
      intensity = RThelper.RandomFloat(0.001,0.4) if intensity < 0.01

      # Disabled m/z peak shaping, kept for reference:
      #   if !@one_d
      #     fraction = RThelper.gaussian(fin_mzs[i],mzmu,0.05,1)
      #     factor = fraction/1.0
      #     fin_ints[i] = fin_ints[i] * factor
      #   end

      if intensity > 0.4
        #-------------Jagged-ness---------------------
        sd = (@opts[:jagA] * (1-Math.exp(-(@opts[:jagC]) * intensity)) + @opts[:jagB])/2
        intensity += Distribution::Normal.rng(0,sd).call
        #---------------------------------------------
      end

      #-------------mz wobble-----------------------
      if intensity > 0
        wobble_int = @opts[:wobA]*intensity**(@opts[:wobB])
        wobble_mz = Distribution::Normal.rng(mzmu,wobble_int).call
        wobble_mz = 0.01 if wobble_mz < 0
        fin_mzs << wobble_mz
      else
        # Keep fin_mzs index-aligned with fin_ints.
        fin_mzs << mzmu
      end
      #---------------------------------------------

      fin_ints << intensity*(predicted_int*(relative_abundances_int*10**-2)) * sy
    end

    pep.insert_ints(fin_ints)
    pep.insert_mzs(fin_mzs)
  end
  return pep
end
|