mspire-simulator 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,10 +1,15 @@
1
+ class String
2
+ abu = 0
3
+ attr_reader :abu
4
+ attr_writer :abu
5
+ end
1
6
 
2
7
  module MS
3
8
  class Sim_Digester
4
-
9
+
5
10
  attr_reader :digested_file
6
11
  attr_writer :digested_file
7
-
12
+
8
13
  def initialize(digestor,pH)
9
14
  @digestor = digestor
10
15
  @pH = pH
@@ -13,38 +18,47 @@ module MS
13
18
  system("mkdir .m/A .m/R .m/N .m/D .m/C .m/E .m/Q .m/G .m/H .m/I .m/L .m/K .m/M .m/F .m/P .m/S .m/T .m/W .m/Y .m/V .m/U .m/O")
14
19
  system("mkdir .i/A .i/R .i/N .i/D .i/C .i/E .i/Q .i/G .i/H .i/I .i/L .i/K .i/M .i/F .i/P .i/S .i/T .i/W .i/Y .i/V .i/U .i/O")
15
20
  end
16
-
21
+
17
22
  def clean
18
23
  system("rm -r -f .m .i")
19
24
  end
20
-
25
+
21
26
  def create_digested_file(file)
27
+ abundances = []
22
28
  inFile = File.open(file,"r")
23
29
  seq = ""
24
30
  inFile.each_line do |sequence|
25
- if sequence =~ />/ or sequence == "\n"
26
- seq = seq<<";"
27
- else
31
+ if sequence =~ />/
32
+ num = sequence.match(/\#.+/).to_s.chomp.gsub('#','')
33
+ if num != ""
34
+ abundances<<(num.to_f)*10.0**-2
35
+ else
36
+ abundances<<1.0
37
+ end
38
+ sequence
39
+ seq = seq<<";"
40
+ elsif sequence == "/n"; else
28
41
  seq = seq<<sequence.chomp
29
42
  end
30
43
  end
31
44
  inFile.close
32
-
45
+
33
46
  proteins = seq.split(/;/).delete_if{|str| str == ""}
34
47
 
35
48
  trypsin = Mspire::Digester[@digestor]
36
-
49
+
37
50
  digested = []
38
51
  d_file = File.open(@digested_file, "w")
39
- proteins.each do |prot|
52
+ proteins.each_with_index do |prot,index|
40
53
  dig = trypsin.digest(prot)
41
54
  dig.each do |d|
55
+ d.abu = abundances[index]
42
56
  digested<<d
43
57
  end
44
58
  end
45
59
  proteins.clear
46
60
  digested.uniq!
47
-
61
+
48
62
  trun_digested = []
49
63
  if digested.length > 50000
50
64
  50000.times do
@@ -53,9 +67,9 @@ module MS
53
67
  digested.clear
54
68
  digested = trun_digested
55
69
  end
56
-
70
+
57
71
  digested.each do |dig|
58
- d_file.puts(dig)
72
+ d_file.puts(dig<<"#"<<dig.abu.to_s)
59
73
  end
60
74
  d_file.close
61
75
  num_digested = digested.size
@@ -63,36 +77,41 @@ module MS
63
77
  puts "Number of peptides: #{num_digested}"
64
78
  return num_digested
65
79
  end
66
-
80
+
67
81
  def digest(file)
68
- start = Time.now
69
-
70
82
  num_digested = create_digested_file(file)
71
-
83
+
72
84
  d_file = File.open(@digested_file, "r")
73
85
  i = 0
74
-
86
+
75
87
  peptides = []
76
88
 
89
+ prog = Progress.new("Creating peptides '#{file}':")
90
+ num = 0
91
+ total = num_digested
92
+ step = total/100.0
77
93
  d_file.each_line do |peptide_seq|
78
94
  peptide_seq.chomp!
79
- Progress.progress("Creating peptides '#{file}':",((i/num_digested.to_f)*100.0).to_i)
80
-
95
+ peptide_seq.abu = peptide_seq.match(/#.+/).to_s.chomp.gsub('#','').to_f
96
+ peptide_seq.gsub!(/#.+/,'')
97
+ if i > step * (num + 1)
98
+ num = ((i/total.to_f)*100.0).to_i
99
+ prog.update(num)
100
+ end
101
+
81
102
  charge_ratio = charge_at_pH(identify_potential_charges(peptide_seq), @pH)
82
103
  charge_f = charge_ratio.floor
83
104
  charge_c = charge_ratio.ceil
84
-
85
- peptide_f = MS::Peptide.new(peptide_seq, charge_f) if charge_f != 0
86
- peptide_c = MS::Peptide.new(peptide_seq, charge_c) if charge_c != 0
87
-
105
+ peptide_f = MS::Peptide.new(peptide_seq, charge_f, peptide_seq.abu) if charge_f != 0
106
+ peptide_c = MS::Peptide.new(peptide_seq, charge_c, peptide_seq.abu) if charge_c != 0
107
+
88
108
  peptides<<peptide_f if charge_f != 0
89
109
  peptides<<peptide_c if charge_c != 0
90
110
  i += 1
91
111
  end
112
+ prog.finish!
92
113
  d_file.close
93
114
  File.delete(@digested_file)
94
- Progress.progress("Creating peptides '#{file}':",100,Time.now-start)
95
- puts ''
96
115
  return peptides
97
116
  end
98
117
  end
@@ -1,173 +1,231 @@
1
-
2
1
  require 'time'
3
2
  require 'distribution'
3
+ require 'fragmenter'
4
4
  require 'ms/sim_peptide'
5
5
  require 'ms/rt/rt_helper'
6
6
  require 'ms/tr_file_writer'
7
7
 
8
+ class Array
9
+ attr_reader :ms2, :ms_level, :pre_mz, :pre_int, :pre_charge
10
+ attr_writer :ms2, :ms_level, :pre_mz, :pre_int, :pre_charge
11
+ end
12
+
8
13
  module MS
9
14
  class Sim_Feature
10
15
  def initialize(peptides,opts,one_d)
11
-
12
- @start = Time.now
16
+
13
17
  @features = []
14
18
  @data = {}
15
19
  @max_int = 0.0
16
20
  @one_d = one_d
17
21
  @max_time = Sim_Spectra.r_times.max
18
22
  @opts = opts
19
-
20
-
23
+ @max_mz = -1
24
+
25
+
21
26
  #------------------Each_Peptide_=>_Feature----------------------
27
+ prog = Progress.new("Generating features:")
28
+ num = 0
29
+ total = peptides.size
30
+ step = total/100.0
22
31
  peptides.each_with_index do |pep,ind|
23
- Progress.progress("Generating features:",(((ind+1)/peptides.size.to_f)*100).to_i)
24
-
25
- feature = getInts(pep)
32
+ if ind > step * (num + 1)
33
+ num = (((ind+1)/total.to_f)*100).to_i
34
+ prog.update(num)
35
+ end
36
+
37
+ feature = getInts(pep)
26
38
 
27
- @features<<feature
39
+ @features<<feature
28
40
  end
29
- Progress.progress("Generating features:",100,Time.now-@start)
30
- puts ""
31
- @start = Time.now
41
+ prog.finish!
32
42
  #---------------------------------------------------------------
33
-
34
-
35
-
43
+
44
+
45
+
36
46
  #-----------------Transform_to_spectra_data_for_mzml------------
37
47
  # rt => [[mzs],[ints]]
48
+ prog = Progress.new("Generating MS2 & Populating structure for mzml:")
49
+ num = 0
50
+ total = @features.size
51
+ step = total/100.0
52
+ ms2_count = 0
53
+ seq = nil
54
+
38
55
  @features.each_with_index do |fe,k|
39
- Progress.progress("Populating structure for mzml:",((k/@features.size.to_f)*100).to_i)
40
-
41
- fe_ints = fe.ints
42
- fe_mzs = fe.mzs
43
-
44
- fe.rts.each_with_index do |rt,i|
45
- rt_mzs = []
46
- rt_ints = []
47
-
48
- fe.core_mzs.size.times do |j|
49
- mz,int = [ fe_mzs[j][i], fe_ints[j][i] ]
50
- if int == nil
51
- int = 0.0
52
- end
53
- if int > 0.9
54
- rt_mzs<<mz
55
- rt_ints<<int
56
- end
57
- end
58
-
59
- if rt_mzs.include?(nil) or rt_mzs.empty?; else
60
- if @data.key?(rt)
61
- mzs,ints = @data[rt]
62
- @data[rt][0] = mzs + rt_mzs
63
- @data[rt][1] = ints + rt_ints
64
- else
65
- @data[rt] = [rt_mzs, rt_ints]
66
- end
67
- end
68
- end
56
+ if k > step * (num + 1)
57
+ num = ((k/total.to_f)*100).to_i
58
+ prog.update(num)
59
+ end
60
+
61
+ fe_ints = fe.ints
62
+ fe_mzs = fe.mzs
63
+
64
+ ms2_int = fe.ints.flatten.max
65
+ ms2 = false
66
+ pre_mz = nil
67
+ pre_charge = nil
68
+
69
+ fe.rts.each_with_index do |rt,i|
70
+ rt_mzs = []
71
+ rt_ints = []
72
+
73
+ fe.core_mzs.size.times do |j|
74
+ mz,int = [ fe_mzs[j][i], fe_ints[j][i] ]
75
+ if @max_mz < mz
76
+ @max_mz = mz
77
+ end
78
+ if int == nil
79
+ int = 0.0
80
+ end
81
+ if int > 0.9
82
+ rt_mzs<<mz
83
+ rt_ints<<int
84
+ if int == ms2_int and fe.sequence.size > 1
85
+ ms2 = true
86
+ pre_mz = mz
87
+ pre_charge = fe.charge
88
+ end
89
+ end
90
+ end
91
+
92
+ spec = nil
93
+ if rt_mzs.include?(nil) or rt_mzs.empty?; else
94
+ if @data.key?(rt)
95
+ ms1 = @data[rt]
96
+ spec = [ms1[0] + rt_mzs, ms1[1] + rt_ints]
97
+ spec.ms_level = ms1.ms_level
98
+ spec.ms2 = ms1.ms2
99
+ else
100
+ spec = [rt_mzs, rt_ints]
101
+ end
102
+ if false#ms2 and fe.sequence != seq
103
+ #add ms2 spec
104
+ seq = fe.sequence
105
+ spec.ms_level = 2
106
+ ms2_mzs = MS::Fragmenter.new.fragment(seq)
107
+ ms2_ints = Array.new(ms2_mzs.size,500.to_f)
108
+ spec2 = [(rt + RThelper.RandomFloat(0.01,@opts[:sampling_rate] - 0.1)), ms2_mzs, ms2_ints]
109
+ spec2.ms_level = 2
110
+ spec2.pre_mz = pre_mz
111
+ spec2.pre_int = ms2_int
112
+ spec2.pre_charge = pre_charge
113
+ if spec.ms2 != nil
114
+ ms2_arr = spec.ms2
115
+ ms2_arr<<spec2
116
+ spec.ms2 = ms2_arr
117
+ else
118
+ spec.ms2 = [spec2]
119
+ end
120
+ ms2_count += 1
121
+ end
122
+ @data[rt] = spec
123
+ end
124
+ ms2 = false
125
+ end
69
126
  end
70
- Progress.progress("Populating structure for mzml:",100,Time.now-@start)
71
- puts ""
72
-
127
+ prog.finish!
128
+ puts "MS2s = #{ms2_count}"
129
+
73
130
  #---------------------------------------------------------------
74
-
131
+
75
132
  end
76
-
77
- attr_reader :data, :features
78
- attr_writer :data, :features
79
-
133
+
134
+ attr_reader :data, :features, :max_mz
135
+ attr_writer :data, :features, :max_mz
136
+
80
137
  # Intensities are shaped in the rt direction by a gaussian with
81
138
  # a dynamic standard deviation.
82
139
  # They are also shaped in the m/z direction
83
140
  # by a simple gaussian curve (see 'factor' below).
84
141
  #
85
142
  def getInts(pep)
86
-
87
143
  p_int = pep.p_int + RThelper.RandomFloat(-5,2)
88
144
  if p_int > 10
89
- p_int -= 10
145
+ p_int -= 10
90
146
  end
91
- predicted_int = (p_int * 10**-1) * 14183000.0
147
+ predicted_int = (p_int * 10**-1) * 14183000.0
92
148
  relative_ints = pep.core_ints
93
149
  avg = pep.p_rt
94
-
150
+
95
151
  sampling_rate = @opts[:sampling_rate].to_f
96
152
  tail = @opts[:tail].to_f
97
153
  front = @opts[:front].to_f
98
154
  mu = @opts[:mu].to_f
99
-
155
+
100
156
  index = 0
101
-
157
+ sx = pep.sx
158
+ sy = (sx**-1) * Math.sqrt(pep.abu)
159
+
102
160
  shuff = RThelper.RandomFloat(0.05,1.0)
103
161
  pep.core_mzs.each do |mzmu|
104
162
 
105
- fin_mzs = []
106
- fin_ints = []
107
- t_index = 1
108
-
109
- relative_abundances_int = relative_ints[index]
110
-
111
- pep.rts.each_with_index do |rt,i|
112
- percent_time = rt/@max_time
113
- length_factor = 1.0#-3.96 * percent_time**2 + 3.96 * percent_time + 0.01
114
- length_factor_tail = 1.0#-7.96 * percent_time**2 + 7.96 * percent_time + 0.01
115
-
116
-
117
- if !@one_d
118
- #-------------Tailing-------------------------
119
- shape = (tail * length_factor)* t_index + (front * length_factor_tail)
120
- fin_ints << (RThelper.gaussian(t_index,mu,shape,100.0))
121
- t_index += 1
122
- #---------------------------------------------
123
-
124
- else
125
- #-----------Random 1d data--------------------
126
- fin_ints<<(relative_abundances_int * ints_factor) * shuff
127
- #---------------------------------------------
128
- end
129
-
130
- if fin_ints[i] < 0.01
131
- fin_ints[i] = RThelper.RandomFloat(0.001,0.4)
132
- end
163
+ fin_mzs = []
164
+ fin_ints = []
165
+
166
+ relative_abundances_int = relative_ints[index]
167
+
168
+ t_index = 1
169
+
170
+ pep.rts.each_with_index do |rt,i|
171
+
172
+ if !@one_d
173
+ #-------------Tailing-------------------------
174
+ shape = (tail * (t_index / sx)) + front
175
+ fin_ints << (RThelper.gaussian((t_index / sx) ,mu ,shape,100.0))
176
+ t_index += 1
177
+ #---------------------------------------------
178
+
179
+ else
180
+ #-----------Random 1d data--------------------
181
+ fin_ints<<(relative_abundances_int * ints_factor) * shuff
182
+ #---------------------------------------------
183
+ end
184
+
185
+ if fin_ints[i] < 0.01
186
+ fin_ints[i] = RThelper.RandomFloat(0.001,0.4)
187
+ end
133
188
 
134
189
  =begin
135
- if !@one_d
136
- #-------------M/Z Peak shape (Profile?)-------
137
- fraction = RThelper.gaussian(fin_mzs[i],mzmu,0.05,1)
138
- factor = fraction/1.0
139
- fin_ints[i] = fin_ints[i] * factor
140
- #---------------------------------------------
141
- end
190
+ if !@one_d
191
+ #-------------M/Z Peak shape (Profile?)-------
192
+ fraction = RThelper.gaussian(fin_mzs[i],mzmu,0.05,1)
193
+ factor = fraction/1.0
194
+ fin_ints[i] = fin_ints[i] * factor
195
+ #---------------------------------------------
196
+ end
142
197
  =end
143
- #-------------Jagged-ness---------------------
144
- sd = (@opts[:jagA] * (1-Math.exp(-(@opts[:jagC]) * fin_ints[i])) + @opts[:jagB])/2
145
- diff = (Distribution::Normal.rng(0,sd).call)
146
- fin_ints[i] = fin_ints[i] + diff
147
- #---------------------------------------------
148
-
149
-
150
- #-------------mz wobble-----------------------
151
- y = fin_ints[i]
152
- if y > 0
153
- wobble_int = @opts[:wobA]*y**(@opts[:wobB])
154
- wobble_mz = Distribution::Normal.rng(mzmu,wobble_int).call
155
- if wobble_mz < 0
156
- wobble_mz = 0.01
157
- end
158
-
159
- fin_mzs<<wobble_mz
160
- end
161
- #---------------------------------------------
162
-
163
-
164
- fin_ints[i] = fin_ints[i]*(predicted_int*(relative_abundances_int*10**-2))
165
- end
166
-
167
- pep.insert_ints(fin_ints)
168
- pep.insert_mzs(fin_mzs)
169
-
170
- index += 1
198
+
199
+ if fin_ints[i] > 0.4
200
+ #-------------Jagged-ness---------------------
201
+ sd = (@opts[:jagA] * (1-Math.exp(-(@opts[:jagC]) * fin_ints[i])) + @opts[:jagB])/2
202
+ diff = (Distribution::Normal.rng(0,sd).call)
203
+ fin_ints[i] = fin_ints[i] + diff
204
+ #---------------------------------------------
205
+ end
206
+
207
+ #-------------mz wobble-----------------------
208
+ y = fin_ints[i]
209
+ wobble_mz = nil
210
+ if y > 0
211
+ wobble_int = @opts[:wobA]*y**(@opts[:wobB])
212
+ wobble_mz = Distribution::Normal.rng(mzmu,wobble_int).call
213
+ if wobble_mz < 0
214
+ wobble_mz = 0.01
215
+ end
216
+
217
+ fin_mzs<<wobble_mz
218
+ end
219
+ #---------------------------------------------
220
+
221
+
222
+ fin_ints[i] = fin_ints[i]*(predicted_int*(relative_abundances_int*10**-2)) * sy
223
+ end
224
+
225
+ pep.insert_ints(fin_ints)
226
+ pep.insert_mzs(fin_mzs)
227
+
228
+ index += 1
171
229
  end
172
230
  return pep
173
231
  end