mspire-simulator 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,15 @@
1
+ class String
2
+ abu = 0
3
+ attr_reader :abu
4
+ attr_writer :abu
5
+ end
1
6
 
2
7
  module MS
3
8
  class Sim_Digester
4
-
9
+
5
10
  attr_reader :digested_file
6
11
  attr_writer :digested_file
7
-
12
+
8
13
  def initialize(digestor,pH)
9
14
  @digestor = digestor
10
15
  @pH = pH
@@ -13,38 +18,47 @@ module MS
13
18
  system("mkdir .m/A .m/R .m/N .m/D .m/C .m/E .m/Q .m/G .m/H .m/I .m/L .m/K .m/M .m/F .m/P .m/S .m/T .m/W .m/Y .m/V .m/U .m/O")
14
19
  system("mkdir .i/A .i/R .i/N .i/D .i/C .i/E .i/Q .i/G .i/H .i/I .i/L .i/K .i/M .i/F .i/P .i/S .i/T .i/W .i/Y .i/V .i/U .i/O")
15
20
  end
16
-
21
+
17
22
  def clean
18
23
  system("rm -r -f .m .i")
19
24
  end
20
-
25
+
21
26
  def create_digested_file(file)
27
+ abundances = []
22
28
  inFile = File.open(file,"r")
23
29
  seq = ""
24
30
  inFile.each_line do |sequence|
25
- if sequence =~ />/ or sequence == "\n"
26
- seq = seq<<";"
27
- else
31
+ if sequence =~ />/
32
+ num = sequence.match(/\#.+/).to_s.chomp.gsub('#','')
33
+ if num != ""
34
+ abundances<<(num.to_f)*10.0**-2
35
+ else
36
+ abundances<<1.0
37
+ end
38
+ sequence
39
+ seq = seq<<";"
40
+ elsif sequence == "/n"; else
28
41
  seq = seq<<sequence.chomp
29
42
  end
30
43
  end
31
44
  inFile.close
32
-
45
+
33
46
  proteins = seq.split(/;/).delete_if{|str| str == ""}
34
47
 
35
48
  trypsin = Mspire::Digester[@digestor]
36
-
49
+
37
50
  digested = []
38
51
  d_file = File.open(@digested_file, "w")
39
- proteins.each do |prot|
52
+ proteins.each_with_index do |prot,index|
40
53
  dig = trypsin.digest(prot)
41
54
  dig.each do |d|
55
+ d.abu = abundances[index]
42
56
  digested<<d
43
57
  end
44
58
  end
45
59
  proteins.clear
46
60
  digested.uniq!
47
-
61
+
48
62
  trun_digested = []
49
63
  if digested.length > 50000
50
64
  50000.times do
@@ -53,9 +67,9 @@ module MS
53
67
  digested.clear
54
68
  digested = trun_digested
55
69
  end
56
-
70
+
57
71
  digested.each do |dig|
58
- d_file.puts(dig)
72
+ d_file.puts(dig<<"#"<<dig.abu.to_s)
59
73
  end
60
74
  d_file.close
61
75
  num_digested = digested.size
@@ -63,36 +77,41 @@ module MS
63
77
  puts "Number of peptides: #{num_digested}"
64
78
  return num_digested
65
79
  end
66
-
80
+
67
81
  def digest(file)
68
- start = Time.now
69
-
70
82
  num_digested = create_digested_file(file)
71
-
83
+
72
84
  d_file = File.open(@digested_file, "r")
73
85
  i = 0
74
-
86
+
75
87
  peptides = []
76
88
 
89
+ prog = Progress.new("Creating peptides '#{file}':")
90
+ num = 0
91
+ total = num_digested
92
+ step = total/100.0
77
93
  d_file.each_line do |peptide_seq|
78
94
  peptide_seq.chomp!
79
- Progress.progress("Creating peptides '#{file}':",((i/num_digested.to_f)*100.0).to_i)
80
-
95
+ peptide_seq.abu = peptide_seq.match(/#.+/).to_s.chomp.gsub('#','').to_f
96
+ peptide_seq.gsub!(/#.+/,'')
97
+ if i > step * (num + 1)
98
+ num = ((i/total.to_f)*100.0).to_i
99
+ prog.update(num)
100
+ end
101
+
81
102
  charge_ratio = charge_at_pH(identify_potential_charges(peptide_seq), @pH)
82
103
  charge_f = charge_ratio.floor
83
104
  charge_c = charge_ratio.ceil
84
-
85
- peptide_f = MS::Peptide.new(peptide_seq, charge_f) if charge_f != 0
86
- peptide_c = MS::Peptide.new(peptide_seq, charge_c) if charge_c != 0
87
-
105
+ peptide_f = MS::Peptide.new(peptide_seq, charge_f, peptide_seq.abu) if charge_f != 0
106
+ peptide_c = MS::Peptide.new(peptide_seq, charge_c, peptide_seq.abu) if charge_c != 0
107
+
88
108
  peptides<<peptide_f if charge_f != 0
89
109
  peptides<<peptide_c if charge_c != 0
90
110
  i += 1
91
111
  end
112
+ prog.finish!
92
113
  d_file.close
93
114
  File.delete(@digested_file)
94
- Progress.progress("Creating peptides '#{file}':",100,Time.now-start)
95
- puts ''
96
115
  return peptides
97
116
  end
98
117
  end
@@ -1,173 +1,231 @@
1
-
2
1
  require 'time'
3
2
  require 'distribution'
3
+ require 'fragmenter'
4
4
  require 'ms/sim_peptide'
5
5
  require 'ms/rt/rt_helper'
6
6
  require 'ms/tr_file_writer'
7
7
 
8
+ class Array
9
+ attr_reader :ms2, :ms_level, :pre_mz, :pre_int, :pre_charge
10
+ attr_writer :ms2, :ms_level, :pre_mz, :pre_int, :pre_charge
11
+ end
12
+
8
13
  module MS
9
14
  class Sim_Feature
10
15
  def initialize(peptides,opts,one_d)
11
-
12
- @start = Time.now
16
+
13
17
  @features = []
14
18
  @data = {}
15
19
  @max_int = 0.0
16
20
  @one_d = one_d
17
21
  @max_time = Sim_Spectra.r_times.max
18
22
  @opts = opts
19
-
20
-
23
+ @max_mz = -1
24
+
25
+
21
26
  #------------------Each_Peptide_=>_Feature----------------------
27
+ prog = Progress.new("Generating features:")
28
+ num = 0
29
+ total = peptides.size
30
+ step = total/100.0
22
31
  peptides.each_with_index do |pep,ind|
23
- Progress.progress("Generating features:",(((ind+1)/peptides.size.to_f)*100).to_i)
24
-
25
- feature = getInts(pep)
32
+ if ind > step * (num + 1)
33
+ num = (((ind+1)/total.to_f)*100).to_i
34
+ prog.update(num)
35
+ end
36
+
37
+ feature = getInts(pep)
26
38
 
27
- @features<<feature
39
+ @features<<feature
28
40
  end
29
- Progress.progress("Generating features:",100,Time.now-@start)
30
- puts ""
31
- @start = Time.now
41
+ prog.finish!
32
42
  #---------------------------------------------------------------
33
-
34
-
35
-
43
+
44
+
45
+
36
46
  #-----------------Transform_to_spectra_data_for_mzml------------
37
47
  # rt => [[mzs],[ints]]
48
+ prog = Progress.new("Generating MS2 & Populating structure for mzml:")
49
+ num = 0
50
+ total = @features.size
51
+ step = total/100.0
52
+ ms2_count = 0
53
+ seq = nil
54
+
38
55
  @features.each_with_index do |fe,k|
39
- Progress.progress("Populating structure for mzml:",((k/@features.size.to_f)*100).to_i)
40
-
41
- fe_ints = fe.ints
42
- fe_mzs = fe.mzs
43
-
44
- fe.rts.each_with_index do |rt,i|
45
- rt_mzs = []
46
- rt_ints = []
47
-
48
- fe.core_mzs.size.times do |j|
49
- mz,int = [ fe_mzs[j][i], fe_ints[j][i] ]
50
- if int == nil
51
- int = 0.0
52
- end
53
- if int > 0.9
54
- rt_mzs<<mz
55
- rt_ints<<int
56
- end
57
- end
58
-
59
- if rt_mzs.include?(nil) or rt_mzs.empty?; else
60
- if @data.key?(rt)
61
- mzs,ints = @data[rt]
62
- @data[rt][0] = mzs + rt_mzs
63
- @data[rt][1] = ints + rt_ints
64
- else
65
- @data[rt] = [rt_mzs, rt_ints]
66
- end
67
- end
68
- end
56
+ if k > step * (num + 1)
57
+ num = ((k/total.to_f)*100).to_i
58
+ prog.update(num)
59
+ end
60
+
61
+ fe_ints = fe.ints
62
+ fe_mzs = fe.mzs
63
+
64
+ ms2_int = fe.ints.flatten.max
65
+ ms2 = false
66
+ pre_mz = nil
67
+ pre_charge = nil
68
+
69
+ fe.rts.each_with_index do |rt,i|
70
+ rt_mzs = []
71
+ rt_ints = []
72
+
73
+ fe.core_mzs.size.times do |j|
74
+ mz,int = [ fe_mzs[j][i], fe_ints[j][i] ]
75
+ if @max_mz < mz
76
+ @max_mz = mz
77
+ end
78
+ if int == nil
79
+ int = 0.0
80
+ end
81
+ if int > 0.9
82
+ rt_mzs<<mz
83
+ rt_ints<<int
84
+ if int == ms2_int and fe.sequence.size > 1
85
+ ms2 = true
86
+ pre_mz = mz
87
+ pre_charge = fe.charge
88
+ end
89
+ end
90
+ end
91
+
92
+ spec = nil
93
+ if rt_mzs.include?(nil) or rt_mzs.empty?; else
94
+ if @data.key?(rt)
95
+ ms1 = @data[rt]
96
+ spec = [ms1[0] + rt_mzs, ms1[1] + rt_ints]
97
+ spec.ms_level = ms1.ms_level
98
+ spec.ms2 = ms1.ms2
99
+ else
100
+ spec = [rt_mzs, rt_ints]
101
+ end
102
+ if false#ms2 and fe.sequence != seq
103
+ #add ms2 spec
104
+ seq = fe.sequence
105
+ spec.ms_level = 2
106
+ ms2_mzs = MS::Fragmenter.new.fragment(seq)
107
+ ms2_ints = Array.new(ms2_mzs.size,500.to_f)
108
+ spec2 = [(rt + RThelper.RandomFloat(0.01,@opts[:sampling_rate] - 0.1)), ms2_mzs, ms2_ints]
109
+ spec2.ms_level = 2
110
+ spec2.pre_mz = pre_mz
111
+ spec2.pre_int = ms2_int
112
+ spec2.pre_charge = pre_charge
113
+ if spec.ms2 != nil
114
+ ms2_arr = spec.ms2
115
+ ms2_arr<<spec2
116
+ spec.ms2 = ms2_arr
117
+ else
118
+ spec.ms2 = [spec2]
119
+ end
120
+ ms2_count += 1
121
+ end
122
+ @data[rt] = spec
123
+ end
124
+ ms2 = false
125
+ end
69
126
  end
70
- Progress.progress("Populating structure for mzml:",100,Time.now-@start)
71
- puts ""
72
-
127
+ prog.finish!
128
+ puts "MS2s = #{ms2_count}"
129
+
73
130
  #---------------------------------------------------------------
74
-
131
+
75
132
  end
76
-
77
- attr_reader :data, :features
78
- attr_writer :data, :features
79
-
133
+
134
+ attr_reader :data, :features, :max_mz
135
+ attr_writer :data, :features, :max_mz
136
+
80
137
  # Intensities are shaped in the rt direction by a gaussian with
81
138
  # a dynamic standard deviation.
82
139
  # They are also shaped in the m/z direction
83
140
  # by a simple gaussian curve (see 'factor' below).
84
141
  #
85
142
  def getInts(pep)
86
-
87
143
  p_int = pep.p_int + RThelper.RandomFloat(-5,2)
88
144
  if p_int > 10
89
- p_int -= 10
145
+ p_int -= 10
90
146
  end
91
- predicted_int = (p_int * 10**-1) * 14183000.0
147
+ predicted_int = (p_int * 10**-1) * 14183000.0
92
148
  relative_ints = pep.core_ints
93
149
  avg = pep.p_rt
94
-
150
+
95
151
  sampling_rate = @opts[:sampling_rate].to_f
96
152
  tail = @opts[:tail].to_f
97
153
  front = @opts[:front].to_f
98
154
  mu = @opts[:mu].to_f
99
-
155
+
100
156
  index = 0
101
-
157
+ sx = pep.sx
158
+ sy = (sx**-1) * Math.sqrt(pep.abu)
159
+
102
160
  shuff = RThelper.RandomFloat(0.05,1.0)
103
161
  pep.core_mzs.each do |mzmu|
104
162
 
105
- fin_mzs = []
106
- fin_ints = []
107
- t_index = 1
108
-
109
- relative_abundances_int = relative_ints[index]
110
-
111
- pep.rts.each_with_index do |rt,i|
112
- percent_time = rt/@max_time
113
- length_factor = 1.0#-3.96 * percent_time**2 + 3.96 * percent_time + 0.01
114
- length_factor_tail = 1.0#-7.96 * percent_time**2 + 7.96 * percent_time + 0.01
115
-
116
-
117
- if !@one_d
118
- #-------------Tailing-------------------------
119
- shape = (tail * length_factor)* t_index + (front * length_factor_tail)
120
- fin_ints << (RThelper.gaussian(t_index,mu,shape,100.0))
121
- t_index += 1
122
- #---------------------------------------------
123
-
124
- else
125
- #-----------Random 1d data--------------------
126
- fin_ints<<(relative_abundances_int * ints_factor) * shuff
127
- #---------------------------------------------
128
- end
129
-
130
- if fin_ints[i] < 0.01
131
- fin_ints[i] = RThelper.RandomFloat(0.001,0.4)
132
- end
163
+ fin_mzs = []
164
+ fin_ints = []
165
+
166
+ relative_abundances_int = relative_ints[index]
167
+
168
+ t_index = 1
169
+
170
+ pep.rts.each_with_index do |rt,i|
171
+
172
+ if !@one_d
173
+ #-------------Tailing-------------------------
174
+ shape = (tail * (t_index / sx)) + front
175
+ fin_ints << (RThelper.gaussian((t_index / sx) ,mu ,shape,100.0))
176
+ t_index += 1
177
+ #---------------------------------------------
178
+
179
+ else
180
+ #-----------Random 1d data--------------------
181
+ fin_ints<<(relative_abundances_int * ints_factor) * shuff
182
+ #---------------------------------------------
183
+ end
184
+
185
+ if fin_ints[i] < 0.01
186
+ fin_ints[i] = RThelper.RandomFloat(0.001,0.4)
187
+ end
133
188
 
134
189
  =begin
135
- if !@one_d
136
- #-------------M/Z Peak shape (Profile?)-------
137
- fraction = RThelper.gaussian(fin_mzs[i],mzmu,0.05,1)
138
- factor = fraction/1.0
139
- fin_ints[i] = fin_ints[i] * factor
140
- #---------------------------------------------
141
- end
190
+ if !@one_d
191
+ #-------------M/Z Peak shape (Profile?)-------
192
+ fraction = RThelper.gaussian(fin_mzs[i],mzmu,0.05,1)
193
+ factor = fraction/1.0
194
+ fin_ints[i] = fin_ints[i] * factor
195
+ #---------------------------------------------
196
+ end
142
197
  =end
143
- #-------------Jagged-ness---------------------
144
- sd = (@opts[:jagA] * (1-Math.exp(-(@opts[:jagC]) * fin_ints[i])) + @opts[:jagB])/2
145
- diff = (Distribution::Normal.rng(0,sd).call)
146
- fin_ints[i] = fin_ints[i] + diff
147
- #---------------------------------------------
148
-
149
-
150
- #-------------mz wobble-----------------------
151
- y = fin_ints[i]
152
- if y > 0
153
- wobble_int = @opts[:wobA]*y**(@opts[:wobB])
154
- wobble_mz = Distribution::Normal.rng(mzmu,wobble_int).call
155
- if wobble_mz < 0
156
- wobble_mz = 0.01
157
- end
158
-
159
- fin_mzs<<wobble_mz
160
- end
161
- #---------------------------------------------
162
-
163
-
164
- fin_ints[i] = fin_ints[i]*(predicted_int*(relative_abundances_int*10**-2))
165
- end
166
-
167
- pep.insert_ints(fin_ints)
168
- pep.insert_mzs(fin_mzs)
169
-
170
- index += 1
198
+
199
+ if fin_ints[i] > 0.4
200
+ #-------------Jagged-ness---------------------
201
+ sd = (@opts[:jagA] * (1-Math.exp(-(@opts[:jagC]) * fin_ints[i])) + @opts[:jagB])/2
202
+ diff = (Distribution::Normal.rng(0,sd).call)
203
+ fin_ints[i] = fin_ints[i] + diff
204
+ #---------------------------------------------
205
+ end
206
+
207
+ #-------------mz wobble-----------------------
208
+ y = fin_ints[i]
209
+ wobble_mz = nil
210
+ if y > 0
211
+ wobble_int = @opts[:wobA]*y**(@opts[:wobB])
212
+ wobble_mz = Distribution::Normal.rng(mzmu,wobble_int).call
213
+ if wobble_mz < 0
214
+ wobble_mz = 0.01
215
+ end
216
+
217
+ fin_mzs<<wobble_mz
218
+ end
219
+ #---------------------------------------------
220
+
221
+
222
+ fin_ints[i] = fin_ints[i]*(predicted_int*(relative_abundances_int*10**-2)) * sy
223
+ end
224
+
225
+ pep.insert_ints(fin_ints)
226
+ pep.insert_mzs(fin_mzs)
227
+
228
+ index += 1
171
229
  end
172
230
  return pep
173
231
  end