mspire-simulator 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +22 -0
- data/README.rdoc +17 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/mspire-simulator +125 -0
- data/bin/sim_mail.rb +26 -0
- data/bin/weka/M5P.model +0 -0
- data/bin/weka/M5Rules.model +0 -0
- data/bin/weka/weka.jar +0 -0
- data/lib/ms/curvefit/curve_fit_helper.rb +152 -0
- data/lib/ms/curvefit/fit_graph.rb +84 -0
- data/lib/ms/curvefit/mzml_reader.rb +28 -0
- data/lib/ms/curvefit.rb +120 -0
- data/lib/ms/isoelectric_calc.rb +122 -0
- data/lib/ms/merger.rb +101 -0
- data/lib/ms/mzml_wrapper.rb +67 -0
- data/lib/ms/noise.rb +51 -0
- data/lib/ms/rt/rt_helper.rb +31 -0
- data/lib/ms/rt/rtgenerator.rb +81 -0
- data/lib/ms/rt/weka.rb +150 -0
- data/lib/ms/sim_digester.rb +92 -0
- data/lib/ms/sim_feature.rb +175 -0
- data/lib/ms/sim_peptide.rb +182 -0
- data/lib/ms/sim_spectra.rb +70 -0
- data/lib/ms/sim_trollop.rb +68 -0
- data/lib/ms/tr_file_writer.rb +175 -0
- data/lib/progress.rb +24 -0
- data/mspire-simulator.gemspec +103 -0
- data/spec/file_writer_spec.rb +74 -0
- data/spec/merger_spec.rb +23 -0
- data/spec/ms-simulate_spec.rb +9 -0
- data/spec/peptide_spec.rb +16 -0
- data/spec/progress_spec.rb +22 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/spectra_spec.rb +111 -0
- data/testFiles/contam/hum_keratin.fasta +11 -0
- metadata +246 -0
@@ -0,0 +1,175 @@
|
|
1
|
+
|
2
|
+
require 'time'
|
3
|
+
require 'distribution'
|
4
|
+
require 'ms/sim_peptide'
|
5
|
+
require 'ms/rt/rt_helper'
|
6
|
+
require 'ms/tr_file_writer'
|
7
|
+
|
8
|
+
module MS
  # Builds simulated LC-MS features from peptides and collects them into a
  # per-retention-time structure.
  #
  # After construction:
  # * +features+ holds the peptides passed in, each of which has had one
  #   intensity series and one m/z series inserted per core (isotope) m/z;
  # * +data+ maps a retention time to <tt>[[mzs], [ints]]</tt> containing
  #   every generated point whose intensity is greater than 0.9.
  class Sim_Feature
    attr_accessor :data, :features

    # peptides:: objects responding to +p_int+, +core_ints+, +core_mzs+,
    #            +rts+, +ints+, +mzs+, +insert_ints+ and +insert_mzs+.
    # opts::     option hash; reads :tail, :front, :mu, :jagA, :jagB, :jagC,
    #            :wobA and :wobB.
    # one_d::    when truthy, the 1-D intensity branch of #getInts is used
    #            instead of the chromatographic (tailing) shape.
    def initialize(peptides, opts, one_d)
      @start = Time.now
      @features = []
      @data = {}
      @max_int = 0.0
      @one_d = one_d
      @max_time = Sim_Spectra.r_times.max
      @opts = opts

      #------------------Each_Peptide_=>_Feature----------------------
      peptides.each_with_index do |pep, ind|
        Progress.progress("Generating features:", (((ind + 1) / peptides.size.to_f) * 100).to_i)
        @features << getInts(pep)
      end
      Progress.progress("Generating features:", 100, Time.now - @start)
      puts ""
      @start = Time.now

      #-----------------Transform_to_spectra_data_for_mzml------------
      # rt => [[mzs],[ints]]
      @features.each_with_index do |fe, k|
        Progress.progress("Populating structure for mzml:", ((k / @features.size.to_f) * 100).to_i)
        add_feature_to_data(fe)
      end
      Progress.progress("Populating structure for mzml:", 100, Time.now - @start)
      puts ""
    end

    # Generates the intensity and m/z series for one peptide and stores them
    # on the peptide via +insert_ints+ / +insert_mzs+ (one call of each per
    # core m/z). Returns the same peptide.
    #
    # Per retention time the intensity is built in these steps:
    # 1. base value: RThelper.gaussian over a running index with a width that
    #    grows by :tail per step starting from :front (or the 1-D branch);
    # 2. values below 0.01 are replaced by a small random float;
    # 3. "jagged-ness": normally distributed noise whose standard deviation
    #    depends on the current value (:jagA, :jagB, :jagC);
    # 4. scaling by the peptide's predicted intensity and the relative
    #    abundance of the isotope.
    # The m/z is drawn from a normal distribution centred on the core m/z
    # whose spread depends on the intensity (:wobA, :wobB).
    def getInts(pep)
      p_int = pep.p_int + RThelper.RandomFloat(-5, 2)
      p_int -= 10 if p_int > 10
      predicted_int = (p_int * 10**-1) * 14183000.0
      relative_ints = pep.core_ints

      tail = @opts[:tail].to_f
      front = @opts[:front].to_f
      mu = @opts[:mu].to_f

      shuff = RThelper.RandomFloat(0.05, 1.0)
      pep.core_mzs.each_with_index do |mzmu, index|
        fin_mzs = []
        fin_ints = []
        t_index = 1
        relative_abundances_int = relative_ints[index]

        pep.rts.each_with_index do |_rt, i|
          if !@one_d
            #-------------Tailing-------------------------
            # The retention-time dependent length factors of the original
            # code were fixed at 1.0, so the width is simply tail*t + front.
            shape = tail * t_index + front
            fin_ints << RThelper.gaussian(t_index, mu, shape, 100.0)
            t_index += 1
          else
            #-----------Random 1d data--------------------
            # FIXME(review): +ints_factor+ is not defined anywhere in this
            # class, so this branch raises NameError unless it is provided
            # elsewhere. The intended value is not recoverable from this
            # file - confirm with the author.
            fin_ints << (relative_abundances_int * ints_factor) * shuff
          end

          fin_ints[i] = RThelper.RandomFloat(0.001, 0.4) if fin_ints[i] < 0.01

          # Disabled in the original source (m/z peak shape / profile):
          #   fraction = RThelper.gaussian(fin_mzs[i],mzmu,0.05,1)
          #   factor = fraction/1.0
          #   fin_ints[i] = fin_ints[i] * factor

          #-------------Jagged-ness---------------------
          sd = (@opts[:jagA] * (1 - Math.exp(-(@opts[:jagC]) * fin_ints[i])) + @opts[:jagB]) / 2
          fin_ints[i] += Distribution::Normal.rng(0, sd).call

          #-------------mz wobble-----------------------
          y = fin_ints[i]
          if y > 0
            wobble_int = @opts[:wobA] * y**(@opts[:wobB])
            wobble_mz = Distribution::Normal.rng(mzmu, wobble_int).call
            wobble_mz = 0.01 if wobble_mz < 0
            fin_mzs << wobble_mz
          else
            # Keep fin_mzs index-aligned with fin_ints and pep.rts. Skipping
            # the append here shifted every later m/z onto the wrong
            # intensity / retention time. The un-wobbled core m/z is used as
            # a placeholder for this non-positive intensity point.
            fin_mzs << mzmu
          end

          fin_ints[i] = fin_ints[i] * (predicted_int * (relative_abundances_int * 10**-2))
        end

        pep.insert_ints(fin_ints)
        pep.insert_mzs(fin_mzs)
      end
      pep
    end

    private

    # Adds every point of +fe+ with intensity > 0.9 to @data, keyed by
    # retention time. A retention time is skipped when it yields no points
    # or when any selected m/z is nil.
    def add_feature_to_data(fe)
      fe_ints = fe.ints
      fe_mzs = fe.mzs

      fe.rts.each_with_index do |rt, i|
        rt_mzs = []
        rt_ints = []

        fe.core_mzs.size.times do |j|
          mz = fe_mzs[j][i]
          int = fe_ints[j][i] || 0.0 # a missing intensity counts as zero
          next unless int > 0.9

          rt_mzs << mz
          rt_ints << int
        end

        next if rt_mzs.empty? || rt_mzs.include?(nil)

        if @data.key?(rt)
          @data[rt][0] = @data[rt][0] + rt_mzs
          @data[rt][1] = @data[rt][1] + rt_ints
        else
          @data[rt] = [rt_mzs, rt_ints]
        end
      end
    end
  end
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
|
2
|
+
require 'mspire/isotope/distribution'
|
3
|
+
|
4
|
+
module MS
  # A peptide ion whose simulated m/z and intensity series are kept in two
  # files on disk rather than in memory.
  #
  # * <tt>@mzs_file</tt>: first line is the sequence, second line holds the
  #   m/z series appended by #insert_mzs.
  # * <tt>@ints_file</tt>: a single line holding the intensity series
  #   appended by #insert_ints.
  #
  # Each appended series is written as comma-terminated values followed by
  # ";" (for example <tt>1.0,2.0,;</tt>).
  #
  # NOTE(review): file names are derived from the first 15 residues plus the
  # charge, so two peptides sharing that prefix and charge would share files;
  # the ".m/<letter>/" and ".i/<letter>/" directories are presumably created
  # by the caller - confirm.
  class Peptide
    # One-letter codes counted into +aa_counts+ (U, O and X are not counted).
    AMINO_ACIDS = %w[A R N D B C E Q Z G H I L K M F P S T W Y V J].freeze

    attr_accessor :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int,
                  :core_ints, :hydro, :pi, :aa_counts, :p_rt_i

    # sequence:: amino-acid string.
    # charge::   charge state; also encoded in the backing file names.
    #
    # Computes the theoretical isotope spectrum, creates/truncates the m/z
    # file with the sequence as its first line, and records per-residue
    # counts (with a trailing 0.0 entry appended).
    def initialize(sequence, charge)
      @p_rt = 0
      @p_int = 0
      @rts = []
      @charge = charge # this is saved in the file name as well

      spec = calcSpectrum(sequence, @charge)

      @core_ints = spec.intensities.clone
      @core_mzs = spec.mzs.clone
      @mzs_file = ".m/#{sequence[0]}/#{sequence[0...15]}_#{charge}"
      @ints_file = ".i/#{sequence[0]}/#{sequence[0...15]}_#{charge}"
      File.open(@mzs_file, "w") { |file| file.puts(sequence) }

      # m/z of the most intense peak of the theoretical spectrum.
      @mono_mz = spec.mzs[spec.intensities.index(spec.intensities.max)]
      @mass = @mono_mz * @charge

      @aa_counts = AMINO_ACIDS.map { |aa| sequence.count(aa) }
      @aa_counts << 0.0
    end

    def to_s
      "Peptide: #{sequence}"
    end

    # Reads the sequence back from the first line of the m/z file.
    def sequence
      File.open(@mzs_file, "r") { |file| file.gets.chomp }
    end

    #---------------------------------------------------------------------------
    # Returns the stored intensity series as an array of float arrays, one
    # inner array per #insert_ints call.
    def ints
      parse_series(File.open(@ints_file, "r") { |file| file.gets })
    end

    # Appends one intensity series to the intensity file.
    def insert_ints(arr)
      append_series(@ints_file, arr)
    end

    # Returns the stored m/z series as an array of float arrays, one inner
    # array per #insert_mzs call.
    def mzs
      line = File.open(@mzs_file, "r") do |file|
        file.gets # skip the sequence line
        file.gets
      end
      parse_series(line)
    end

    # Appends one m/z series to the m/z file.
    def insert_mzs(arr)
      append_series(@mzs_file, arr)
    end

    # Retention times covered by this peptide: the slice of
    # Sim_Spectra.r_times between the two indices given to #set_rts.
    def rts
      Sim_Spectra.r_times[@rts[0]..@rts[1]]
    end

    # Stores the first (a) and last (b) index into Sim_Spectra.r_times.
    def set_rts(a, b)
      @rts = [a, b]
    end

    # Removes both backing files if they exist.
    # Uses File.exist? because File.exists? was removed in Ruby 3.2.
    def delete
      File.delete(@mzs_file) if File.exist?(@mzs_file)
      File.delete(@ints_file) if File.exist?(@ints_file)
    end
    #---------------------------------------------------------------------------

    # Calculates the theoretical isotope spectrum for +seq+ at +charge+ and
    # returns it with its intensities multiplied by 100.
    def calcSpectrum(seq, charge)
      # isotope.rb from Dr. Prince
      atoms = countAtoms(seq)
      formula = %w[O N C H S P Se].zip(atoms).map { |element, count| "#{element}#{count}" }.join

      mf = Mspire::MolecularFormula.from_string(formula, charge)
      spec = Mspire::Isotope::Distribution.spectrum(mf, :max, 0.001)
      spec.intensities.map! { |i| i * 100.0 }
      spec
    end

    # Counts the atoms in the peptide sequence and returns
    # <tt>[o + 1, n, c, h + 2, s, p, se]</tt>.
    #
    # Ambiguity codes are resolved randomly: "X" to one of the first 20 keys
    # of Mspire's atom-count table, "B" to N or D, "Z" to Q or E. Residues
    # that match no known code are reported on stdout and skipped.
    def countAtoms(seq)
      totals = { o: 0, n: 0, c: 0, h: 0, s: 0, p: 0, se: 0 }

      seq.each_char do |aa|
        aa = case aa
             when "X" then Mspire::Isotope::AA::ATOM_COUNTS.keys[0..19][rand(20)]
             when "B" then %w[N D][rand(2)]
             when "Z" then %w[Q E][rand(2)]
             else aa
             end

        if aa !~ /A|R|N|D|C|E|Q|G|H|I|L|K|M|F|P|S|T|W|Y|V|U|O/
          puts "No amino acid match for #{aa}"
        else
          counts = Mspire::Isotope::AA::ATOM_COUNTS[aa]
          totals.each_key { |element| totals[element] += counts[element] }
        end
      end

      [totals[:o] + 1, totals[:n], totals[:c], totals[:h] + 2, totals[:s], totals[:p], totals[:se]]
    end

    private

    # Splits a stored line ("a,b,;c,;") into [[a, b], [c]] as floats.
    def parse_series(line)
      line.chomp.split(/;/).map { |iso| iso.chomp.split(/,/).map!(&:to_f) }
    end

    # Appends +arr+ to +path+ as comma-terminated values followed by ";".
    def append_series(path, arr)
      File.open(path, "a") do |file|
        arr.each { |val| file.print("#{val},") }
        file.print(";")
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
$LOAD_PATH << './lib'
|
2
|
+
require 'ms/rt/rt_helper'
|
3
|
+
require 'ms/noise'
|
4
|
+
require 'ms/rt/rtgenerator'
|
5
|
+
require 'ms/sim_feature'
|
6
|
+
|
7
|
+
module MS
  # Drives one simulated run: builds the list of scan (retention) times,
  # generates features for the peptides and exposes the resulting spectra.
  #
  # * +data+     - rt => [[mzs], [ints]] as produced by MS::Sim_Feature.
  # * +spectra+  - starts as a shallow copy of +data+; #noiseify adds noise.
  # * +features+ - the features returned by MS::Sim_Feature.
  # * +max_mz+   - largest m/z found in the densest-by-max entry of +data+.
  # * +noise+    - nil until #noiseify is called.
  class Sim_Spectra
    # Scan times shared by the whole simulation; other classes read them via
    # Sim_Spectra.r_times. Every new instance rebuilds this list.
    @@r_times = []

    attr_accessor :data, :max_mz, :spectra, :noise, :features

    # peptides:: peptides handed to MS::Rtgenerator.generateRT.
    # opts::     option hash; reads :noise_density, :sampling_rate, :run_time
    #            and :dropout_percentage here and is passed on to Sim_Feature.
    # one_d::    forwarded to Rtgenerator and Sim_Feature.
    def initialize(peptides, opts, one_d = false)
      @density = opts[:noise_density]
      sampling_rate = opts[:sampling_rate]
      run_time = opts[:run_time]
      drop_percentage = opts[:dropout_percentage]

      # RTS
      var = 0.1 / (sampling_rate * 2)
      # 1.0 (not 1) so an Integer sampling rate does not truncate the scan
      # interval to zero through integer division.
      interval = 1.0 / sampling_rate
      @@r_times = []
      spec_time = interval
      (sampling_rate * run_time).to_i.times do
        @@r_times << spec_time + RThelper.RandomFloat(-var, var)
        spec_time += interval
      end
      @@r_times = MS::Noise.spec_drops(drop_percentage)

      pre_features = MS::Rtgenerator.generateRT(peptides, one_d)

      # Features
      features_o = MS::Sim_Feature.new(pre_features, opts, one_d)
      @features = features_o.features
      @data = features_o.data
      # NOTE(review): raises NoMethodError when @data is empty (no peaks
      # were generated) - unchanged from the original behaviour.
      top_entry = @data.max_by { |_rt, val| val.nil? ? 0 : val[0].max }
      @max_mz = top_entry[1][0].max
      @spectra = @data.clone

      @noise = nil
    end

    # Generates noise via MS::Noise.noiseify and merges it into +spectra+ for
    # every scan time: signal m/z and intensity arrays are concatenated with
    # the noise arrays; scan times without signal get the noise alone.
    # Returns the noise structure.
    def noiseify
      @noise = MS::Noise.noiseify(@density, @max_mz)

      @@r_times.each do |rt|
        signal = @data[rt]
        noise_at_rt = @noise[rt]
        @spectra[rt] =
          if signal.nil?
            [noise_at_rt[0], noise_at_rt[1]]
          else
            [signal[0] + noise_at_rt[0], signal[1] + noise_at_rt[1]]
          end
      end

      @noise
    end

    # Scan times of the most recently constructed simulation (empty before
    # any instance has been created).
    def self.r_times
      @@r_times
    end
  end
end
|
65
|
+
|
66
|
+
#charge ratio: take both charge states, determine pH effective
|
67
|
+
#more small peaks from lesser charge states
|
68
|
+
|
69
|
+
#one_d
|
70
|
+
#fit to other labs data - different machine
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'ms/curvefit'
|
2
|
+
|
3
|
+
module MS
  # Command-line option parser for mspire-simulator, built on Trollop.
  #
  # Parsing happens in the constructor; the resulting option hash is
  # returned by #get.
  class Troll
    # Parses the command line, optionally replaces the options with those
    # returned by CurveFit.get_parameters (when --mzml is given), validates
    # them and rescales :overlapRange.
    #
    # Validation failures end the program through Trollop::die.
    def initialize
      @opts = Trollop::options do
        version "mspire-simulator 0.0.1a (c) 2012 Brigham Young University"
        banner <<-EOS

*********************************************************************
Description: Simulates ms runs given protein fasta files. Outputs
a mzML file.


Usage:
mspire-simulator [options] <filenames>+

where [options] are:
        EOS
        opt :digestor, "Digestion Enzyme; one of: \n\t\targ_c,\n \t\tasp_n,\n" \
                       "asp_n_ambic,\n" \
                       "chymotrypsin,\n \t\tcnbr,\n" \
                       "lys_c,\n \t\tlys_c_p,\n" \
                       "pepsin_a,\n\t\ttryp_cnbr,\n" \
                       "tryp_chymo,\n \t\ttrypsin_p,\n" \
                       "v8_de,\n \t\tv8_e,\n" \
                       "trypsin,\n \t\tv8_e_trypsin,\n" \
                       "v8_de_trypsin",
            :default => "trypsin"
        opt :sampling_rate, "How many scans per second", :default => 0.5
        opt :run_time, "Run time in seconds", :default => 1000.0
        opt :noise, "Noise on or off", :default => "true"
        opt :noise_density, "Determines the density of white noise", :default => 10
        opt :pH, "The pH that the sample is in - for determining charge", :default => 2.6
        opt :out_file, "Name of the output file", :default => "test.mzml"
        opt :contaminants, "Fasta file containing contaminant sequences", :default => "testFiles/contam/hum_keratin.fasta"
        opt :dropout_percentage, "Defines the percentage of random dropouts in the run. 0.0 <= percentage < 1.0", :default => 0.12
        opt :shuffle, "Option shuffles the scans to simulate 1d data", :default => "false"
        opt :one_d, "Turns on one dimension simulation; run_time is automatically set to 300.0", :default => "false"
        opt :truth, "Determines truth file type; false gives no truth file; one of: xml or csv", :default => "false"
        opt :front, "Fronting chromatography parameter", :default => 6.65
        opt :tail, "Tailing chromatography parameter", :default => 0.30
        opt :mu, "Expected value of the chromatography curve", :default => 25.0
        opt :wobA, "m/z wobble parameter", :default => 0.001071
        opt :wobB, "m/z wobble parameter", :default => -0.5430
        opt :jagA, "intensity variance parameter", :default => 10.34
        opt :jagC, "intensity variance parameter", :default => 0.00712
        opt :jagB, "intensity variance parameter", :default => 0.12
        opt :overlapRange, "range in which to determine overlapping peaks", :default => 1.0724699230489427
        opt :email, "Email address to send completion messages to", :default => "nil"
        opt :mzml, "Mzml file to extract simulation parameters from", :default => "nil"
        opt :generations, "If an mzml file is provided this specifies the number of generations for the curve fitting algorithm", :default => 30000
      end

      # The string "nil" is the "not given" sentinel for --mzml.
      @opts = CurveFit.get_parameters(@opts) if @opts[:mzml] != "nil"

      Trollop::die :sampling_rate, "must be greater than 0" if @opts[:sampling_rate] <= 0
      Trollop::die :run_time, "must be non-negative" if @opts[:run_time] < 0
      Trollop::die "must supply a .fasta protein sequence file" if ARGV.empty?
      if @opts[:dropout_percentage] < 0.0 || @opts[:dropout_percentage] >= 1.0
        Trollop::die :dropout_percentage, "must be greater than or equal to 0.0 and less than 1.0"
      end

      # Scale by 1e-6 and halve; presumably a ppm window converted to a
      # +/- half-width - TODO confirm.
      @opts[:overlapRange] = (@opts[:overlapRange] * 10.0**-6) / 2.0
    end

    # Returns the parsed (and post-processed) option hash.
    def get; @opts; end
  end
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require 'progress'
|
2
|
+
|
3
|
+
#if m/z value is in "[m/z, percentage contributed to peak]" it's a
|
4
|
+
#merged peak.
|
5
|
+
module MS
|
6
|
+
# Writes the "truth" XML file that records, for every simulated peptide and
# isotope, where its centroids ended up in the written spectra.
class Txml_file_writer
  # features::  objects responding to +sequence+, +charge+, +mzs+, +ints+
  #             and +rts+.
  # spectra::   hash of rt => [mzs, ints]; an entry of mzs may be a Hash
  #             representing a merged peak (see .get_ind).
  # file_name:: output path prefix; "_truth.xml" is appended.
  #
  # For each point with intensity > 0.9 a "<scan index>,<peak index>;" pair
  # is written, where the scan index is the position of the retention time
  # among the sorted keys of +spectra+. Isotopes without any such point get
  # no <lc_centroids> element.
  def self.write(features, spectra, file_name)
    @spectra = spectra
    @start = Time.now

    # rt => position among the sorted retention times (replaces a linear
    # Array#index scan per centroid).
    scan_index = {}
    spectra.keys.sort.each_with_index { |rt, position| scan_index[rt] = position }

    # Block form guarantees the file is closed even if a feature raises.
    File.open("#{file_name}_truth.xml", "w") do |file|
      file.puts "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
      file.puts "<simulated_peptides>"
      total = features.size.to_f
      features.each_with_index do |fe, k|
        sequence = fe.sequence
        charge = fe.charge
        mzs = fe.mzs
        ints = fe.ints
        rts = fe.rts
        Progress.progress("Writing xml:", ((k / total) * 100).to_i)
        file.puts "\t<simulated_peptide sequence=\"#{sequence}\" charge=\"#{charge.round}\">"
        mzs.each_with_index do |isotope_mzs, i|
          centroids = ""
          isotope_mzs.each_with_index do |mz, ind|
            next unless ints[i][ind] > 0.9

            index = get_ind(mz, rts[ind])
            centroids << "#{scan_index[rts[ind]]},#{index.inspect};"
          end
          next if centroids == ""

          file << "\t\t<lc_centroids isotopic_index=\"#{i}\">" << centroids << "</lc_centroids>\n"
        end
        file.puts "\t</simulated_peptide>"
      end
      file.puts "</simulated_peptides>"
    end

    Progress.progress("Writing xml:", 100, Time.now - @start)
    puts ''
  end

  # Locates +mz+ in the spectrum stored for +rt+.
  #
  # Returns the position of a plain m/z entry equal to +mz+; or, when +mz+
  # is found among the first value of a Hash entry (a merged peak),
  # <tt>[position, key[found + 1]]</tt> where +key+ is the Hash's first key;
  # or nil when there is no spectrum for +rt+ or no match. If several
  # entries match, the last one wins.
  def self.get_ind(mz, rt)
    index = nil
    spectrum = @spectra[rt]
    return index if spectrum.nil?

    spectrum[0].each_with_index do |entry, i|
      if entry == mz
        index = i
      elsif entry.class == Hash
        if (ind = entry.values[0].index(mz))
          index = [i, entry.keys[0][ind + 1]]
        end
      end
    end
    index
  end
end
|
68
|
+
|
69
|
+
# Writes the "truth" CSV file: one row per peak with its retention time,
# m/z, intensity and an index identifying the peptide/isotope it came from
# (0 for noise).
class Tcsv_file_writer
  # full_spectra:: rt => [mzs, ints] as written to the output (entries of
  #                mzs may be Hashes representing merged peaks).
  # spectra::      rt => [mzs, ints] containing only the simulated signal.
  # noise::        rt => [mzs, ints] of noise peaks, or the string "false"
  #                when noise is disabled.
  # features::     objects responding to +mzs+.
  # file_name::    output path prefix; "_truth.csv" is appended.
  def self.write(full_spectra, spectra, noise, features, file_name)
    @start = Time.now
    @spectra = full_spectra

    # create indices for real peaks
    ind_hash = create_indicies(features)

    # create data structure with indices, grouped by retention time
    data = data_with_indicies(full_spectra, spectra, noise, ind_hash).group_by { |row| row[0] }

    # Block form guarantees the file is closed even if a row raises.
    File.open("#{file_name}_truth.csv", "w") do |file|
      file.puts "rt,mz,int,index"
      total = data.size.to_f
      data.each_value.with_index do |rows, count|
        Progress.progress("Writing csv(process 2 of 2):", ((count / total) * 100).to_i)
        rows.each do |rt, mz, int, peak_index|
          if peak_index >= 1
            file.puts "#{rt},#{mz},#{int},#{peak_index}"
          else
            file.puts "#{rt},#{mz},#{int},#{0}"
          end
        end
      end
    end

    Progress.progress("Writing csv:", 100, Time.now - @start)
    puts ''
  end

  # Looks +mz+ up in the full spectrum stored for +rt+ and returns
  # <tt>[m_mz, int]</tt>.
  #
  # For a plain entry equal to +mz+ this is the m/z itself and its
  # intensity. For a Hash entry (merged peak) whose first value contains
  # +mz+, m_mz is <tt>[key[0], key[found + 1]]</tt> (+key+ being the Hash's
  # first key) and int is the sum of the flattened intensity entry. Both are
  # nil when nothing matches; the last match wins.
  def self.get_merged_mz(mz, rt)
    m_mz = nil
    int = nil
    mzs, ints = @spectra[rt][0], @spectra[rt][1]
    mzs.each_with_index do |entry, i|
      if entry == mz
        m_mz = mz
        int = ints[i]
      elsif entry.class == Hash
        if (ind = entry.values[0].index(mz))
          m_mz = [entry.keys[0][0], entry.keys[0][ind + 1]]
          int = ints[i].flatten.inject(:+)
        end
      end
    end
    return m_mz, int
  end

  # Maps every feature m/z to the float "<peptide no>.<isotope no>" (both
  # 1-based).
  #
  # NOTE(review): because the label goes through to_f, isotope 10 of a
  # peptide ("1.10") becomes 1.1 and is indistinguishable from isotope 1.
  # Left unchanged since it is part of the output format.
  def self.create_indicies(features)
    features.each_with_index.each_with_object({}) do |(pep, i), ind_hash|
      pep.mzs.each_with_index do |isotope_mzs, j|
        isotope_mzs.each { |mz| ind_hash[mz] = "#{i + 1}.#{j + 1}".to_f }
      end
    end
  end

  # Builds the flat row list <tt>[rt, mz.inspect, int, index]</tt>: first
  # the signal peaks of each retention time (index taken from +ind_hash+),
  # then, unless +noise+ is "false", its noise peaks (index 0). m/z and
  # intensity are resolved through .get_merged_mz.
  def self.data_with_indicies(full_spectra, spectra, noise, ind_hash)
    data = []
    total = spectra.length
    with_noise = noise != "false"

    spectra.each_with_index do |(rt, signal), done|
      Progress.progress("Writing csv(process 1 of 2):", ((done.to_f / total) * 100).to_i)

      unless signal.nil?
        signal.each_slice(2) do |mzs, _ints|
          mzs.each do |mz|
            peak_index = ind_hash[mz]
            m_mz, int = get_merged_mz(mz, rt)
            data << [rt, m_mz.inspect, int, peak_index]
          end
        end
      end

      next unless with_noise

      noise[rt].each_slice(2) do |mzs, _ints|
        mzs.each do |mz|
          m_mz, int = get_merged_mz(mz, rt)
          data << [rt, m_mz.inspect, int, 0]
        end
      end
    end
    data
  end
end
|
175
|
+
end
|
data/lib/progress.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
# Single-line console progress display.
module Progress
  module_function

  # Rewrites the current terminal line with +message+ followed by the
  # percentage +num+, right-aligned so that message plus percentage span 60
  # columns. When +time+ is given (anything other than the default ''), a
  # "Took: N.NN sec." suffix is appended, right-aligned to column 100 of the
  # printed string (control characters included in the count).
  #
  # message:: label shown at the start of the line.
  # num::     percentage to display.
  # time::    elapsed seconds, or '' (default) for no timing suffix.
  def progress(message, num, time = '')
    # "\r" moves the cursor to the beginning of the line; "\e[0K" is the ANSI
    # escape code that clears from the cursor to the end of the line
    # ("\e" is an alternative to "\033").
    # cf. http://en.wikipedia.org/wiki/ANSI_escape_code
    reset = "\r\e[0K"

    line = "#{reset} #{message}" + "#{num}%".rjust(60 - message.length)
    line += "Took: #{format('%.2f', time)} sec.".rjust(100 - line.length) unless time == ''

    print line
    $stdout.flush
  end
end
|