mspire-simulator 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +22 -0
- data/README.rdoc +17 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/mspire-simulator +125 -0
- data/bin/sim_mail.rb +26 -0
- data/bin/weka/M5P.model +0 -0
- data/bin/weka/M5Rules.model +0 -0
- data/bin/weka/weka.jar +0 -0
- data/lib/ms/curvefit/curve_fit_helper.rb +152 -0
- data/lib/ms/curvefit/fit_graph.rb +84 -0
- data/lib/ms/curvefit/mzml_reader.rb +28 -0
- data/lib/ms/curvefit.rb +120 -0
- data/lib/ms/isoelectric_calc.rb +122 -0
- data/lib/ms/merger.rb +101 -0
- data/lib/ms/mzml_wrapper.rb +67 -0
- data/lib/ms/noise.rb +51 -0
- data/lib/ms/rt/rt_helper.rb +31 -0
- data/lib/ms/rt/rtgenerator.rb +81 -0
- data/lib/ms/rt/weka.rb +150 -0
- data/lib/ms/sim_digester.rb +92 -0
- data/lib/ms/sim_feature.rb +175 -0
- data/lib/ms/sim_peptide.rb +182 -0
- data/lib/ms/sim_spectra.rb +70 -0
- data/lib/ms/sim_trollop.rb +68 -0
- data/lib/ms/tr_file_writer.rb +175 -0
- data/lib/progress.rb +24 -0
- data/mspire-simulator.gemspec +103 -0
- data/spec/file_writer_spec.rb +74 -0
- data/spec/merger_spec.rb +23 -0
- data/spec/ms-simulate_spec.rb +9 -0
- data/spec/peptide_spec.rb +16 -0
- data/spec/progress_spec.rb +22 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/spectra_spec.rb +111 -0
- data/testFiles/contam/hum_keratin.fasta +11 -0
- metadata +246 -0
@@ -0,0 +1,175 @@
|
|
1
|
+
|
2
|
+
require 'time'
|
3
|
+
require 'distribution'
|
4
|
+
require 'ms/sim_peptide'
|
5
|
+
require 'ms/rt/rt_helper'
|
6
|
+
require 'ms/tr_file_writer'
|
7
|
+
|
8
|
+
module MS
|
9
|
+
# Turns peptides into simulated features (per-isotope intensity and m/z
# traces) and regroups the resulting peaks by retention time.
#
# Collaborators that are defined outside this class and only called here:
# Sim_Spectra.r_times, Progress.progress, RThelper.RandomFloat,
# RThelper.gaussian and Distribution::Normal.rng.
class Sim_Feature
  # Peaks whose intensity is not above this value are left out of #data.
  MIN_INTENSITY = 0.9

  # data     - Hash of retention time => [[mzs], [ints]].
  # features - Array of the peptides after getInts has been applied to them.
  attr_accessor :data, :features

  # peptides - objects responding to p_int, core_ints, core_mzs, rts, ints,
  #            mzs, insert_ints and insert_mzs.
  # opts     - Hash; this class reads :tail, :front, :mu, :jagA, :jagB, :jagC,
  #            :wobA and :wobB from it.
  # one_d    - truthy selects the flat "1d" intensity model in getInts.
  def initialize(peptides,opts,one_d)
    @start = Time.now
    @features = []
    @data = {}
    @max_int = 0.0
    @one_d = one_d
    @max_time = Sim_Spectra.r_times.max
    @opts = opts

    generate_features(peptides)
    @start = Time.now
    populate_data
  end

  # Intensities are shaped in the rt direction by a gaussian whose standard
  # deviation grows with the scan position (tail * position + front), then
  # perturbed ("jagged-ness"), and each peak's m/z is drawn around the
  # isotope's core m/z ("wobble").
  #
  # For every isotope in pep.core_mzs one intensity array and one m/z array are
  # handed to pep.insert_ints / pep.insert_mzs. Both arrays always hold exactly
  # one entry per retention time so that index i means the same scan in both.
  #
  # Returns pep.
  def getInts(pep)
    p_int = pep.p_int + RThelper.RandomFloat(-5,2)
    p_int -= 10 if p_int > 10
    predicted_int = (p_int * 10**-1) * 14183000.0
    relative_ints = pep.core_ints

    tail = @opts[:tail].to_f
    front = @opts[:front].to_f
    mu = @opts[:mu].to_f

    # NOTE(review): the 1d branch multiplies by `ints_factor`, which was never
    # defined anywhere in this class, so that branch raised NameError. A
    # neutral factor of 1.0 is assumed here -- confirm the intended value.
    ints_factor = 1.0
    shuff = RThelper.RandomFloat(0.05,1.0)

    pep.core_mzs.each_with_index do |mzmu, index|
      relative_abundance = relative_ints[index]
      fin_mzs = []
      fin_ints = []

      pep.rts.each_with_index do |_rt, i|
        intensity =
          if @one_d
            # Random 1d data
            (relative_abundance * ints_factor) * shuff
          else
            # Tailing: scan positions are counted from 1.
            position = i + 1
            shape = tail * position + front
            RThelper.gaussian(position, mu, shape, 100.0)
          end

        # Lift near-zero intensities to a small random floor.
        intensity = RThelper.RandomFloat(0.001,0.4) if intensity < 0.01
        intensity += jag_noise(intensity)

        fin_mzs << wobbled_mz(mzmu, intensity)
        fin_ints << intensity * (predicted_int * (relative_abundance * 10**-2))
      end

      pep.insert_ints(fin_ints)
      pep.insert_mzs(fin_mzs)
    end

    pep
  end

  private

  # Runs getInts over every peptide, collecting the results in @features and
  # reporting progress.
  def generate_features(peptides)
    total = peptides.size.to_f
    peptides.each_with_index do |pep, ind|
      Progress.progress("Generating features:",(((ind + 1) / total) * 100).to_i)
      @features << getInts(pep)
    end
    Progress.progress("Generating features:",100,Time.now-@start)
    puts ""
  end

  # Regroups the per-isotope traces of every feature into
  # @data[rt] = [[mzs], [ints]].
  def populate_data
    total = @features.size.to_f
    @features.each_with_index do |fe, k|
      Progress.progress("Populating structure for mzml:",((k / total) * 100).to_i)

      fe_ints = fe.ints
      fe_mzs = fe.mzs
      isotope_count = fe.core_mzs.size

      fe.rts.each_with_index do |rt, i|
        rt_mzs = []
        rt_ints = []

        isotope_count.times do |j|
          int = fe_ints[j][i] || 0.0
          next unless int > MIN_INTENSITY
          rt_mzs << fe_mzs[j][i]
          rt_ints << int
        end

        # Nothing to store for this scan, or an m/z is missing for a kept peak.
        next if rt_mzs.empty? || rt_mzs.include?(nil)

        slot = (@data[rt] ||= [[], []])
        slot[0] = slot[0] + rt_mzs
        slot[1] = slot[1] + rt_ints
      end
    end
    Progress.progress("Populating structure for mzml:",100,Time.now-@start)
    puts ""
  end

  # Draws the intensity perturbation for one peak: a normal deviate around 0
  # whose standard deviation depends on the intensity through the jag options.
  def jag_noise(intensity)
    sd = (@opts[:jagA] * (1-Math.exp(-(@opts[:jagC]) * intensity)) + @opts[:jagB])/2
    Distribution::Normal.rng(0,sd).call
  end

  # Draws the m/z for one peak around mzmu, with a spread of wobA * y**wobB.
  # Negative draws are replaced by 0.01.
  #
  # For y <= 0 no draw is made and mzmu itself is returned: the peak still
  # needs an m/z entry so that the m/z and intensity arrays stay the same
  # length (previously nothing was appended and later entries shifted).
  def wobbled_mz(mzmu, y)
    return mzmu unless y > 0
    wobble_int = @opts[:wobA]*y**(@opts[:wobB])
    wobble_mz = Distribution::Normal.rng(mzmu,wobble_int).call
    wobble_mz < 0 ? 0.01 : wobble_mz
  end
end
|
175
|
+
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
|
2
|
+
require 'mspire/isotope/distribution'
|
3
|
+
|
4
|
+
module MS
|
5
|
+
# A peptide with its theoretical isotope spectrum. The simulated per-isotope
# m/z and intensity traces are kept on disk rather than in memory:
#
#   @mzs_file  - line 1 is the sequence, line 2 the appended m/z series
#   @ints_file - line 1 is the appended intensity series
#
# A series is stored as "v1,v2,...,;" per isotope, all on one line.
#
# NOTE(review): both file names use only the first 15 residues plus the charge,
# so longer sequences sharing that prefix and charge map to the same files.
# NOTE(review): @ints_file is only ever appended to; a file left over from an
# earlier run is not truncated here -- confirm callers always call #delete.
class Peptide
  # One-letter codes tallied into aa_counts, in this order.
  COUNTED_RESIDUES = ['A','R','N','D','B','C','E','Q','Z','G','H','I',
                      'L','K','M','F','P','S','T','W','Y','V','J']

  # Residues countAtoms looks up in Mspire::Isotope::AA::ATOM_COUNTS.
  KNOWN_RESIDUE = /A|R|N|D|C|E|Q|G|H|I|L|K|M|F|P|S|T|W|Y|V|U|O/

  # Element keys read from each ATOM_COUNTS entry.
  ELEMENTS = [:o, :n, :c, :h, :s, :p, :se]

  attr_accessor :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i

  # sequence - String of one-letter amino acid codes.
  # charge   - charge state; also part of the backing file names.
  #
  # Writes the sequence to @mzs_file, so the ".m/<first residue>/" directory
  # has to exist already.
  def initialize(sequence, charge)
    @p_rt = 0
    @p_int = 0
    @rts = []
    @charge = charge #this is saved in the file name as well

    spec = calcSpectrum(sequence, @charge)

    @core_ints = spec.intensities.clone
    @core_mzs = spec.mzs.clone
    @mzs_file = ".m/#{sequence[0]}/#{sequence[0...15]}_#{charge}"
    @ints_file = ".i/#{sequence[0]}/#{sequence[0...15]}_#{charge}"
    File.open(@mzs_file, "w") { |file| file.puts(sequence) }

    # m/z of the most intense isotope peak.
    @mono_mz = spec.mzs[spec.intensities.index(spec.intensities.max)]
    @mass = @mono_mz * @charge
    #U,O,X ???
    @aa_counts = COUNTED_RESIDUES.map { |aa| sequence.count(aa) }
    @aa_counts << 0.0
  end

  def to_s
    "Peptide: #{sequence}"
  end

  # Reads the sequence back from the first line of @mzs_file.
  def sequence
    File.open(@mzs_file, "r") { |file| file.gets.chomp }
  end

  #---------------------------------------------------------------------------
  # Returns the stored intensity series, one Array of Floats per isotope.
  # Returns [] when nothing has been stored yet (file missing or empty).
  def ints
    return [] unless File.exist?(@ints_file)
    parse_series(File.open(@ints_file, "r") { |file| file.gets })
  end

  # Appends one isotope's intensity series to @ints_file.
  def insert_ints(arr)
    append_series(@ints_file, arr)
  end

  # Returns the stored m/z series, one Array of Floats per isotope.
  # Returns [] when no series has been appended after the sequence line.
  def mzs
    line = File.open(@mzs_file, "r") do |file|
      file.gets # skip the sequence line
      file.gets
    end
    parse_series(line)
  end

  # Appends one isotope's m/z series to @mzs_file.
  def insert_mzs(arr)
    append_series(@mzs_file, arr)
  end

  # The slice of Sim_Spectra.r_times between the two indices given to set_rts.
  def rts
    Sim_Spectra::r_times[@rts[0]..@rts[1]]
  end

  def set_rts(a,b)
    @rts = [a,b]
  end

  # Removes both backing files if they are present.
  # (File.exist? is used because File.exists? no longer exists in Ruby 3.2+.)
  def delete
    [@mzs_file, @ints_file].each do |path|
      File.delete(path) if File.exist?(path)
    end
  end
  #---------------------------------------------------------------------------

  # Calculates the theoretical spectrum.
  #
  # Builds a formula string from countAtoms, asks
  # Mspire::Isotope::Distribution for the spectrum and scales its intensities
  # by 100 in place. Returns the spectrum object.
  def calcSpectrum(seq, charge)
    #isotope.rb from Dr. Prince
    o, n, c, h, s, phos, se = countAtoms(seq)
    formula = "O#{o}N#{n}C#{c}H#{h}S#{s}P#{phos}Se#{se}"

    mf = Mspire::MolecularFormula.from_string(formula, charge)
    spec = Mspire::Isotope::Distribution.spectrum(mf, :max, 0.001)

    spec.intensities.map! { |i| i * 100.0 }
    spec
  end

  # Counts the number of each atom in the peptide sequence.
  #
  # Returns [o + 1, n, c, h + 2, s, p, se]: the residue totals plus one extra
  # O and two extra H. Characters that are not a known residue are reported on
  # stdout and skipped.
  def countAtoms(seq)
    totals = Hash[ELEMENTS.map { |el| [el, 0] }]

    seq.each_char do |aa|
      aa = resolve_residue(aa)
      if aa !~ KNOWN_RESIDUE
        puts "No amino acid match for #{aa}"
      else
        counts = atom_counts[aa]
        ELEMENTS.each { |el| totals[el] += counts[el] }
      end
    end

    [totals[:o] + 1, totals[:n], totals[:c], totals[:h] + 2,
     totals[:s], totals[:p], totals[:se]]
  end

  private

  def atom_counts
    Mspire::Isotope::AA::ATOM_COUNTS
  end

  # Replaces an ambiguity code by a randomly chosen concrete residue.
  def resolve_residue(aa)
    case aa
    when "X" then atom_counts.keys[0..19][rand(20)] # any (uncommon "U" and "O" excluded)
    when "B" then ["N","D"][rand(2)]                # "B" is "N" or "D"
    when "Z" then ["Q","E"][rand(2)]                # "Z" is "Q" or "E"
    else aa
    end
  end

  # Parses "a,b,;c,;" into [[a, b], [c]]; nil (no line read) gives [].
  def parse_series(line)
    return [] if line.nil?
    line.chomp.split(/;/).map do |iso|
      iso.chomp.split(/,/).map { |fl| fl.to_f }
    end
  end

  # Appends arr as "v1,v2,...,;" to the file at path.
  def append_series(path, arr)
    File.open(path, "a") do |file|
      arr.each { |val| file.print("#{val},") }
      file.print(";")
    end
  end
end
|
182
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
$LOAD_PATH << './lib'
|
2
|
+
require 'ms/rt/rt_helper'
|
3
|
+
require 'ms/noise'
|
4
|
+
require 'ms/rt/rtgenerator'
|
5
|
+
require 'ms/sim_feature'
|
6
|
+
|
7
|
+
module MS
|
8
|
+
# Drives one simulated run: builds the shared list of scan times, generates
# the features for the peptides and keeps the resulting per-scan data.
#
# The scan times live in a class variable and are exposed through
# Sim_Spectra.r_times; creating a new instance replaces them.
class Sim_Spectra
  attr_accessor :data, :max_mz, :spectra, :noise, :features

  # The scan times produced by the most recent Sim_Spectra.new.
  def self.r_times
    @@r_times
  end

  # peptides - passed on to MS::Rtgenerator.generateRT.
  # opts     - Hash; read here: :noise_density, :sampling_rate, :run_time and
  #            :dropout_percentage. It is also passed on to MS::Sim_Feature.
  # one_d    - passed on to MS::Rtgenerator.generateRT and MS::Sim_Feature.
  def initialize(peptides,opts,one_d = false)
    @density = opts[:noise_density]
    sampling_rate = opts[:sampling_rate]
    run_time = opts[:run_time]
    drop_percentage = opts[:dropout_percentage]

    #RTS
    jitter = 0.1/(sampling_rate*2)
    # Float division on purpose: with an Integer rate, 1/sampling_rate is
    # integer division and every scan time would collapse to 0.
    interval = 1.0/sampling_rate
    @@r_times = []
    spec_time = interval
    (sampling_rate*run_time).to_i.times do
      @@r_times << spec_time + RThelper.RandomFloat(-jitter,jitter)
      spec_time += interval
    end
    # The times just built are published through Sim_Spectra.r_times before
    # this call and replaced by whatever spec_drops returns.
    @@r_times = MS::Noise.spec_drops(drop_percentage)

    pre_features = MS::Rtgenerator.generateRT(peptides,one_d)

    #Features
    features_o = MS::Sim_Feature.new(pre_features,opts,one_d)
    @features = features_o.features
    @data = features_o.data
    # Largest m/z over all scans; a scan without data counts as 0.
    @max_mz = @data.max_by { |_rt, val| val.nil? ? 0 : val[0].max }[1][0].max
    @spectra = @data.clone

    @noise = nil
  end

  # Generates noise via MS::Noise.noiseify and, for every scan time, stores
  # signal plus noise (or noise alone when the scan has no signal) in
  # @spectra. @data is left untouched. Returns the noise.
  def noiseify
    @noise = MS::Noise.noiseify(@density,@max_mz)

    @@r_times.each do |rt|
      signal = @data[rt]
      noise_mzs, noise_ints = @noise[rt][0], @noise[rt][1]
      @spectra[rt] =
        if signal.nil?
          [noise_mzs, noise_ints]
        else
          [signal[0] + noise_mzs, signal[1] + noise_ints]
        end
    end

    @noise
  end
end
|
64
|
+
end
|
65
|
+
|
66
|
+
#charge ratio: take both charge states, determine pH effective
|
67
|
+
#more small peaks from lesser charge states
|
68
|
+
|
69
|
+
#one_d
|
70
|
+
#fit to other labs data - different machine
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'ms/curvefit'
|
2
|
+
|
3
|
+
module MS
|
4
|
+
# Command line option parsing for mspire-simulator, built on Trollop.
#
# NOTE(review): the banner and the :digestor description are multi-line
# string literals; their original leading whitespace could not be recovered
# and is reproduced here flush-left -- confirm against the released file.
class Troll
  # Declares the options, lets Trollop parse the command line, optionally
  # replaces the options with CurveFit.get_parameters (when --mzml is given),
  # validates them and finally rescales :overlapRange.
  def initialize
    @opts = Trollop::options do
      version "mspire-simulator 0.0.1a (c) 2012 Brigham Young University"
      banner <<-EOS

*********************************************************************
Description: Simulates ms runs given protein fasta files. Outputs
a mzML file.


Usage:
mspire-simulator [options] <filenames>+

where [options] are:
      EOS
      opt :digestor, "Digestion Enzyme; one of: \n\t\targ_c,\n \t\tasp_n,
asp_n_ambic,
chymotrypsin,\n \t\tcnbr,
lys_c,\n \t\tlys_c_p,
pepsin_a,\n\t\ttryp_cnbr,
tryp_chymo,\n \t\ttrypsin_p,
v8_de,\n \t\tv8_e,
trypsin,\n \t\tv8_e_trypsin,
v8_de_trypsin",
        :default => "trypsin"
      opt :sampling_rate, "How many scans per second", :default => 0.5
      opt :run_time, "Run time in seconds", :default => 1000.0
      opt :noise, "Noise on or off", :default => "true"
      opt :noise_density, "Determines the density of white noise", :default => 10
      opt :pH, "The pH that the sample is in - for determining charge", :default => 2.6
      opt :out_file, "Name of the output file", :default => "test.mzml"
      opt :contaminants, "Fasta file containing contaminant sequences", :default => "testFiles/contam/hum_keratin.fasta"
      opt :dropout_percentage, "Defines the percentage of random dropouts in the run. 0.0 <= percentage < 1.0", :default => 0.12
      opt :shuffle, "Option shuffles the scans to simulate 1d data", :default => "false"
      opt :one_d, "Turns on one dimension simulation; run_time is automatically set to 300.0", :default => "false"
      opt :truth, "Determines truth file type; false gives no truth file; one of: xml or csv", :default => "false"
      opt :front, "Fronting chromatography parameter", :default => 6.65
      opt :tail, "Tailing chromatography parameter", :default => 0.30
      opt :mu, "Expected value of the chromatography curve", :default => 25.0
      opt :wobA, "m/z wobble parameter", :default => 0.001071
      opt :wobB, "m/z wobble parameter", :default => -0.5430
      opt :jagA, "intensity variance parameter", :default => 10.34
      opt :jagC, "intensity variance parameter", :default => 0.00712
      opt :jagB, "intensity variance parameter", :default => 0.12
      opt :overlapRange, "range in which to determine overlapping peaks", :default => 1.0724699230489427
      opt :email, "Email address to send completion messages to", :default => "nil"
      opt :mzml, "Mzml file to extract simulation parameters from", :default => "nil"
      opt :generations, "If an mzml file is provided this specifies the number of generations for the curve fitting algorithm", :default => 30000
    end

    # The string "nil" is the "not given" marker for --mzml.
    @opts = CurveFit.get_parameters(@opts) if @opts[:mzml] != "nil"

    Trollop::die :sampling_rate, "must be greater than 0" if @opts[:sampling_rate] <= 0
    Trollop::die :run_time, "must be non-negative" if @opts[:run_time] < 0
    Trollop::die "must supply a .fasta protien sequence file" if ARGV.empty?
    dropout = @opts[:dropout_percentage]
    if dropout < 0.0 || dropout >= 1.0
      Trollop::die :dropout_percentage, "must be greater than or equal to 0.0 and less than 1.0"
    end

    # Stored as half of the given value scaled by 10**-6.
    @opts[:overlapRange] = (@opts[:overlapRange]*10.0**-6)/2.0
  end

  # The parsed (and post-processed) options.
  def get; @opts; end
end
|
68
|
+
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require 'progress'
|
2
|
+
|
3
|
+
#if m/z value is in "[m/z, percentage contributed to peak]" it's a
|
4
|
+
#merged peak.
|
5
|
+
module MS
|
6
|
+
# Writes the XML truth file: for every simulated peptide and isotope, the
# list of (scan index, peak index) pairs of its centroids.
class Txml_file_writer
  # features  - objects responding to sequence, charge, mzs, ints and rts.
  # spectra   - Hash of retention time => [[mzs], [ints]]; an mzs entry may be
  #             a Hash describing a merged peak (see get_ind).
  # file_name - the output goes to "<file_name>_truth.xml".
  def self.write(features,spectra,file_name)
    @spectra = spectra
    @start = Time.now

    r_times = spectra.keys.sort
    # rt => position in r_times, so each centroid does not need a linear scan.
    scan_index = {}
    r_times.each_with_index { |rt, pos| scan_index[rt] = pos unless scan_index.key?(rt) }
    total = features.size.to_f

    # Block form so the file is closed even if a feature raises.
    File.open("#{file_name}_truth.xml","w") do |file|
      file.puts "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
      file.puts "<simulated_peptides>"
      features.each_with_index do |fe,k|
        Progress.progress("Writing xml:",(((k/total)*100).to_i))
        write_peptide(file, fe, r_times, scan_index)
      end
      file.puts "</simulated_peptides>"
    end

    Progress.progress("Writing xml:",100,Time.now-@start)
    puts ''
  end

  # Looks up where mz sits in the spectrum stored for rt (the spectra given to
  # the last call of write).
  #
  # Returns the position of a plain entry equal to mz; for a merged-peak Hash
  # whose first value contains mz at position p, returns
  # [position, first_key[p + 1]]. When several entries match, the last one
  # wins. Returns nil when rt has no spectrum or nothing matches.
  def self.get_ind(mz,rt)
    spectrum = @spectra[rt]
    return nil if spectrum.nil?

    index = nil
    spectrum[0].each_with_index do |m, i|
      if m == mz
        index = i
      elsif m.instance_of?(Hash)
        ind = m.values[0].index(mz)
        index = [i,m.keys[0][ind+1]] if ind
      end
    end
    index
  end

  # Writes one <simulated_peptide> element. Only centroids with an intensity
  # above 0.9 are listed, and isotopes without any such centroid are omitted.
  def self.write_peptide(file, fe, r_times, scan_index)
    ints = fe.ints
    rts = fe.rts

    file.puts "\t<simulated_peptide sequence=\"#{fe.sequence}\" charge=\"#{fe.charge.round}\">"
    fe.mzs.each_with_index do |isotope_mzs, i|
      centroids = []
      isotope_mzs.each_with_index do |mz, ind|
        next unless ints[i][ind] > 0.9
        rt = rts[ind]
        scan = scan_index.fetch(rt) { r_times.index(rt) }
        centroids << "#{scan},#{get_ind(mz,rt).inspect};"
      end
      next if centroids.empty?
      file << "\t\t<lc_centroids isotopic_index=\"#{i}\">#{centroids.join}</lc_centroids>\n"
    end
    file.puts "\t</simulated_peptide>"
  end
  private_class_method :write_peptide
end
|
68
|
+
|
69
|
+
# Writes the CSV truth file: one "rt,mz,int,index" row per peak, where index
# identifies the simulated peptide/isotope the peak belongs to and is 0 for
# noise.
#
# NOTE(review): the index is built as "<peptide>.<isotope>".to_f, so isotope 10
# of a peptide gets the same value as isotope 1 (1.10 == 1.1). Left as is
# because it is part of the file format.
# NOTE(review): a merged peak's m/z is written with Array#inspect and therefore
# contains a comma inside its CSV field.
class Tcsv_file_writer
  # full_spectra - Hash of rt => [[mzs], [ints]] after merging; also used by
  #                get_merged_mz.
  # spectra      - Hash of rt => [[mzs], [ints]] holding the signal peaks.
  # noise        - Hash of rt => [[mzs], [ints]], or "false"/nil for no noise.
  # features     - objects responding to mzs.
  # file_name    - the output goes to "<file_name>_truth.csv".
  def self.write(full_spectra,spectra,noise,features,file_name)
    @start = Time.now
    @spectra = full_spectra

    #create indices for real peaks
    ind_hash = create_indicies(features)

    #create data structure with indices, grouped by retention time
    rows_by_rt = data_with_indicies(full_spectra,spectra,noise,ind_hash).group_by { |row| row[0] }

    #write -- block form so the file is closed even if a row raises
    File.open("#{file_name}_truth.csv","w") do |file|
      file.puts "rt,mz,int,index"
      total = rows_by_rt.size.to_f
      count = 0
      rows_by_rt.each_value do |rows|
        Progress.progress("Writing csv(process 2 of 2):",(((count/total)*100).to_i))
        rows.each do |rt, mz, int, peak_index|
          # Peaks without a known index (nil) are written as 0, like noise.
          peak_index = 0 unless peak_index && peak_index >= 1
          file.puts "#{rt},#{mz},#{int},#{peak_index}"
        end
        count += 1
      end
    end

    Progress.progress("Writing csv:",100,Time.now-@start)
    puts ''
  end

  # Finds mz in the full spectrum stored for rt.
  #
  # Returns [mz, intensity] for a plain entry equal to mz. For a merged-peak
  # Hash whose first value contains mz at position p, returns
  # [[first_key[0], first_key[p + 1]], sum of the flattened intensity entry].
  # When several entries match, the last one wins; [nil, nil] if none does.
  def self.get_merged_mz(mz,rt)
    m_mz = nil
    int = nil
    mzs, ints = @spectra[rt][0], @spectra[rt][1]
    mzs.each_with_index do |m, i|
      if m == mz
        m_mz = mz
        int = ints[i]
      elsif m.instance_of?(Hash)
        ind = m.values[0].index(mz)
        if ind
          key = m.keys[0]
          m_mz = [key[0],key[ind+1]]
          int = ints[i].flatten.inject(:+)
        end
      end
    end
    return m_mz,int
  end

  # Maps every m/z of every feature to "<feature number>.<isotope number>"
  # (both counted from 1) converted to a Float.
  def self.create_indicies(features)
    ind_hash = {}
    features.each_with_index do |pep,i|
      pep.mzs.each_with_index do |m_ar,j|
        label = "#{i + 1}.#{j + 1}".to_f
        m_ar.each { |mz| ind_hash[mz] = label }
      end
    end
    ind_hash
  end

  # Builds the rows [rt, mz.inspect, int, index] for all signal peaks (index
  # from ind_hash, possibly nil) followed, per scan, by all noise peaks
  # (index 0). Relies on @spectra as set by write for the m/z lookup.
  def self.data_with_indicies(full_spectra,spectra,noise,ind_hash)
    data = []
    total = spectra.length
    with_noise = noise && noise != "false"
    done = 0.0

    spectra.each do |rt,signal|
      Progress.progress("Writing csv(process 1 of 2):",(((done/total)*100).to_i))

      unless signal.nil?
        signal.each_slice(2) do |mzs,_ints|
          mzs.each do |mz|
            peak_index = ind_hash[mz]
            merged_mz,int = get_merged_mz(mz,rt)
            data << [rt,merged_mz.inspect,int,peak_index]
          end
        end
      end

      if with_noise
        noise[rt].each_slice(2) do |mzs,_ints|
          mzs.each do |mz|
            merged_mz,int = get_merged_mz(mz,rt)
            data << [rt,merged_mz.inspect,int,0]
          end
        end
      end
      done += 1
    end
    data
  end
end
|
175
|
+
end
|
data/lib/progress.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
# Single-line progress reporting on the terminal.
module Progress
  module_function

  # Redraws the current terminal line as " <message>   <num>%", optionally
  # followed by "Took: <time> sec.".
  #
  # message - label shown at the start of the line.
  # num     - percentage to display.
  # time    - elapsed seconds; '' (the default) or nil leaves the timing out.
  def progress(message, num, time = '')
    # "\r" moves the cursor to the beginning of the line and "\e[0K" is the
    # ANSI escape code that clears from the cursor to the end of the line
    # ("\e" is an alternative to "\033").
    # cf. http://en.wikipedia.org/wiki/ANSI_escape_code
    reset = "\r" + "\e[0K"

    line = "#{reset} #{message}" + "#{num}%".rjust(60-message.length)
    unless time.nil? || time == ''
      # The padding is computed from the full string length, escape
      # characters included.
      line = line + "Took: #{"%.2f" % time} sec.".rjust(100-line.length)
    end

    print line
    $stdout.flush
  end
end
|