mspire-simulator 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +46 -3
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/mspire-simulator +8 -0
- data/bin/sim_mail +2 -2
- data/lib/cv_parser.rb +7 -0
- data/lib/ms/curvefit/curve_fit_helper.rb +26 -20
- data/lib/ms/curvefit/mzml_reader.rb +1 -1
- data/lib/ms/curvefit.rb +25 -8
- data/lib/ms/isoelectric_calc.rb +162 -103
- data/lib/ms/merger.rb +46 -33
- data/lib/ms/mzml_wrapper.rb +74 -29
- data/lib/ms/noise.rb +28 -28
- data/lib/ms/rt/rt_helper.rb +3 -3
- data/lib/ms/rt/rtgenerator.rb +63 -51
- data/lib/ms/rt/weka.rb +17 -17
- data/lib/ms/sim_digester.rb +45 -26
- data/lib/ms/sim_feature.rb +180 -122
- data/lib/ms/sim_peptide.rb +58 -55
- data/lib/ms/sim_spectra.rb +22 -23
- data/lib/ms/sim_trollop.rb +36 -32
- data/lib/ms/tr_file_writer.rb +111 -98
- data/lib/progress.rb +21 -20
- data/mspire-simulator.gemspec +5 -5
- data/spec/file_writer_spec.rb +2 -1
- data/spec/merger_spec.rb +2 -1
- data/spec/ms-simulate_spec.rb +1 -1
- data/spec/peptide_spec.rb +2 -1
- data/spec/spec_helper.rb +8 -3
- data/spec/spectra_spec.rb +4 -3
- metadata +5 -5
- data/spec/progress_spec.rb +0 -22
data/lib/ms/sim_peptide.rb
CHANGED
@@ -3,14 +3,17 @@ require 'mspire/isotope/distribution'
|
|
3
3
|
|
4
4
|
module MS
|
5
5
|
class Peptide
|
6
|
-
def initialize(sequence, charge)
|
6
|
+
def initialize(sequence, charge, abu = 1.0)
|
7
|
+
@abu = abu
|
7
8
|
@p_rt = 0
|
8
9
|
@p_int = 0
|
9
10
|
@rts = []
|
10
11
|
@charge = charge #this is saved in the file name as well
|
11
|
-
|
12
|
+
|
12
13
|
spec = calcSpectrum(sequence, @charge)
|
13
|
-
|
14
|
+
|
15
|
+
# TODO Ryan: alter this to handle variable and static mass modifications... Add it from the Katamari code
|
16
|
+
|
14
17
|
@core_ints = spec.intensities.clone
|
15
18
|
@core_mzs = spec.mzs.clone
|
16
19
|
@mzs_file = ".m/#{sequence[0]}/#{sequence[0...15]}_#{charge}"
|
@@ -22,30 +25,30 @@ module MS
|
|
22
25
|
@mass = @mono_mz * @charge
|
23
26
|
#U,O,X ???
|
24
27
|
amino_acids = ['A','R','N','D','B','C','E','Q','Z','G','H','I',
|
25
|
-
|
28
|
+
'L','K','M','F','P','S','T','W','Y','V','J']
|
26
29
|
@aa_counts = amino_acids.map do |aa|
|
27
|
-
|
30
|
+
sequence.count(aa)
|
28
31
|
end
|
29
32
|
@aa_counts<<0.0
|
30
33
|
end
|
31
|
-
|
32
|
-
attr_reader :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i
|
33
|
-
attr_writer :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i
|
34
|
-
|
34
|
+
|
35
|
+
attr_reader :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i, :abu, :sx
|
36
|
+
attr_writer :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i, :abu, :sx
|
37
|
+
|
35
38
|
def to_s
|
36
39
|
file = File.open(@mzs_file,"r")
|
37
40
|
seq = file.gets.chomp
|
38
41
|
file.close
|
39
42
|
"Peptide: #{seq}"
|
40
43
|
end
|
41
|
-
|
44
|
+
|
42
45
|
def sequence
|
43
46
|
file = File.open(@mzs_file,"r")
|
44
47
|
seq = file.gets.chomp
|
45
48
|
file.close
|
46
49
|
seq
|
47
50
|
end
|
48
|
-
|
51
|
+
|
49
52
|
#---------------------------------------------------------------------------
|
50
53
|
def ints
|
51
54
|
file = File.open(@ints_file, "r")
|
@@ -53,20 +56,20 @@ module MS
|
|
53
56
|
file.close
|
54
57
|
ints = []
|
55
58
|
line.each do |iso|
|
56
|
-
|
59
|
+
ints<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
|
57
60
|
end
|
58
61
|
return ints
|
59
62
|
end
|
60
|
-
|
63
|
+
|
61
64
|
def insert_ints(arr)
|
62
65
|
file = File.open(@ints_file, "a")
|
63
66
|
arr.each do |val|
|
64
|
-
|
67
|
+
file.print("#{val},")
|
65
68
|
end
|
66
69
|
file.print(";")
|
67
70
|
file.close
|
68
71
|
end
|
69
|
-
|
72
|
+
|
70
73
|
def mzs
|
71
74
|
file = File.open(@mzs_file, "r")
|
72
75
|
line = file.gets
|
@@ -74,44 +77,44 @@ module MS
|
|
74
77
|
file.close
|
75
78
|
mzs = []
|
76
79
|
line.each do |iso|
|
77
|
-
|
80
|
+
mzs<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
|
78
81
|
end
|
79
82
|
return mzs
|
80
83
|
end
|
81
|
-
|
84
|
+
|
82
85
|
def insert_mzs(arr)
|
83
86
|
file = File.open(@mzs_file, "a")
|
84
87
|
arr.each do |val|
|
85
|
-
|
88
|
+
file.print("#{val},")
|
86
89
|
end
|
87
90
|
file.print(";")
|
88
91
|
file.close
|
89
92
|
end
|
90
|
-
|
93
|
+
|
91
94
|
def rts
|
92
95
|
return Sim_Spectra::r_times[@rts[0]..@rts[1]]
|
93
96
|
end
|
94
|
-
|
97
|
+
|
95
98
|
def set_rts(a,b)
|
96
99
|
@rts = [a,b]
|
97
100
|
end
|
98
|
-
|
101
|
+
|
99
102
|
def delete
|
100
103
|
if File.exists?(@mzs_file)
|
101
|
-
|
104
|
+
File.delete(@mzs_file)
|
102
105
|
end
|
103
106
|
if File.exists?(@ints_file)
|
104
|
-
|
107
|
+
File.delete(@ints_file)
|
105
108
|
end
|
106
109
|
end
|
107
110
|
#---------------------------------------------------------------------------
|
108
|
-
|
111
|
+
|
109
112
|
# Calculates theoretical specturm
|
110
113
|
#
|
111
114
|
def calcSpectrum(seq, charge)
|
112
115
|
#isotope.rb from Dr. Prince
|
113
116
|
atoms = countAtoms(seq)
|
114
|
-
|
117
|
+
|
115
118
|
var = ""
|
116
119
|
var<<"O"
|
117
120
|
var<<atoms[0].to_s
|
@@ -127,7 +130,7 @@ module MS
|
|
127
130
|
var<<atoms[5].to_s
|
128
131
|
var<<"Se"
|
129
132
|
var<<atoms[6].to_s
|
130
|
-
|
133
|
+
|
131
134
|
mf = Mspire::MolecularFormula.from_string(var, charge)
|
132
135
|
spec = Mspire::Isotope::Distribution.spectrum(mf, :max, 0.001)
|
133
136
|
|
@@ -135,8 +138,8 @@ module MS
|
|
135
138
|
|
136
139
|
return spec
|
137
140
|
end
|
138
|
-
|
139
|
-
|
141
|
+
|
142
|
+
|
140
143
|
# Counts the number of each atom in the peptide sequence.
|
141
144
|
#
|
142
145
|
def countAtoms(seq)
|
@@ -148,33 +151,33 @@ module MS
|
|
148
151
|
p = 0
|
149
152
|
se = 0
|
150
153
|
seq.each_char do |aa|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
154
|
+
|
155
|
+
#poly amino acids
|
156
|
+
#"X" is for any (I exclude uncommon "U" and "O")
|
157
|
+
if aa == "X"
|
158
|
+
aas = Mspire::Isotope::AA::ATOM_COUNTS.keys[0..19]
|
159
|
+
aa = aas[rand(20)]
|
160
|
+
#"B" is "N" or "D"
|
161
|
+
elsif aa == "B"
|
162
|
+
aas = ["N","D"]
|
163
|
+
aa = aas[rand(2)]
|
164
|
+
#"Z" is "Q" or "E"
|
165
|
+
elsif aa == "Z"
|
166
|
+
aas = ["Q","E"]
|
167
|
+
aa = aas[rand(2)]
|
168
|
+
end
|
169
|
+
|
170
|
+
if aa !~ /A|R|N|D|C|E|Q|G|H|I|L|K|M|F|P|S|T|W|Y|V|U|O/
|
171
|
+
puts "No amino acid match for #{aa}"
|
172
|
+
else
|
173
|
+
o = o + Mspire::Isotope::AA::ATOM_COUNTS[aa][:o]
|
174
|
+
n = n + Mspire::Isotope::AA::ATOM_COUNTS[aa][:n]
|
175
|
+
c = c + Mspire::Isotope::AA::ATOM_COUNTS[aa][:c]
|
176
|
+
h = h + Mspire::Isotope::AA::ATOM_COUNTS[aa][:h]
|
177
|
+
s = s + Mspire::Isotope::AA::ATOM_COUNTS[aa][:s]
|
178
|
+
p = p + Mspire::Isotope::AA::ATOM_COUNTS[aa][:p]
|
179
|
+
se = se + Mspire::Isotope::AA::ATOM_COUNTS[aa][:se]
|
180
|
+
end
|
178
181
|
end
|
179
182
|
return (o + 1),n,c,(h + 2) ,s,p,se
|
180
183
|
end
|
data/lib/ms/sim_spectra.rb
CHANGED
@@ -7,12 +7,11 @@ require 'ms/sim_feature'
|
|
7
7
|
module MS
|
8
8
|
class Sim_Spectra
|
9
9
|
def initialize(peptides,opts,one_d = false)
|
10
|
-
@density = opts[:noise_density]
|
11
10
|
@data
|
12
11
|
@max_mz
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
sampling_rate = opts[:sampling_rate]
|
13
|
+
run_time = opts[:run_time]
|
14
|
+
drop_percentage = opts[:dropout_percentage]
|
16
15
|
#RTS
|
17
16
|
var = 0.1/(sampling_rate*2)
|
18
17
|
@@r_times = []
|
@@ -23,43 +22,43 @@ module MS
|
|
23
22
|
spec_time = spec_time + (1/sampling_rate)
|
24
23
|
end
|
25
24
|
@@r_times = MS::Noise.spec_drops(drop_percentage)
|
26
|
-
|
25
|
+
|
27
26
|
pre_features = MS::Rtgenerator.generateRT(peptides,one_d)
|
28
|
-
|
27
|
+
|
29
28
|
#Features
|
30
29
|
features_o = MS::Sim_Feature.new(pre_features,opts,one_d)
|
31
30
|
@features = features_o.features
|
32
31
|
@data = features_o.data
|
33
|
-
@max_mz =
|
32
|
+
@max_mz = features_o.max_mz
|
34
33
|
@spectra = @data.clone
|
35
|
-
|
34
|
+
|
36
35
|
@noise = nil
|
37
|
-
|
36
|
+
|
38
37
|
end
|
39
|
-
|
38
|
+
|
40
39
|
def noiseify
|
41
|
-
@noise = MS::Noise.noiseify(
|
42
|
-
|
40
|
+
@noise = MS::Noise.noiseify(opts,@max_mz)
|
41
|
+
|
43
42
|
@@r_times.each do |k|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
43
|
+
s_v = @data[k]
|
44
|
+
n_v = @noise[k]
|
45
|
+
if s_v != nil
|
46
|
+
@spectra[k] = [s_v[0]+n_v[0],s_v[1]+n_v[1]]
|
47
|
+
else
|
48
|
+
@spectra[k] = [n_v[0],n_v[1]]
|
49
|
+
end
|
51
50
|
end
|
52
|
-
|
51
|
+
|
53
52
|
return @noise
|
54
53
|
end
|
55
|
-
|
54
|
+
|
56
55
|
def self.r_times
|
57
56
|
@@r_times
|
58
57
|
end
|
59
|
-
|
58
|
+
|
60
59
|
attr_reader :data, :max_mz, :spectra, :noise, :features
|
61
60
|
attr_writer :data, :max_mz, :spectra, :noise, :features
|
62
|
-
|
61
|
+
|
63
62
|
end
|
64
63
|
end
|
65
64
|
|
data/lib/ms/sim_trollop.rb
CHANGED
@@ -6,19 +6,19 @@ module MS
|
|
6
6
|
@opts = Trollop::options do
|
7
7
|
version "mspire-simulator 0.0.1a (c) 2012 Brigham Young University"
|
8
8
|
banner <<-EOS
|
9
|
-
|
9
|
+
|
10
10
|
*********************************************************************
|
11
11
|
Description: Simulates ms runs given protein fasta files. Outputs
|
12
12
|
a mzML file.
|
13
|
-
|
14
|
-
|
13
|
+
|
14
|
+
|
15
15
|
Usage:
|
16
16
|
mspire-simulator [options] <filenames>+
|
17
|
-
|
17
|
+
|
18
18
|
where [options] are:
|
19
19
|
EOS
|
20
20
|
opt :digestor, "Digestion Enzyme; one of: \n\t\targ_c,\n \t\tasp_n,
|
21
|
-
|
21
|
+
asp_n_ambic,
|
22
22
|
chymotrypsin,\n \t\tcnbr,
|
23
23
|
lys_c,\n \t\tlys_c_p,
|
24
24
|
pepsin_a,\n\t\ttryp_cnbr,
|
@@ -27,42 +27,46 @@ module MS
|
|
27
27
|
trypsin,\n \t\tv8_e_trypsin,
|
28
28
|
v8_de_trypsin",
|
29
29
|
:default => "trypsin"
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
30
|
+
opt :sampling_rate, "How many scans per second", :default => 0.5
|
31
|
+
opt :run_time, "Run time in seconds", :default => 1000.0
|
32
|
+
opt :noise, "Noise on or off", :default => "true"
|
33
|
+
opt :noise_density, "Determines the density of white noise", :default => 10
|
34
|
+
opt :noiseMaxInt, "The max noise intensity level", :default => 1000
|
35
|
+
opt :noiseMinInt, "The minimum noise intensity level", :default => 50
|
36
|
+
opt :pH, "The pH that the sample is in - for determining charge", :default => 2.6
|
37
|
+
opt :out_file, "Name of the output file", :default => "test.mzml"
|
38
|
+
opt :contaminants, "Fasta file containing contaminant sequences", :default => "testFiles/contam/hum_keratin.fasta"
|
39
|
+
opt :dropout_percentage, "Defines the percentage of random dropouts in the run. 0.0 <= percentage < 1.0", :default => 0.01
|
40
|
+
opt :shuffle, "Option shuffles the scans to simulate 1d data", :default => "false"
|
41
|
+
opt :one_d, "Turns on one dimension simulation; run_time is automatically set to 300.0", :default => "false"
|
42
|
+
opt :truth, "Determines truth file type; false gives no truth file; one of: xml or csv", :default => "false"
|
43
|
+
opt :front, "Fronting chromatography parameter", :default => 6.65
|
44
|
+
opt :tail, "Tailing chromatography parameter", :default => 0.30
|
45
|
+
opt :mu, "Expected value of the chromatography curve", :default => 25.0
|
46
|
+
opt :wobA, "m/z wobble parameter", :default => 0.001071
|
47
|
+
opt :wobB, "m/z wobble parameter", :default => -0.5430
|
48
|
+
opt :jagA, "intensity variance parameter", :default => 10.34
|
49
|
+
opt :jagC, "intensity variance parameter", :default => 0.00712
|
50
|
+
opt :jagB, "intensity variance parameter", :default => 0.12
|
51
|
+
opt :overlapRange, "range in which to determine overlapping peaks", :default => 1.0724699230489427
|
52
|
+
opt :email, "Email address to send completion messages to", :default => "nil"
|
53
|
+
opt :mzml, "Mzml file to extract simulation parameters from", :default => "nil"
|
54
|
+
opt :generations, "If an mzml file is provided this specifies the number of generations for the curve fitting algorithm", :default => 30000
|
55
|
+
opt :mass_label, "Specify a mass tag pattern", :default => 0
|
56
|
+
opt :modifications, "Use a specific modifications file, or read them from a header of the fasta file, perhaps... TBD..."
|
57
|
+
|
54
58
|
end
|
55
|
-
|
59
|
+
|
56
60
|
if @opts[:mzml] != "nil"
|
57
61
|
@opts = CurveFit.get_parameters(@opts)
|
58
62
|
end
|
59
63
|
Trollop::die :sampling_rate, "must be greater than 0" if @opts[:sampling_rate] <= 0
|
60
64
|
Trollop::die :run_time, "must be non-negative" if @opts[:run_time] < 0
|
61
|
-
Trollop::die "must supply a .fasta
|
65
|
+
Trollop::die "must supply a .fasta protein sequence file" if ARGV.empty?
|
62
66
|
Trollop::die :dropout_percentage, "must be between greater than or equal to 0.0 or less than 1.0" if @opts[:dropout_percentage] < 0.0 or @opts[:dropout_percentage] >= 1.0
|
63
67
|
@opts[:overlapRange] = (@opts[:overlapRange]*10.0**-6)/2.0
|
64
68
|
end
|
65
|
-
|
69
|
+
|
66
70
|
def get; @opts; end
|
67
71
|
end
|
68
72
|
end
|
data/lib/ms/tr_file_writer.rb
CHANGED
@@ -6,47 +6,51 @@ module MS
|
|
6
6
|
class Txml_file_writer
|
7
7
|
def self.write(features,spectra,file_name)
|
8
8
|
@spectra = spectra
|
9
|
-
@start = Time.now
|
10
9
|
file = File.open("#{file_name}_truth.xml","w")
|
11
|
-
|
10
|
+
|
12
11
|
r_times = spectra.keys.sort
|
13
|
-
|
12
|
+
|
14
13
|
file.puts "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
|
15
14
|
file.puts "<simulated_peptides>"
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
15
|
+
total = features.size.to_f
|
16
|
+
prog = Progress.new("Writing xml:")
|
17
|
+
num = 0
|
18
|
+
step = total/100.0
|
19
|
+
features.each_with_index do |fe,k|
|
20
|
+
sequence = fe.sequence
|
21
|
+
charge = fe.charge
|
22
|
+
mzs = fe.mzs
|
23
|
+
ints = fe.ints
|
24
|
+
rts = fe.rts
|
25
|
+
if k > step * (num + 1)
|
26
|
+
num = (((k/total)*100).to_i)
|
27
|
+
prog.update(num)
|
28
|
+
end
|
29
|
+
file.puts "\t<simulated_peptide sequence=\"#{sequence}\" charge=\"#{charge.round}\">"
|
30
|
+
mzs.each_with_index do |mzs,i|
|
31
|
+
tags = ""
|
32
|
+
centroids = ""
|
33
|
+
tags<<"\t\t<lc_centroids isotopic_index=\"#{i}\">"
|
34
|
+
mzs.each_with_index do |mz,ind|
|
35
|
+
if ints[i][ind] > 0.9
|
36
|
+
index = get_ind(mz,rts[ind])
|
37
|
+
centroids<<"#{r_times.index(rts[ind])},#{index.inspect};"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
if centroids != ""
|
41
|
+
tags<<centroids
|
42
|
+
tags<<"</lc_centroids>\n"
|
43
|
+
file<<tags
|
44
|
+
end
|
45
|
+
end
|
46
|
+
file.puts "\t</simulated_peptide>"
|
47
|
+
end
|
43
48
|
file.puts "</simulated_peptides>"
|
44
49
|
file.close
|
45
|
-
|
46
|
-
|
47
|
-
puts ''
|
50
|
+
|
51
|
+
prog.finish!
|
48
52
|
end
|
49
|
-
|
53
|
+
|
50
54
|
def self.get_ind(mz,rt)
|
51
55
|
index = nil
|
52
56
|
if @spectra[rt] != nil
|
@@ -65,109 +69,118 @@ module MS
|
|
65
69
|
return index
|
66
70
|
end
|
67
71
|
end
|
68
|
-
|
72
|
+
|
69
73
|
class Tcsv_file_writer
|
70
74
|
def self.write(full_spectra,spectra,noise,features,file_name)
|
71
|
-
@start = Time.now
|
72
75
|
@spectra = full_spectra
|
73
|
-
|
76
|
+
|
74
77
|
#create indices for real peaks
|
75
78
|
ind_hash = create_indicies(features)
|
76
|
-
|
79
|
+
|
77
80
|
#create data structure with indices
|
78
81
|
data = data_with_indicies(full_spectra,spectra,noise,ind_hash)
|
79
|
-
|
82
|
+
|
80
83
|
#group by retention time
|
81
84
|
data = data.group_by{|d| d[0]}
|
82
|
-
|
85
|
+
|
83
86
|
#write
|
84
87
|
file = File.open("#{file_name}_truth.csv","w")
|
85
88
|
file.puts "rt,mz,int,index"
|
86
89
|
total = data.size.to_f
|
87
90
|
count = 0
|
91
|
+
prog = Progress.new("Writing csv(process 2 of 2):")
|
92
|
+
num = 0
|
93
|
+
step = total/100
|
88
94
|
data.each_value do |val|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
95
|
+
if count > step * (num + 1)
|
96
|
+
num = (((count/total)*100).to_i)
|
97
|
+
prog.update(num)
|
98
|
+
end
|
99
|
+
val.each do |a|
|
100
|
+
if a[3] >= 1
|
101
|
+
file.puts "#{a[0]},#{a[1]},#{a[2]},#{a[3]}"
|
102
|
+
else
|
103
|
+
file.puts "#{a[0]},#{a[1]},#{a[2]},#{0}"
|
104
|
+
end
|
105
|
+
end
|
106
|
+
count += 1
|
98
107
|
end
|
99
108
|
file.close
|
100
|
-
|
101
|
-
Progress.progress("Writing csv:",100,Time.now-@start)
|
102
|
-
puts ''
|
109
|
+
prog.finish!
|
103
110
|
end
|
104
|
-
|
111
|
+
|
105
112
|
def self.get_merged_mz(mz,rt)
|
106
113
|
m_mz = nil
|
107
114
|
int = nil
|
108
115
|
mzs = @spectra[rt][0]
|
109
116
|
ints = @spectra[rt][1]
|
110
117
|
mzs.each_with_index do |m, i|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
118
|
+
if m == mz
|
119
|
+
m_mz = mz
|
120
|
+
int = ints[i]
|
121
|
+
elsif m.class == Hash
|
122
|
+
if ind = m.values[0].index(mz)
|
123
|
+
m_mz = [m.keys[0][0],m.keys[0][ind+1]]
|
124
|
+
int = ints[i].flatten.inject(:+)
|
125
|
+
end
|
126
|
+
end
|
120
127
|
end
|
121
128
|
return m_mz,int
|
122
129
|
end
|
123
|
-
|
130
|
+
|
124
131
|
def self.create_indicies(features)
|
125
132
|
ind_hash = {}
|
126
133
|
features.each_with_index do |pep,i|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
134
|
+
pep.mzs.each_with_index do |m_ar,j|
|
135
|
+
m_ar.each do |mz|
|
136
|
+
ind_hash[mz] = "#{i + 1}.#{j + 1}".to_f
|
137
|
+
end
|
138
|
+
end
|
132
139
|
end
|
133
140
|
return ind_hash
|
134
141
|
end
|
135
|
-
|
142
|
+
|
136
143
|
def self.data_with_indicies(full_spectra,spectra,noise,ind_hash)
|
137
144
|
count = 1
|
138
145
|
time_i = 0.0
|
139
146
|
data = []
|
140
147
|
total = spectra.length
|
148
|
+
prog = Progress.new("Writing csv(process 1 of 2):")
|
149
|
+
num = 0
|
150
|
+
step = total/100
|
141
151
|
spectra.each do |k,v|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
152
|
+
if time_i > step * (num + 1)
|
153
|
+
num = (((time_i/total)*100).to_i)
|
154
|
+
prog.update(num)
|
155
|
+
end
|
156
|
+
|
157
|
+
merged_d = full_spectra[k]
|
158
|
+
merged_mzs = merged_d[0]
|
159
|
+
merged_ints = merged_d[1]
|
160
|
+
|
161
|
+
if noise != "false"
|
162
|
+
n_data = noise[k]
|
163
|
+
end
|
164
|
+
|
165
|
+
if v != nil
|
166
|
+
v.each_slice(2) do |m,i|
|
167
|
+
m.each_with_index do |mz,index|
|
168
|
+
peak_index = ind_hash[mz]
|
169
|
+
mz,int = get_merged_mz(mz,k)
|
170
|
+
data<<[k,mz.inspect,int,peak_index]
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
if noise != "false"
|
176
|
+
n_data.each_slice(2) do |m,i|
|
177
|
+
m.each_with_index do |mz,index|
|
178
|
+
mz,int = get_merged_mz(mz,k)
|
179
|
+
data<<[k,mz.inspect,int,0]
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
time_i += 1
|
171
184
|
end
|
172
185
|
return data
|
173
186
|
end
|