mspire-simulator 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +11 -8
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/bin/mspire-simulator +49 -48
- data/lib/ms/merger.rb +20 -59
- data/lib/ms/mzml_wrapper.rb +60 -40
- data/lib/ms/noise.rb +1 -1
- data/lib/ms/rt/rtgenerator.rb +33 -30
- data/lib/ms/rt/weka.rb +16 -12
- data/lib/ms/sim_digester.rb +37 -62
- data/lib/ms/sim_feature.rb +58 -151
- data/lib/ms/sim_modifications.rb +40 -0
- data/lib/ms/sim_peptide.rb +61 -103
- data/lib/ms/sim_spectra.rb +17 -31
- data/lib/ms/sim_trollop.rb +9 -4
- data/lib/ms/tr_file_writer.rb +34 -151
- data/mspire-simulator.gemspec +9 -5
- metadata +21 -4
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
require 'obo/ontology'
|
3
|
+
|
4
|
+
# Turns the modifications option string into a per-residue lookup table.
#
# The option string is a "_"-separated list of tokens. The first nine
# characters of each token are looked up as a modification id in the
# PSI-MOD ontology file (mod.obo) shipped with the obo library; whatever
# follows the id is used as the residue/terminus key.
#
# After construction +modifications+ is either
# * <tt>["false"]</tt> (the split option string) when the first token is
#   "false", i.e. no lookup was performed, or
# * a Hash mapping residue key => Array of <tt>[mod_id, diff_formula]</tt>
#   pairs, where +diff_formula+ is the quoted text of the entry's
#   "DiffFormula" xref (nil when the entry has no such xref).
class Modifications
  attr_accessor :modifications

  # mods:: the raw option string, e.g. "false" or "<id><residue>_<id><residue>"
  def initialize(mods)
    @modifications = mods.split(/_/)
    get_mods if @modifications[0] != "false"
  end

  # Replaces the token list held in @modifications with the lookup table
  # described on the class and returns that table.
  #
  # Raises ArgumentError when a token's id is not present in the ontology.
  def get_mods()
    obo = Obo::Ontology.new(Obo::Ontology::DIR + '/mod.obo')
    table = {}
    @modifications.each do |token|
      mod_id = token[0..8]
      residue = token[9..-1]

      element = obo.id_to_element[mod_id]
      if element.nil?
        raise ArgumentError, "Unknown modification id #{mod_id.inspect} in #{token.inspect}"
      end
      tags = element.tagvalues

      # When several xrefs mention DiffFormula the last one wins.
      diff = nil
      Array(tags['xref']).each do |x|
        diff = x.split(/"/)[1] if x =~ /DiffFormula/
      end

      (table[residue] ||= []) << [tags['id'][0], diff]
    end
    @modifications = table
  end
end
|
data/lib/ms/sim_peptide.rb
CHANGED
@@ -1,117 +1,45 @@
|
|
1
|
-
|
2
1
|
require 'mspire/isotope/distribution'
|
3
2
|
|
4
3
|
module MS
|
5
4
|
class Peptide
|
6
|
-
def initialize(sequence, charge, abu = 1.0)
|
5
|
+
# Builds a peptide, computes its theoretical spectrum and stores the
# results through +db+.
#
# sequence::      amino-acid sequence string
# charge::        charge state; multiplied with the selected m/z to give @mass
# abu::           abundance (defaults to 1.0)
# db::            database handle; must respond to +execute+ and +prepare+
# id::            row id used for the core_spec, aac and peptides inserts
# prot_id::       id written as the last column of the peptides row
# modifications:: modification table kept in @mods (used by countAtoms)
#
# NOTE(review): all SQL below is assembled by string interpolation, which
# is only safe while +sequence+ and the other values contain no quotes.
def initialize(sequence, charge, abu = 1.0,db,id,prot_id,modifications)
  @abu = abu
  @p_rt = 0
  @p_int = 0
  @rts = []
  @charge = charge
  @mods = modifications

  spec = calcSpectrum(sequence)

  # TODO Ryan: alter this to handle variable and static mass modifications...Add it from the Katamari code

  #core mzs, ints
  db.execute "INSERT INTO core_spec VALUES(#{id},'#{spec.mzs}','#{spec.intensities}')"

  # m/z of the most intense peak of the theoretical spectrum
  @mono_mz = spec.mzs[spec.intensities.index(spec.intensities.max)]
  @mass = @mono_mz * @charge

  #U,O,X ???
  # NOTE(review): left empty; the per-residue counts are only written to
  # the aac table below - confirm nothing expects this to be populated.
  @aa_counts = []
  amino_acids = ['A','R','N','D','B','C','E','Q','Z','G','H','I',
                 'L','K','M','F','P','S','T','W','Y','V','J']
  counts = amino_acids.map { |aa| sequence.count(aa) }

  # trailing 0.0 is a place holder for predicted values
  aac_sql = "INSERT INTO aac VALUES(#{id},#{counts.join(',')},0.0)"
  stm = db.prepare(aac_sql)
  begin
    stm.execute
  ensure
    # release the statement even when execute raises
    stm.close if stm
  end

  db.execute "INSERT INTO peptides VALUES(#{id},'#{sequence}', #{@mass}, #{charge}, #{@mono_mz}, #{@p_rt},NULL, #{@p_int}, #{@abu}, NULL,NULL,NULL,#{prot_id})"
end
|
51
39
|
|
52
|
-
#---------------------------------------------------------------------------
|
53
|
-
def ints
|
54
|
-
file = File.open(@ints_file, "r")
|
55
|
-
line = file.gets.chomp.split(/;/)
|
56
|
-
file.close
|
57
|
-
ints = []
|
58
|
-
line.each do |iso|
|
59
|
-
ints<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
|
60
|
-
end
|
61
|
-
return ints
|
62
|
-
end
|
63
|
-
|
64
|
-
def insert_ints(arr)
|
65
|
-
file = File.open(@ints_file, "a")
|
66
|
-
arr.each do |val|
|
67
|
-
file.print("#{val},")
|
68
|
-
end
|
69
|
-
file.print(";")
|
70
|
-
file.close
|
71
|
-
end
|
72
|
-
|
73
|
-
def mzs
|
74
|
-
file = File.open(@mzs_file, "r")
|
75
|
-
line = file.gets
|
76
|
-
line = file.gets.chomp.split(/;/)
|
77
|
-
file.close
|
78
|
-
mzs = []
|
79
|
-
line.each do |iso|
|
80
|
-
mzs<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
|
81
|
-
end
|
82
|
-
return mzs
|
83
|
-
end
|
84
|
-
|
85
|
-
def insert_mzs(arr)
|
86
|
-
file = File.open(@mzs_file, "a")
|
87
|
-
arr.each do |val|
|
88
|
-
file.print("#{val},")
|
89
|
-
end
|
90
|
-
file.print(";")
|
91
|
-
file.close
|
92
|
-
end
|
93
|
-
|
94
|
-
def rts
|
95
|
-
return Sim_Spectra::r_times[@rts[0]..@rts[1]]
|
96
|
-
end
|
97
|
-
|
98
|
-
def set_rts(a,b)
|
99
|
-
@rts = [a,b]
|
100
|
-
end
|
101
|
-
|
102
|
-
def delete
|
103
|
-
if File.exists?(@mzs_file)
|
104
|
-
File.delete(@mzs_file)
|
105
|
-
end
|
106
|
-
if File.exists?(@ints_file)
|
107
|
-
File.delete(@ints_file)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
#---------------------------------------------------------------------------
|
111
|
-
|
112
40
|
# Calculates theoretical spectrum
|
113
41
|
#
|
114
|
-
def calcSpectrum(seq
|
42
|
+
def calcSpectrum(seq)
|
115
43
|
#isotope.rb from Dr. Prince
|
116
44
|
atoms = countAtoms(seq)
|
117
45
|
|
@@ -131,7 +59,7 @@ module MS
|
|
131
59
|
var<<"Se"
|
132
60
|
var<<atoms[6].to_s
|
133
61
|
|
134
|
-
mf = Mspire::MolecularFormula.from_string(var, charge)
|
62
|
+
mf = Mspire::MolecularFormula.from_string(var, @charge)
|
135
63
|
spec = Mspire::Isotope::Distribution.spectrum(mf, :max, 0.001)
|
136
64
|
|
137
65
|
spec.intensities.map!{|i| i = i*100.0}
|
@@ -143,6 +71,7 @@ module MS
|
|
143
71
|
# Counts the number of each atom in the peptide sequence.
|
144
72
|
#
|
145
73
|
def countAtoms(seq)
|
74
|
+
atom_indexes = {'O' => 0,'N' => 1,'C' => 2,'H' => 3,'S' => 4,'P' => 5,'Se' => 6}
|
146
75
|
o = 0
|
147
76
|
n = 0
|
148
77
|
c = 0
|
@@ -150,9 +79,12 @@ module MS
|
|
150
79
|
s = 0
|
151
80
|
p = 0
|
152
81
|
se = 0
|
82
|
+
@charge.times {h += 1}
|
83
|
+
atom_counts = [(o + 1),n,c,(h + 2),s,p,se]
|
84
|
+
|
153
85
|
seq.each_char do |aa|
|
154
|
-
|
155
86
|
#poly amino acids
|
87
|
+
#maybe in the future ignore fringe case amino acids
|
156
88
|
#"X" is for any (I exclude uncommon "U" and "O")
|
157
89
|
if aa == "X"
|
158
90
|
aas = Mspire::Isotope::AA::ATOM_COUNTS.keys[0..19]
|
@@ -166,20 +98,46 @@ module MS
|
|
166
98
|
aas = ["Q","E"]
|
167
99
|
aa = aas[rand(2)]
|
168
100
|
end
|
101
|
+
|
102
|
+
#perform modification for residue
|
103
|
+
if @mods != nil
|
104
|
+
if @mods[aa] != nil
|
105
|
+
mods = @mods[aa]
|
106
|
+
mods.each do |mod|
|
107
|
+
mod[1].split(/\s/).each_slice(2) do |sl|
|
108
|
+
atom_counts[atom_indexes[sl[0]]] = atom_counts[atom_indexes[sl[0]]] + sl[1].to_i
|
109
|
+
end
|
110
|
+
end
|
111
|
+
elsif seq[0] == aa and @mods["CT"] != nil#N-terminus
|
112
|
+
mods = @mods["CT"]
|
113
|
+
mods.each do |mod|
|
114
|
+
mod[1].split(/\s/).each_slice(2) do |sl|
|
115
|
+
atom_counts[atom_indexes[sl[0]]] = atom_counts[atom_indexes[sl[0]]] + sl[1].to_i
|
116
|
+
end
|
117
|
+
end
|
118
|
+
elsif seq[-1] == aa and @mods["NT"] != nil#C-terminus
|
119
|
+
mods = @mods["NT"]
|
120
|
+
mods.each do |mod|
|
121
|
+
mod[1].split(/\s/).each_slice(2) do |sl|
|
122
|
+
atom_counts[atom_indexes[sl[0]]] = atom_counts[atom_indexes[sl[0]]] + sl[1].to_i
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
169
127
|
|
170
128
|
if aa !~ /A|R|N|D|C|E|Q|G|H|I|L|K|M|F|P|S|T|W|Y|V|U|O/
|
171
129
|
puts "No amino acid match for #{aa}"
|
172
130
|
else
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
131
|
+
atom_counts[0] = atom_counts[0] + Mspire::Isotope::AA::ATOM_COUNTS[aa][:o]
|
132
|
+
atom_counts[1] = atom_counts[1] + Mspire::Isotope::AA::ATOM_COUNTS[aa][:n]
|
133
|
+
atom_counts[2] = atom_counts[2] + Mspire::Isotope::AA::ATOM_COUNTS[aa][:c]
|
134
|
+
atom_counts[3] = atom_counts[3] + Mspire::Isotope::AA::ATOM_COUNTS[aa][:h]
|
135
|
+
atom_counts[4] = atom_counts[4] + Mspire::Isotope::AA::ATOM_COUNTS[aa][:s]
|
136
|
+
atom_counts[5] = atom_counts[5] + Mspire::Isotope::AA::ATOM_COUNTS[aa][:p]
|
137
|
+
atom_counts[6] = atom_counts[6] + Mspire::Isotope::AA::ATOM_COUNTS[aa][:se]
|
180
138
|
end
|
181
139
|
end
|
182
|
-
return
|
140
|
+
return atom_counts
|
183
141
|
end
|
184
142
|
end
|
185
143
|
end
|
data/lib/ms/sim_spectra.rb
CHANGED
@@ -6,8 +6,8 @@ require 'ms/sim_feature'
|
|
6
6
|
|
7
7
|
module MS
|
8
8
|
class Sim_Spectra
|
9
|
-
def initialize(
|
10
|
-
@
|
9
|
+
def initialize(opts,one_d = false,db)
|
10
|
+
@opts = opts
|
11
11
|
@max_mz
|
12
12
|
sampling_rate = opts[:sampling_rate]
|
13
13
|
run_time = opts[:run_time]
|
@@ -17,53 +17,39 @@ module MS
|
|
17
17
|
@@r_times = []
|
18
18
|
num_of_spec = sampling_rate*run_time
|
19
19
|
spec_time = 1/sampling_rate
|
20
|
-
num_of_spec.to_i.times do
|
20
|
+
num_of_spec.to_i.times do |k|
|
21
21
|
@@r_times<<spec_time+RThelper.RandomFloat(-var,var)
|
22
22
|
spec_time = spec_time + (1/sampling_rate)
|
23
23
|
end
|
24
24
|
@@r_times = MS::Noise.spec_drops(drop_percentage)
|
25
25
|
|
26
|
-
|
26
|
+
MS::Rtgenerator.generateRT(one_d,db)
|
27
27
|
|
28
28
|
#Features
|
29
|
-
features_o = MS::Sim_Feature.new(
|
30
|
-
@
|
31
|
-
@data = features_o.data
|
32
|
-
@max_mz = features_o.max_mz
|
33
|
-
@spectra = @data.clone
|
34
|
-
|
35
|
-
@noise = nil
|
29
|
+
@features_o = MS::Sim_Feature.new(opts,one_d,db)
|
30
|
+
@max_mz = @features_o.max_mz
|
36
31
|
|
37
32
|
end
|
38
33
|
|
39
|
-
def noiseify
|
40
|
-
@noise = MS::Noise.noiseify(opts,@max_mz)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
@spectra[k] = [n_v[0],n_v[1]]
|
34
|
+
# Generates noise via MS::Noise.noiseify and inserts every noise point
# into the spectra table through +db+.
#
# Ids continue from the feature generator's last cent_id. Each row is
# written with NULL in the second and last columns.
# NOTE(review): the hash key is written to the third column, which the
# truth writers label "rt" - presumably a retention time; confirm.
#
# db:: database handle responding to +execute+
def noiseify(db)
  @noise = MS::Noise.noiseify(@opts,@max_mz)
  next_id = @features_o.cent_id + 1
  @noise.each do |key,val|
    noise_mzs = val[0]
    noise_ints = val[1]
    noise_mzs.each_with_index do |mz,index|
      row_sql = "INSERT INTO spectra VALUES(#{next_id},NULL,#{key},#{mz},#{noise_ints[index]},NULL)"
      db.execute row_sql
      next_id += 1
    end
  end
end
|
54
46
|
|
55
47
|
# Class-level reader for the @@r_times class variable.
def self.r_times
  return @@r_times
end
|
58
50
|
|
59
|
-
attr_reader :
|
60
|
-
attr_writer :
|
51
|
+
attr_reader :max_mz
|
52
|
+
attr_writer :max_mz
|
61
53
|
|
62
54
|
end
|
63
55
|
end
|
64
|
-
|
65
|
-
#charge ratio: take both charge states, determine pH effective
|
66
|
-
#more small peaks from lesser charge states
|
67
|
-
|
68
|
-
#one_d
|
69
|
-
#fit to other labs data - different machine
|
data/lib/ms/sim_trollop.rb
CHANGED
@@ -27,19 +27,20 @@ module MS
|
|
27
27
|
trypsin,\n \t\tv8_e_trypsin,
|
28
28
|
v8_de_trypsin",
|
29
29
|
:default => "trypsin"
|
30
|
+
opt :missed_cleavages, "Number of missed cleavages during digestion", :default => 2
|
30
31
|
opt :sampling_rate, "How many scans per second", :default => 0.5
|
31
32
|
opt :run_time, "Run time in seconds", :default => 1000.0
|
32
33
|
opt :noise, "Noise on or off", :default => "true"
|
33
34
|
opt :noise_density, "Determines the density of white noise", :default => 10
|
34
|
-
|
35
|
-
|
35
|
+
opt :noiseMaxInt, "The max noise intensity level", :default => 1000
|
36
|
+
opt :noiseMinInt, "The minimum noise intensity level", :default => 50
|
36
37
|
opt :pH, "The pH that the sample is in - for determining charge", :default => 2.6
|
37
38
|
opt :out_file, "Name of the output file", :default => "test.mzml"
|
38
39
|
opt :contaminants, "Fasta file containing contaminant sequences", :default => "testFiles/contam/hum_keratin.fasta"
|
39
40
|
opt :dropout_percentage, "Defines the percentage of random dropouts in the run. 0.0 <= percentage < 1.0", :default => 0.01
|
40
41
|
opt :shuffle, "Option shuffles the scans to simulate 1d data", :default => "false"
|
41
42
|
opt :one_d, "Turns on one dimension simulation; run_time is automatically set to 300.0", :default => "false"
|
42
|
-
opt :truth, "Determines truth file type; false gives no truth file; one of: xml or csv", :default => "
|
43
|
+
opt :truth, "Determines truth file type; false gives no truth file; one of: 'xml' or 'csv' or 'xml_csv' (for both)", :default => "csv"
|
43
44
|
opt :front, "Fronting chromatography parameter", :default => 6.65
|
44
45
|
opt :tail, "Tailing chromatography parameter", :default => 0.30
|
45
46
|
opt :mu, "Expected value of the chromatography curve", :default => 25.0
|
@@ -53,7 +54,11 @@ module MS
|
|
53
54
|
opt :mzml, "Mzml file to extract simulation parameters from", :default => "nil"
|
54
55
|
opt :generations, "If an mzml file is provided this specifies the number of generations for the curve fitting algorithm", :default => 30000
|
55
56
|
opt :mass_label, "Specify a mass tag pattern", :default => 0
|
56
|
-
opt :
|
57
|
+
opt :ms2s, "Number of peptide ms2s to perform on each scan", :default => 1
|
58
|
+
opt :ms2, "Turn on/off ms2 (true == on)", :default => "true"
|
59
|
+
opt :databaseName, "Name of database file", :default => "peptides_[Time.now.sec]"
|
60
|
+
opt :memory, "Determines whether to store the database in memory or write to file (false == write to file) Note: if true no database file will be accessible after simulation", :default => "false"
|
61
|
+
opt :modifications, "To define residue or termini modifications. Enter a string Id1R1_Id2R2_ ... where Idi is a modification Id from http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/mod/data/PSI-MOD.obo and Ri is the residue/terminus to apply it to (c-term = CT, n-term = NT)", :default => "false"
|
57
62
|
|
58
63
|
end
|
59
64
|
|
data/lib/ms/tr_file_writer.rb
CHANGED
@@ -1,188 +1,71 @@
|
|
1
1
|
require 'progress'
|
2
2
|
|
3
|
-
#if m/z value is in "[m/z, percentage contributed to peak]" it's a
|
4
|
-
#merged peak.
|
5
3
|
module MS
|
6
4
|
class Txml_file_writer
|
7
|
-
def self.write(
|
8
|
-
|
5
|
+
# Writes the XML truth file "<file_name>_truth.xml".
#
# The file holds a <simulator_options> section with one "key=value," line
# per entry of +opts+, followed by one <simulated_peptide> element per row
# of the peptides table. Each element lists that peptide's rows from the
# spectra table as plain "cent_id=...,pep_id=...,..." lines.
#
# db::        database handle responding to +execute+
# file_name:: output path prefix ("_truth.xml" is appended)
# opts::      simulator options, iterated as key/value pairs
#
# Returns whatever Progress#finish! returns.
def self.write(db,file_name,opts)
  prog = Progress.new("Writing xml:")

  # Block form so the handle is closed even if a query or a row raises.
  File.open("#{file_name}_truth.xml","w") do |file|
    peps = db.execute "SELECT * FROM peptides"

    file.puts "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
    file.puts "<simulated_peptides>"
    file.puts "<simulator_options>\n"
    opts.each do |k,v|
      file.puts "\t#{k}=#{v},"
    end
    file.puts "</simulator_options>\n"

    total = peps.size.to_f
    num = 0
    step = total/100.0

    peps.each do |pep|
      # peptides columns used here: 0 = id, 1 = sequence, 3 = charge
      k = pep[0]
      # the peptide id is used as the progress position
      if k > step * (num + 1)
        num = (((k/total)*100).to_i)
        prog.update(num)
      end
      sequence = pep[1]
      charge = pep[3]
      cents = db.execute "SELECT * FROM spectra WHERE pep_id=#{k}"

      file.puts "\t<simulated_peptide sequence=\"#{sequence}\" charge=\"#{charge.round}\">"
      tags = "\t\t<centroids>\n"
      cents.each do |cent|
        tags<<"\t\t\tcent_id=#{cent[0]},pep_id=#{cent[1]},rt=#{cent[2]},mz=#{cent[3]},int=#{cent[4]},merge_id=#{cent[5]}\n"
      end
      tags<<"\t\t</centroids>\n"
      file<<tags
      file.puts "\t</simulated_peptide>"
    end
    file.puts "</simulated_peptides>"
  end

  prog.finish!
end
|
53
|
-
|
54
|
-
def self.get_ind(mz,rt)
|
55
|
-
index = nil
|
56
|
-
if @spectra[rt] != nil
|
57
|
-
mzs = @spectra[rt][0]
|
58
|
-
ints = @spectra[rt][1]
|
59
|
-
mzs.each_with_index do |m, i|
|
60
|
-
if m == mz
|
61
|
-
index = i
|
62
|
-
elsif m.class == Hash
|
63
|
-
if ind = m.values[0].index(mz)
|
64
|
-
index = [i,m.keys[0][ind+1]]
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
return index
|
70
|
-
end
|
71
48
|
end
|
72
49
|
|
73
50
|
class Tcsv_file_writer
|
74
|
-
def self.write(
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
ind_hash = create_indicies(features)
|
79
|
-
|
80
|
-
#create data structure with indices
|
81
|
-
data = data_with_indicies(full_spectra,spectra,noise,ind_hash)
|
82
|
-
|
83
|
-
#group by retention time
|
84
|
-
data = data.group_by{|d| d[0]}
|
51
|
+
# Writes the CSV truth file "<file_name>_truth.csv".
#
# Line 1 is "simulator_options=" followed by +opts.inspect+, line 2 is the
# column header, and every following line is one row of the spectra table
# joined to its peptide.
#
# db::        database handle responding to +execute+
# file_name:: output path prefix ("_truth.csv" is appended)
# opts::      simulator options, written with +inspect+
#
# Returns whatever Progress#finish! returns.
def self.write(db,file_name,opts)
  prog = Progress.new("Writing csv:")
  spectra = db.execute "SELECT * FROM spectra AS S JOIN peptides AS P ON S.pep_id=P.Id"

  # Float math so the percentage does not truncate to 0.
  total = spectra.size.to_f
  step = total/100.0
  num = 0

  #write
  # Block form so the handle is closed even if a row raises.
  File.open("#{file_name}_truth.csv","w") do |file|
    file.puts "simulator_options=#{opts.inspect}"
    # "protien_id" is kept as spelled: it is part of the emitted header.
    file.puts "rt,mz,int,centroid_id,merge_id,peptide_id,protien_id,seq,charge,abu"

    spectra.each_with_index do |cent,count|
      if count > step * (num + 1)
        num = (((count/total)*100).to_i)
        prog.update(num)
      end
      # Indices address the joined row in the order listed in the header.
      file.puts "#{cent[2]},#{cent[3]},#{cent[4]},#{cent[0]},#{cent[5]},#{cent[1]},#{cent[18]},#{cent[7]},#{cent[9]},#{cent[14]}"
    end
  end

  prog.finish!
end
|
111
|
-
|
112
|
-
def self.get_merged_mz(mz,rt)
|
113
|
-
m_mz = nil
|
114
|
-
int = nil
|
115
|
-
mzs = @spectra[rt][0]
|
116
|
-
ints = @spectra[rt][1]
|
117
|
-
mzs.each_with_index do |m, i|
|
118
|
-
if m == mz
|
119
|
-
m_mz = mz
|
120
|
-
int = ints[i]
|
121
|
-
elsif m.class == Hash
|
122
|
-
if ind = m.values[0].index(mz)
|
123
|
-
m_mz = [m.keys[0][0],m.keys[0][ind+1]]
|
124
|
-
int = ints[i].flatten.inject(:+)
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
128
|
-
return m_mz,int
|
129
|
-
end
|
130
|
-
|
131
|
-
def self.create_indicies(features)
|
132
|
-
ind_hash = {}
|
133
|
-
features.each_with_index do |pep,i|
|
134
|
-
pep.mzs.each_with_index do |m_ar,j|
|
135
|
-
m_ar.each do |mz|
|
136
|
-
ind_hash[mz] = "#{i + 1}.#{j + 1}".to_f
|
137
|
-
end
|
138
|
-
end
|
139
|
-
end
|
140
|
-
return ind_hash
|
141
|
-
end
|
142
|
-
|
143
|
-
def self.data_with_indicies(full_spectra,spectra,noise,ind_hash)
|
144
|
-
count = 1
|
145
|
-
time_i = 0.0
|
146
|
-
data = []
|
147
|
-
total = spectra.length
|
148
|
-
prog = Progress.new("Writing csv(process 1 of 2):")
|
149
|
-
num = 0
|
150
|
-
step = total/100
|
151
|
-
spectra.each do |k,v|
|
152
|
-
if time_i > step * (num + 1)
|
153
|
-
num = (((time_i/total)*100).to_i)
|
154
|
-
prog.update(num)
|
155
|
-
end
|
156
|
-
|
157
|
-
merged_d = full_spectra[k]
|
158
|
-
merged_mzs = merged_d[0]
|
159
|
-
merged_ints = merged_d[1]
|
160
|
-
|
161
|
-
if noise != "false"
|
162
|
-
n_data = noise[k]
|
163
|
-
end
|
164
|
-
|
165
|
-
if v != nil
|
166
|
-
v.each_slice(2) do |m,i|
|
167
|
-
m.each_with_index do |mz,index|
|
168
|
-
peak_index = ind_hash[mz]
|
169
|
-
mz,int = get_merged_mz(mz,k)
|
170
|
-
data<<[k,mz.inspect,int,peak_index]
|
171
|
-
end
|
172
|
-
end
|
173
|
-
end
|
174
|
-
|
175
|
-
if noise != "false"
|
176
|
-
n_data.each_slice(2) do |m,i|
|
177
|
-
m.each_with_index do |mz,index|
|
178
|
-
mz,int = get_merged_mz(mz,k)
|
179
|
-
data<<[k,mz.inspect,int,0]
|
180
|
-
end
|
181
|
-
end
|
182
|
-
end
|
183
|
-
time_i += 1
|
184
|
-
end
|
185
|
-
return data
|
186
|
-
end
|
187
70
|
end
|
188
71
|
end
|