mspire-simulator 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,17 @@ require 'mspire/isotope/distribution'
3
3
 
4
4
  module MS
5
5
  class Peptide
6
- def initialize(sequence, charge)
6
+ def initialize(sequence, charge, abu = 1.0)
7
+ @abu = abu
7
8
  @p_rt = 0
8
9
  @p_int = 0
9
10
  @rts = []
10
11
  @charge = charge #this is saved in the file name as well
11
-
12
+
12
13
  spec = calcSpectrum(sequence, @charge)
13
-
14
+
15
+ # TODO Ryan: alter this to handle variable and static mass modifications... Add it from the Katamari code
16
+
14
17
  @core_ints = spec.intensities.clone
15
18
  @core_mzs = spec.mzs.clone
16
19
  @mzs_file = ".m/#{sequence[0]}/#{sequence[0...15]}_#{charge}"
@@ -22,30 +25,30 @@ module MS
22
25
  @mass = @mono_mz * @charge
23
26
  #U,O,X ???
24
27
  amino_acids = ['A','R','N','D','B','C','E','Q','Z','G','H','I',
25
- 'L','K','M','F','P','S','T','W','Y','V','J']
28
+ 'L','K','M','F','P','S','T','W','Y','V','J']
26
29
  @aa_counts = amino_acids.map do |aa|
27
- sequence.count(aa)
30
+ sequence.count(aa)
28
31
  end
29
32
  @aa_counts<<0.0
30
33
  end
31
-
32
- attr_reader :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i
33
- attr_writer :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i
34
-
34
+
35
+ attr_reader :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i, :abu, :sx
36
+ attr_writer :mass, :charge, :mono_mz, :core_mzs, :p_rt, :p_int, :core_ints, :hydro, :pi, :aa_counts, :p_rt_i, :abu, :sx
37
+
35
38
  def to_s
36
39
  file = File.open(@mzs_file,"r")
37
40
  seq = file.gets.chomp
38
41
  file.close
39
42
  "Peptide: #{seq}"
40
43
  end
41
-
44
+
42
45
  def sequence
43
46
  file = File.open(@mzs_file,"r")
44
47
  seq = file.gets.chomp
45
48
  file.close
46
49
  seq
47
50
  end
48
-
51
+
49
52
  #---------------------------------------------------------------------------
50
53
  def ints
51
54
  file = File.open(@ints_file, "r")
@@ -53,20 +56,20 @@ module MS
53
56
  file.close
54
57
  ints = []
55
58
  line.each do |iso|
56
- ints<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
59
+ ints<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
57
60
  end
58
61
  return ints
59
62
  end
60
-
63
+
61
64
  def insert_ints(arr)
62
65
  file = File.open(@ints_file, "a")
63
66
  arr.each do |val|
64
- file.print("#{val},")
67
+ file.print("#{val},")
65
68
  end
66
69
  file.print(";")
67
70
  file.close
68
71
  end
69
-
72
+
70
73
  def mzs
71
74
  file = File.open(@mzs_file, "r")
72
75
  line = file.gets
@@ -74,44 +77,44 @@ module MS
74
77
  file.close
75
78
  mzs = []
76
79
  line.each do |iso|
77
- mzs<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
80
+ mzs<<iso.chomp.split(/,/).map!{|fl| fl.to_f}
78
81
  end
79
82
  return mzs
80
83
  end
81
-
84
+
82
85
  def insert_mzs(arr)
83
86
  file = File.open(@mzs_file, "a")
84
87
  arr.each do |val|
85
- file.print("#{val},")
88
+ file.print("#{val},")
86
89
  end
87
90
  file.print(";")
88
91
  file.close
89
92
  end
90
-
93
+
91
94
  def rts
92
95
  return Sim_Spectra::r_times[@rts[0]..@rts[1]]
93
96
  end
94
-
97
+
95
98
  def set_rts(a,b)
96
99
  @rts = [a,b]
97
100
  end
98
-
101
+
99
102
  def delete
100
103
  if File.exists?(@mzs_file)
101
- File.delete(@mzs_file)
104
+ File.delete(@mzs_file)
102
105
  end
103
106
  if File.exists?(@ints_file)
104
- File.delete(@ints_file)
107
+ File.delete(@ints_file)
105
108
  end
106
109
  end
107
110
  #---------------------------------------------------------------------------
108
-
111
+
109
112
  # Calculates theoretical spectrum
110
113
  #
111
114
  def calcSpectrum(seq, charge)
112
115
  #isotope.rb from Dr. Prince
113
116
  atoms = countAtoms(seq)
114
-
117
+
115
118
  var = ""
116
119
  var<<"O"
117
120
  var<<atoms[0].to_s
@@ -127,7 +130,7 @@ module MS
127
130
  var<<atoms[5].to_s
128
131
  var<<"Se"
129
132
  var<<atoms[6].to_s
130
-
133
+
131
134
  mf = Mspire::MolecularFormula.from_string(var, charge)
132
135
  spec = Mspire::Isotope::Distribution.spectrum(mf, :max, 0.001)
133
136
 
@@ -135,8 +138,8 @@ module MS
135
138
 
136
139
  return spec
137
140
  end
138
-
139
-
141
+
142
+
140
143
  # Counts the number of each atom in the peptide sequence.
141
144
  #
142
145
  def countAtoms(seq)
@@ -148,33 +151,33 @@ module MS
148
151
  p = 0
149
152
  se = 0
150
153
  seq.each_char do |aa|
151
-
152
- #poly amino acids
153
- #"X" is for any (I exclude uncommon "U" and "O")
154
- if aa == "X"
155
- aas = Mspire::Isotope::AA::ATOM_COUNTS.keys[0..19]
156
- aa = aas[rand(20)]
157
- #"B" is "N" or "D"
158
- elsif aa == "B"
159
- aas = ["N","D"]
160
- aa = aas[rand(2)]
161
- #"Z" is "Q" or "E"
162
- elsif aa == "Z"
163
- aas = ["Q","E"]
164
- aa = aas[rand(2)]
165
- end
166
-
167
- if aa !~ /A|R|N|D|C|E|Q|G|H|I|L|K|M|F|P|S|T|W|Y|V|U|O/
168
- puts "No amino acid match for #{aa}"
169
- else
170
- o = o + Mspire::Isotope::AA::ATOM_COUNTS[aa][:o]
171
- n = n + Mspire::Isotope::AA::ATOM_COUNTS[aa][:n]
172
- c = c + Mspire::Isotope::AA::ATOM_COUNTS[aa][:c]
173
- h = h + Mspire::Isotope::AA::ATOM_COUNTS[aa][:h]
174
- s = s + Mspire::Isotope::AA::ATOM_COUNTS[aa][:s]
175
- p = p + Mspire::Isotope::AA::ATOM_COUNTS[aa][:p]
176
- se = se + Mspire::Isotope::AA::ATOM_COUNTS[aa][:se]
177
- end
154
+
155
+ #poly amino acids
156
+ #"X" is for any (I exclude uncommon "U" and "O")
157
+ if aa == "X"
158
+ aas = Mspire::Isotope::AA::ATOM_COUNTS.keys[0..19]
159
+ aa = aas[rand(20)]
160
+ #"B" is "N" or "D"
161
+ elsif aa == "B"
162
+ aas = ["N","D"]
163
+ aa = aas[rand(2)]
164
+ #"Z" is "Q" or "E"
165
+ elsif aa == "Z"
166
+ aas = ["Q","E"]
167
+ aa = aas[rand(2)]
168
+ end
169
+
170
+ if aa !~ /A|R|N|D|C|E|Q|G|H|I|L|K|M|F|P|S|T|W|Y|V|U|O/
171
+ puts "No amino acid match for #{aa}"
172
+ else
173
+ o = o + Mspire::Isotope::AA::ATOM_COUNTS[aa][:o]
174
+ n = n + Mspire::Isotope::AA::ATOM_COUNTS[aa][:n]
175
+ c = c + Mspire::Isotope::AA::ATOM_COUNTS[aa][:c]
176
+ h = h + Mspire::Isotope::AA::ATOM_COUNTS[aa][:h]
177
+ s = s + Mspire::Isotope::AA::ATOM_COUNTS[aa][:s]
178
+ p = p + Mspire::Isotope::AA::ATOM_COUNTS[aa][:p]
179
+ se = se + Mspire::Isotope::AA::ATOM_COUNTS[aa][:se]
180
+ end
178
181
  end
179
182
  return (o + 1),n,c,(h + 2) ,s,p,se
180
183
  end
@@ -7,12 +7,11 @@ require 'ms/sim_feature'
7
7
  module MS
8
8
  class Sim_Spectra
9
9
  def initialize(peptides,opts,one_d = false)
10
- @density = opts[:noise_density]
11
10
  @data
12
11
  @max_mz
13
- sampling_rate = opts[:sampling_rate]
14
- run_time = opts[:run_time]
15
- drop_percentage = opts[:dropout_percentage]
12
+ sampling_rate = opts[:sampling_rate]
13
+ run_time = opts[:run_time]
14
+ drop_percentage = opts[:dropout_percentage]
16
15
  #RTS
17
16
  var = 0.1/(sampling_rate*2)
18
17
  @@r_times = []
@@ -23,43 +22,43 @@ module MS
23
22
  spec_time = spec_time + (1/sampling_rate)
24
23
  end
25
24
  @@r_times = MS::Noise.spec_drops(drop_percentage)
26
-
25
+
27
26
  pre_features = MS::Rtgenerator.generateRT(peptides,one_d)
28
-
27
+
29
28
  #Features
30
29
  features_o = MS::Sim_Feature.new(pre_features,opts,one_d)
31
30
  @features = features_o.features
32
31
  @data = features_o.data
33
- @max_mz = @data.max_by{|key,val| if val != nil;val[0].max;else;0;end}[1][0].max
32
+ @max_mz = features_o.max_mz
34
33
  @spectra = @data.clone
35
-
34
+
36
35
  @noise = nil
37
-
36
+
38
37
  end
39
-
38
+
40
39
  def noiseify
41
- @noise = MS::Noise.noiseify(@density,@max_mz)
42
-
40
+ @noise = MS::Noise.noiseify(opts,@max_mz)
41
+
43
42
  @@r_times.each do |k|
44
- s_v = @data[k]
45
- n_v = @noise[k]
46
- if s_v != nil
47
- @spectra[k] = [s_v[0]+n_v[0],s_v[1]+n_v[1]]
48
- else
49
- @spectra[k] = [n_v[0],n_v[1]]
50
- end
43
+ s_v = @data[k]
44
+ n_v = @noise[k]
45
+ if s_v != nil
46
+ @spectra[k] = [s_v[0]+n_v[0],s_v[1]+n_v[1]]
47
+ else
48
+ @spectra[k] = [n_v[0],n_v[1]]
49
+ end
51
50
  end
52
-
51
+
53
52
  return @noise
54
53
  end
55
-
54
+
56
55
  def self.r_times
57
56
  @@r_times
58
57
  end
59
-
58
+
60
59
  attr_reader :data, :max_mz, :spectra, :noise, :features
61
60
  attr_writer :data, :max_mz, :spectra, :noise, :features
62
-
61
+
63
62
  end
64
63
  end
65
64
 
@@ -6,19 +6,19 @@ module MS
6
6
  @opts = Trollop::options do
7
7
  version "mspire-simulator 0.0.1a (c) 2012 Brigham Young University"
8
8
  banner <<-EOS
9
-
9
+
10
10
  *********************************************************************
11
11
  Description: Simulates ms runs given protein fasta files. Outputs
12
12
  a mzML file.
13
-
14
-
13
+
14
+
15
15
  Usage:
16
16
  mspire-simulator [options] <filenames>+
17
-
17
+
18
18
  where [options] are:
19
19
  EOS
20
20
  opt :digestor, "Digestion Enzyme; one of: \n\t\targ_c,\n \t\tasp_n,
21
- asp_n_ambic,
21
+ asp_n_ambic,
22
22
  chymotrypsin,\n \t\tcnbr,
23
23
  lys_c,\n \t\tlys_c_p,
24
24
  pepsin_a,\n\t\ttryp_cnbr,
@@ -27,42 +27,46 @@ module MS
27
27
  trypsin,\n \t\tv8_e_trypsin,
28
28
  v8_de_trypsin",
29
29
  :default => "trypsin"
30
- opt :sampling_rate, "How many scans per second", :default => 0.5
31
- opt :run_time, "Run time in seconds", :default => 1000.0
32
- opt :noise, "Noise on or off", :default => "true"
33
- opt :noise_density, "Determines the density of white noise", :default => 10
34
- opt :pH, "The pH that the sample is in - for determining charge", :default => 2.6
35
- opt :out_file, "Name of the output file", :default => "test.mzml"
36
- opt :contaminants, "Fasta file containing contaminant sequences", :default => "testFiles/contam/hum_keratin.fasta"
37
- opt :dropout_percentage, "Defines the percentage of random dropouts in the run. 0.0 <= percentage < 1.0", :default => 0.12
38
- opt :shuffle, "Option shuffles the scans to simulate 1d data", :default => "false"
39
- opt :one_d, "Turns on one dimension simulation; run_time is automatically set to 300.0", :default => "false"
40
- opt :truth, "Determines truth file type; false gives no truth file; one of: xml or csv", :default => "false"
41
- opt :front, "Fronting chromatography parameter", :default => 6.65
42
- opt :tail, "Tailing chromatography parameter", :default => 0.30
43
- opt :mu, "Expected value of the chromatography curve", :default => 25.0
44
- opt :wobA, "m/z wobble parameter", :default => 0.001071
45
- opt :wobB, "m/z wobble parameter", :default => -0.5430
46
- opt :jagA, "intensity variance parameter", :default => 10.34
47
- opt :jagC, "intensity variance parameter", :default => 0.00712
48
- opt :jagB, "intensity variance parameter", :default => 0.12
49
- opt :overlapRange, "range in which to determine overlapping peaks", :default => 1.0724699230489427
50
- opt :email, "Email address to send completion messages to", :default => "nil"
51
- opt :mzml, "Mzml file to extract simulation parameters from", :default => "nil"
52
- opt :generations, "If an mzml file is provided this specifies the number of generations for the curve fitting algorithm", :default => 30000
53
-
30
+ opt :sampling_rate, "How many scans per second", :default => 0.5
31
+ opt :run_time, "Run time in seconds", :default => 1000.0
32
+ opt :noise, "Noise on or off", :default => "true"
33
+ opt :noise_density, "Determines the density of white noise", :default => 10
34
+ opt :noiseMaxInt, "The max noise intensity level", :default => 1000
35
+ opt :noiseMinInt, "The minimum noise intensity level", :default => 50
36
+ opt :pH, "The pH that the sample is in - for determining charge", :default => 2.6
37
+ opt :out_file, "Name of the output file", :default => "test.mzml"
38
+ opt :contaminants, "Fasta file containing contaminant sequences", :default => "testFiles/contam/hum_keratin.fasta"
39
+ opt :dropout_percentage, "Defines the percentage of random dropouts in the run. 0.0 <= percentage < 1.0", :default => 0.01
40
+ opt :shuffle, "Option shuffles the scans to simulate 1d data", :default => "false"
41
+ opt :one_d, "Turns on one dimension simulation; run_time is automatically set to 300.0", :default => "false"
42
+ opt :truth, "Determines truth file type; false gives no truth file; one of: xml or csv", :default => "false"
43
+ opt :front, "Fronting chromatography parameter", :default => 6.65
44
+ opt :tail, "Tailing chromatography parameter", :default => 0.30
45
+ opt :mu, "Expected value of the chromatography curve", :default => 25.0
46
+ opt :wobA, "m/z wobble parameter", :default => 0.001071
47
+ opt :wobB, "m/z wobble parameter", :default => -0.5430
48
+ opt :jagA, "intensity variance parameter", :default => 10.34
49
+ opt :jagC, "intensity variance parameter", :default => 0.00712
50
+ opt :jagB, "intensity variance parameter", :default => 0.12
51
+ opt :overlapRange, "range in which to determine overlapping peaks", :default => 1.0724699230489427
52
+ opt :email, "Email address to send completion messages to", :default => "nil"
53
+ opt :mzml, "Mzml file to extract simulation parameters from", :default => "nil"
54
+ opt :generations, "If an mzml file is provided this specifies the number of generations for the curve fitting algorithm", :default => 30000
55
+ opt :mass_label, "Specify a mass tag pattern", :default => 0
56
+ opt :modifications, "Use a specific modifications file, or read them from a header of the fasta file, perhaps... TBD..."
57
+
54
58
  end
55
-
59
+
56
60
  if @opts[:mzml] != "nil"
57
61
  @opts = CurveFit.get_parameters(@opts)
58
62
  end
59
63
  Trollop::die :sampling_rate, "must be greater than 0" if @opts[:sampling_rate] <= 0
60
64
  Trollop::die :run_time, "must be non-negative" if @opts[:run_time] < 0
61
- Trollop::die "must supply a .fasta protien sequence file" if ARGV.empty?
65
+ Trollop::die "must supply a .fasta protein sequence file" if ARGV.empty?
62
66
  Trollop::die :dropout_percentage, "must be between greater than or equal to 0.0 or less than 1.0" if @opts[:dropout_percentage] < 0.0 or @opts[:dropout_percentage] >= 1.0
63
67
  @opts[:overlapRange] = (@opts[:overlapRange]*10.0**-6)/2.0
64
68
  end
65
-
69
+
66
70
  def get; @opts; end
67
71
  end
68
72
  end
@@ -6,47 +6,51 @@ module MS
6
6
  class Txml_file_writer
7
7
  def self.write(features,spectra,file_name)
8
8
  @spectra = spectra
9
- @start = Time.now
10
9
  file = File.open("#{file_name}_truth.xml","w")
11
-
10
+
12
11
  r_times = spectra.keys.sort
13
-
12
+
14
13
  file.puts "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
15
14
  file.puts "<simulated_peptides>"
16
- total = features.size.to_f
17
- features.each_with_index do |fe,k|
18
- sequence = fe.sequence
19
- charge = fe.charge
20
- mzs = fe.mzs
21
- ints = fe.ints
22
- rts = fe.rts
23
- Progress.progress("Writing xml:",(((k/total)*100).to_i))
24
- file.puts "\t<simulated_peptide sequence=\"#{sequence}\" charge=\"#{charge.round}\">"
25
- mzs.each_with_index do |mzs,i|
26
- tags = ""
27
- centroids = ""
28
- tags<<"\t\t<lc_centroids isotopic_index=\"#{i}\">"
29
- mzs.each_with_index do |mz,ind|
30
- if ints[i][ind] > 0.9
31
- index = get_ind(mz,rts[ind])
32
- centroids<<"#{r_times.index(rts[ind])},#{index.inspect};"
33
- end
34
- end
35
- if centroids != ""
36
- tags<<centroids
37
- tags<<"</lc_centroids>\n"
38
- file<<tags
39
- end
40
- end
41
- file.puts "\t</simulated_peptide>"
42
- end
15
+ total = features.size.to_f
16
+ prog = Progress.new("Writing xml:")
17
+ num = 0
18
+ step = total/100.0
19
+ features.each_with_index do |fe,k|
20
+ sequence = fe.sequence
21
+ charge = fe.charge
22
+ mzs = fe.mzs
23
+ ints = fe.ints
24
+ rts = fe.rts
25
+ if k > step * (num + 1)
26
+ num = (((k/total)*100).to_i)
27
+ prog.update(num)
28
+ end
29
+ file.puts "\t<simulated_peptide sequence=\"#{sequence}\" charge=\"#{charge.round}\">"
30
+ mzs.each_with_index do |mzs,i|
31
+ tags = ""
32
+ centroids = ""
33
+ tags<<"\t\t<lc_centroids isotopic_index=\"#{i}\">"
34
+ mzs.each_with_index do |mz,ind|
35
+ if ints[i][ind] > 0.9
36
+ index = get_ind(mz,rts[ind])
37
+ centroids<<"#{r_times.index(rts[ind])},#{index.inspect};"
38
+ end
39
+ end
40
+ if centroids != ""
41
+ tags<<centroids
42
+ tags<<"</lc_centroids>\n"
43
+ file<<tags
44
+ end
45
+ end
46
+ file.puts "\t</simulated_peptide>"
47
+ end
43
48
  file.puts "</simulated_peptides>"
44
49
  file.close
45
-
46
- Progress.progress("Writing xml:",100,Time.now-@start)
47
- puts ''
50
+
51
+ prog.finish!
48
52
  end
49
-
53
+
50
54
  def self.get_ind(mz,rt)
51
55
  index = nil
52
56
  if @spectra[rt] != nil
@@ -65,109 +69,118 @@ module MS
65
69
  return index
66
70
  end
67
71
  end
68
-
72
+
69
73
  class Tcsv_file_writer
70
74
  def self.write(full_spectra,spectra,noise,features,file_name)
71
- @start = Time.now
72
75
  @spectra = full_spectra
73
-
76
+
74
77
  #create indices for real peaks
75
78
  ind_hash = create_indicies(features)
76
-
79
+
77
80
  #create data structure with indices
78
81
  data = data_with_indicies(full_spectra,spectra,noise,ind_hash)
79
-
82
+
80
83
  #group by retention time
81
84
  data = data.group_by{|d| d[0]}
82
-
85
+
83
86
  #write
84
87
  file = File.open("#{file_name}_truth.csv","w")
85
88
  file.puts "rt,mz,int,index"
86
89
  total = data.size.to_f
87
90
  count = 0
91
+ prog = Progress.new("Writing csv(process 2 of 2):")
92
+ num = 0
93
+ step = total/100
88
94
  data.each_value do |val|
89
- Progress.progress("Writing csv(process 2 of 2):",(((count/total)*100).to_i))
90
- val.each do |a|
91
- if a[3] >= 1
92
- file.puts "#{a[0]},#{a[1]},#{a[2]},#{a[3]}"
93
- else
94
- file.puts "#{a[0]},#{a[1]},#{a[2]},#{0}"
95
- end
96
- end
97
- count += 1
95
+ if count > step * (num + 1)
96
+ num = (((count/total)*100).to_i)
97
+ prog.update(num)
98
+ end
99
+ val.each do |a|
100
+ if a[3] >= 1
101
+ file.puts "#{a[0]},#{a[1]},#{a[2]},#{a[3]}"
102
+ else
103
+ file.puts "#{a[0]},#{a[1]},#{a[2]},#{0}"
104
+ end
105
+ end
106
+ count += 1
98
107
  end
99
108
  file.close
100
-
101
- Progress.progress("Writing csv:",100,Time.now-@start)
102
- puts ''
109
+ prog.finish!
103
110
  end
104
-
111
+
105
112
  def self.get_merged_mz(mz,rt)
106
113
  m_mz = nil
107
114
  int = nil
108
115
  mzs = @spectra[rt][0]
109
116
  ints = @spectra[rt][1]
110
117
  mzs.each_with_index do |m, i|
111
- if m == mz
112
- m_mz = mz
113
- int = ints[i]
114
- elsif m.class == Hash
115
- if ind = m.values[0].index(mz)
116
- m_mz = [m.keys[0][0],m.keys[0][ind+1]]
117
- int = ints[i].flatten.inject(:+)
118
- end
119
- end
118
+ if m == mz
119
+ m_mz = mz
120
+ int = ints[i]
121
+ elsif m.class == Hash
122
+ if ind = m.values[0].index(mz)
123
+ m_mz = [m.keys[0][0],m.keys[0][ind+1]]
124
+ int = ints[i].flatten.inject(:+)
125
+ end
126
+ end
120
127
  end
121
128
  return m_mz,int
122
129
  end
123
-
130
+
124
131
  def self.create_indicies(features)
125
132
  ind_hash = {}
126
133
  features.each_with_index do |pep,i|
127
- pep.mzs.each_with_index do |m_ar,j|
128
- m_ar.each do |mz|
129
- ind_hash[mz] = "#{i + 1}.#{j + 1}".to_f
130
- end
131
- end
134
+ pep.mzs.each_with_index do |m_ar,j|
135
+ m_ar.each do |mz|
136
+ ind_hash[mz] = "#{i + 1}.#{j + 1}".to_f
137
+ end
138
+ end
132
139
  end
133
140
  return ind_hash
134
141
  end
135
-
142
+
136
143
  def self.data_with_indicies(full_spectra,spectra,noise,ind_hash)
137
144
  count = 1
138
145
  time_i = 0.0
139
146
  data = []
140
147
  total = spectra.length
148
+ prog = Progress.new("Writing csv(process 1 of 2):")
149
+ num = 0
150
+ step = total/100
141
151
  spectra.each do |k,v|
142
- Progress.progress("Writing csv(process 1 of 2):",(((time_i/total)*100).to_i))
143
-
144
- merged_d = full_spectra[k]
145
- merged_mzs = merged_d[0]
146
- merged_ints = merged_d[1]
147
-
148
- if noise != "false"
149
- n_data = noise[k]
150
- end
151
-
152
- if v != nil
153
- v.each_slice(2) do |m,i|
154
- m.each_with_index do |mz,index|
155
- peak_index = ind_hash[mz]
156
- mz,int = get_merged_mz(mz,k)
157
- data<<[k,mz.inspect,int,peak_index]
158
- end
159
- end
160
- end
161
-
162
- if noise != "false"
163
- n_data.each_slice(2) do |m,i|
164
- m.each_with_index do |mz,index|
165
- mz,int = get_merged_mz(mz,k)
166
- data<<[k,mz.inspect,int,0]
167
- end
168
- end
169
- end
170
- time_i += 1
152
+ if time_i > step * (num + 1)
153
+ num = (((time_i/total)*100).to_i)
154
+ prog.update(num)
155
+ end
156
+
157
+ merged_d = full_spectra[k]
158
+ merged_mzs = merged_d[0]
159
+ merged_ints = merged_d[1]
160
+
161
+ if noise != "false"
162
+ n_data = noise[k]
163
+ end
164
+
165
+ if v != nil
166
+ v.each_slice(2) do |m,i|
167
+ m.each_with_index do |mz,index|
168
+ peak_index = ind_hash[mz]
169
+ mz,int = get_merged_mz(mz,k)
170
+ data<<[k,mz.inspect,int,peak_index]
171
+ end
172
+ end
173
+ end
174
+
175
+ if noise != "false"
176
+ n_data.each_slice(2) do |m,i|
177
+ m.each_with_index do |mz,index|
178
+ mz,int = get_merged_mz(mz,k)
179
+ data<<[k,mz.inspect,int,0]
180
+ end
181
+ end
182
+ end
183
+ time_i += 1
171
184
  end
172
185
  return data
173
186
  end