mspire-simulator 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ms/merger.rb CHANGED
@@ -33,65 +33,78 @@ class Merger
33
33
  b = weights.flatten.inject(:+)
34
34
  return a/b
35
35
  end
36
-
36
+
37
37
  def self.merge(spectra,half_range)
38
- @start = Time.now
39
38
  new_data = {}
40
39
  total = spectra.size
41
40
  k = 0
41
+ prog = Progress.new("Merging Overlaps:")
42
42
  spectra.each do |rt,val|
43
- Progress.progress("Merging Overlaps:",(((k/total)*100).to_i))
43
+ if k.even?
44
+ num = (((k/total)*100).to_i)
45
+ prog.update(num)
46
+ end
44
47
  peaks = val.transpose
45
- peaks.sort_by!{|a| a[0]}
48
+ peaks.sort_by!{|a| a[0]} #mz
46
49
  peaks = peaks.transpose
47
50
  mzs = peaks[0]
48
51
  ints = peaks[1]
49
52
  mzs.each_with_index do |mz,i|
50
- next if mz.class == Hash
51
- o_mz = mz
52
- mz = mz.keys[0][0] if mz.class == Hash
53
- range = (mz..mz+half_range)
54
- if range.include?(mzs[i+1])
55
- metaA_mz = [o_mz, mzs[i+1]]
56
- meta_int = [ints[i],ints[i+1]]
57
- sum = meta_int.flatten.inject(:+).to_f
58
- i1 = ints[i]
59
- i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
60
- frac1 = (i1/sum) * 100
61
- frac2 = (ints[i+1]/sum) * 100
62
- metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
63
-
64
- mzs[i] = nil; mzs[i+1] = metaB_mz
65
- ints[i] = nil; ints[i+1] = meta_int
66
- end
53
+ next if mz.class == Hash
54
+ o_mz = mz
55
+ mz = mz.keys[0][0] if mz.class == Hash
56
+ range = (mz..mz+half_range)
57
+ if range.include?(mzs[i+1])
58
+ metaA_mz = [o_mz, mzs[i+1]]
59
+ meta_int = [ints[i],ints[i+1]]
60
+ sum = meta_int.flatten.inject(:+).to_f
61
+ i1 = ints[i]
62
+ i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
63
+ frac1 = (i1/sum) * 100
64
+ frac2 = (ints[i+1]/sum) * 100
65
+ metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
66
+
67
+ mzs[i] = nil; mzs[i+1] = metaB_mz
68
+ ints[i] = nil; ints[i+1] = meta_int
69
+ end
67
70
  end
68
- new_data[rt] = [mzs.compact,ints.compact]
71
+ spec = [mzs.compact,ints.compact]
72
+ spec.ms_level = val.ms_level
73
+ spec.ms2 = val.ms2
74
+ new_data[rt] = spec
69
75
  k += 1
70
76
  end
71
- Progress.progress("Merging Overlaps:",100,Time.now-@start)
72
- puts ''
77
+ prog.finish!
73
78
  return new_data
74
79
  end
75
-
80
+
76
81
  def self.compact(spectra)
77
82
  @start = Time.now
78
83
  total = spectra.size
79
84
  k = 0
85
+ num = 0
86
+ prog = Progress.new("Merge Finishing:")
87
+ step = total/100.0
80
88
  spectra.each do |rt,val|
81
- Progress.progress("Merge Finishing:",(((k/total)*100).to_i))
89
+ if k > step * (num + 1)
90
+ num = (((k/total)*100).to_i)
91
+ prog.update(num)
92
+ end
82
93
  mzs = val[0]
83
94
  ints = val[1]
84
95
  mzs.each_with_index do |m,i|
85
- if m.class == Hash
86
- mzs[i] = m.keys[0][0]
87
- ints[i] = ints[i].flatten.inject(:+)
88
- end
96
+ if m.class == Hash
97
+ mzs[i] = m.keys[0][0]
98
+ ints[i] = ints[i].flatten.inject(:+)
99
+ end
89
100
  end
90
- spectra[rt] = [mzs,ints]
101
+ spec = [mzs,ints]
102
+ spec.ms_level = val.ms_level
103
+ spec.ms2 = val.ms2
104
+ spectra[rt] = spec
91
105
  k += 1
92
106
  end
93
- Progress.progress("Merge Finishing:",100,Time.now-@start)
94
- puts ''
107
+ prog.finish!
95
108
  return spectra
96
109
  end
97
110
  end
@@ -6,42 +6,87 @@ require 'mspire/mzml'
6
6
  class Mzml_Wrapper
7
7
 
8
8
  def initialize(spectra)
9
- #spectra is a Hash rt=>[[mzs],[ints]]
10
- @start = Time.now
11
-
12
-
9
+ #spectra is a Hash rt=>[[mzs],[ints]]
10
+ ms2_count = 0
13
11
  count = 0.0
14
12
  scan_number = 1
15
13
  specs = []
16
- spectra.each do |rt,data|
17
- Progress.progress("Converting to mzml:",(((count/spectra.size)*100).to_i))
14
+ prog = Progress.new("Converting to mzml:")
15
+ num = 0
16
+ total = spectra.size
17
+ step = total/100
18
+ spec_id = nil
19
+ t_rt = 0
20
+ spectra.sort.map do |rt,data|
21
+ if count > step * (num + 1)
22
+ num = (((count/total)*100).to_i)
23
+ prog.update(num)
24
+ end
25
+ if t_rt > rt
26
+ puts "OUT of ORDER"
27
+ end
28
+ t_rt = rt
29
+
30
+ ms_level = data.ms_level # method added to array class
18
31
 
19
32
  spc = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
20
- spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
21
- spec.data_arrays = [
22
- Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
23
- Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
24
- ]
25
- spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
26
- scan = Mspire::Mzml::Scan.new do |scan|
27
- scan.describe! 'MS:1000016', rt, 'UO:0000010'
28
- end
29
- sl << scan
30
- end
33
+ spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
34
+ spec.data_arrays = [
35
+ Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
36
+ Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
37
+ ]
38
+ spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
39
+ scan = Mspire::Mzml::Scan.new do |scan|
40
+ scan.describe! 'MS:1000016', rt, 'UO:0000010'
41
+ end
42
+ sl << scan
43
+ end
44
+ end
45
+ specs<<spc
46
+ if ms_level == 2
47
+ #[rt,[mzs],[ints]]
48
+ ms2 = data.ms2
49
+ ms2.each do |data|
50
+ ms2_count += 1
51
+ scan_number += 1
52
+ spc2 = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
53
+ spec.describe_many!(['MS:1000127', ['MS:1000511', 2]])
54
+ spec.data_arrays = [
55
+ Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000514'),
56
+ Mspire::Mzml::DataArray.new(data[2]).describe!('MS:1000515')
57
+ ]
58
+ spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
59
+ scan = Mspire::Mzml::Scan.new do |scan|
60
+ scan.describe! 'MS:1000016', data[0], 'UO:0000010'
61
+ end
62
+ sl << scan
63
+ end
64
+ precursor = Mspire::Mzml::Precursor.new( spc.id )
65
+ si = Mspire::Mzml::SelectedIon.new
66
+ # the selected ion m/z:
67
+ si.describe! "MS:1000744", data.pre_mz
68
+ # the selected ion charge state
69
+ si.describe! "MS:1000041", data.pre_charge
70
+ # the selected ion intensity
71
+ si.describe! "MS:1000042", data.pre_int
72
+ precursor.selected_ions = [si]
73
+ spec.precursors = [precursor]
74
+ end
75
+ specs<<spc2
76
+ end
31
77
  end
32
78
  count += 1
33
79
  scan_number += 1
34
- specs<<spc
35
80
  end
36
-
37
-
38
-
81
+
82
+
83
+
39
84
  @mzml = Mspire::Mzml.new do |mzml|
40
- mzml.id = 'ms1'
85
+ mzml.id = 'ms1_and_ms2'
41
86
  mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
42
87
  mzml.file_description = Mspire::Mzml::FileDescription.new do |fd|
43
- fd.file_content = Mspire::Mzml::FileContent.new
44
- fd.source_files << Mspire::Mzml::SourceFile.new
88
+ fd.file_content = Mspire::Mzml::FileContent.new
89
+ fd.source_files << Mspire::Mzml::SourceFile.new
45
90
  end
46
91
  default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
47
92
  mzml.instrument_configurations << default_instrument_config
@@ -50,15 +95,15 @@ class Mzml_Wrapper
50
95
  default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
51
96
  mzml.data_processing_list << default_data_processing
52
97
  mzml.run = Mspire::Mzml::Run.new("simulated_run", default_instrument_config) do |run|
53
- spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
54
- run.spectrum_list = spectrum_list
98
+ spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
99
+ run.spectrum_list = spectrum_list
55
100
  end
56
101
  end
57
- Progress.progress("Converting to mzml:",100,Time.now-@start)
58
- puts ''
102
+ prog.finish!
103
+ puts "ms2 written = #{ms2_count}"
59
104
  return @mzml
60
105
  end
61
-
106
+
62
107
  def to_xml(file)
63
108
  return @mzml.to_xml(file)
64
109
  end
data/lib/ms/noise.rb CHANGED
@@ -5,47 +5,47 @@ require 'ms/rt/rt_helper'
5
5
  module MS
6
6
  module Noise
7
7
  module_function
8
- def noiseify(density,max_mz)
9
- # spectra is {rt => [[mzs],[ints]]}
10
- @start = Time.now
8
+ def noiseify(opts,max_mz)
9
+ # spectra is {rt => [[mzs],[ints]]}
10
+ desity = opts[:noise_density]
11
+ max_int = opts[:noiseMaxInt]
12
+ min_int = opts[:noiseMinInt]
11
13
  @noise = {}
12
14
  r_times = Sim_Spectra.r_times
13
-
14
- count = 0.0
15
+ count = 0
16
+ prog = Progress.new("Adding noise:")
17
+ num = 0
18
+ total = r_times.size
19
+ step = total/100.0
15
20
  r_times.each do |rt|
16
-
17
- Progress.progress("Adding noise:",(((count/r_times.size)*100).to_i))
18
-
19
- nmzs = []
20
- nints = []
21
-
22
- density.times do
23
- rmz = RThelper.RandomFloat(0.0,max_mz)
24
- rint = RThelper.RandomFloat(50,1000)
25
-
26
- nmzs<<rmz
27
- nints<<rint
28
- end
29
- @noise[rt] = [nmzs,nints]
30
- count += 1
21
+ if count > step * (num + 1)
22
+ num = (((count/total)*100.0).to_i)
23
+ prog.update(num)
24
+ end
25
+ nmzs = []
26
+ nints = []
27
+ density.times do
28
+ rmz = RThelper.RandomFloat(0.0,max_mz)
29
+ rint = RThelper.RandomFloat(min_int,max_int)
30
+ nmzs<<rmz
31
+ nints<<rint
32
+ end
33
+ @noise[rt] = [nmzs,nints]
34
+ count += 1
31
35
  end
32
-
33
- Progress.progress("Adding noise:",100,Time.now-@start)
34
- puts ''
35
-
36
+ prog.finish!
36
37
  return @noise
37
38
  end
38
-
39
-
39
+
40
40
  def spec_drops(drop_percentage)
41
41
  r_times = Sim_Spectra.r_times
42
42
  l = r_times.length
43
43
  num_drops = drop_percentage * l
44
44
  num_drops.to_i.times do
45
- r_times.delete_at(rand(l+1))
45
+ r_times.delete_at(rand(l+1))
46
46
  end
47
47
  return r_times
48
48
  end
49
-
49
+
50
50
  end
51
51
  end
@@ -1,6 +1,6 @@
1
1
 
2
2
  module RThelper
3
-
3
+
4
4
  module_function
5
5
  def normalized_gaussian(x,mu,sd)
6
6
  x = x.to_f
@@ -8,7 +8,7 @@ module RThelper
8
8
  sd = sd.to_f
9
9
  return ((1/(Math.sqrt(2*(Math::PI)*(sd**2))))*(Math.exp(-(((x-mu)**2)/((2*sd)**2)))))
10
10
  end
11
-
11
+
12
12
  module_function
13
13
  def gaussian(x,mu,sd,h)
14
14
  x = x.to_f
@@ -17,7 +17,7 @@ module RThelper
17
17
  h = h.to_f
18
18
  return h*Math.exp(-(x-mu)**2/(sd**2))
19
19
  end
20
-
20
+
21
21
  module_function
22
22
  def RandomFloat(a,b)
23
23
  a = a.to_f
@@ -8,74 +8,86 @@ require 'ms/rt/rt_helper'
8
8
 
9
9
  module MS
10
10
  module Rtgenerator
11
-
11
+
12
12
  module_function
13
13
  def generateRT(peptides, one_d)
14
-
15
- @start = Time.now
14
+
16
15
  @r_times = Sim_Spectra.r_times
17
-
16
+
18
17
  # Gets retention times from the weka model
19
18
  peptides = MS::Weka.predict_rts(peptides)
20
19
  MS::Weka.predict_ints(peptides)
20
+
21
+
22
+ #-----------------------------------------------------------------
23
+ prog = Progress.new("Generating retention times:")
24
+ num = 0
25
+ total = peptides.size
26
+ step = total/100.0
21
27
 
28
+ max_rt = 4*(@r_times.max/5)
29
+ r_end = max_rt + (@r_times.max/5)/2
30
+ r_start = @r_times.max/5
22
31
 
23
- #-----------------------------------------------------------------
24
32
  peptides.each_with_index do |pep,ind|
25
- Progress.progress("Generating retention times:",(((ind+1)/peptides.size.to_f)*100).to_i)
26
-
27
-
28
- #Fit retention times into scan times
29
- max_rt = @r_times.max
30
- p_rt = pep.p_rt * 10**-2
31
- if p_rt > 1
32
- pep.p_rt = @r_times.max
33
- pep.p_rt_i = @r_times.index(pep.p_rt)
34
- else
35
- pep.p_rt = @r_times.find {|i| i >= (p_rt * max_rt)}
36
- pep.p_rt_i = @r_times.index(pep.p_rt)
33
+ if ind > step * (num + 1)
34
+ num = (((ind+1)/total.to_f)*100).to_i
35
+ prog.update(num)
37
36
  end
37
+
38
+
39
+ #Fit retention times into scan times
40
+ p_rt = pep.p_rt * 10**-2
41
+ percent_time = p_rt
42
+ sx = RThelper.gaussian(percent_time,0.5,0.45,1.0) * Math.sqrt(pep.abu) #need to figure out what these values should be
43
+ pep.sx = sx
38
44
 
45
+
46
+ if p_rt > 1
47
+ pep.p_rt = @r_times.find {|i| i >= r_end}
48
+ pep.p_rt_i = @r_times.index(pep.p_rt)
49
+ else
50
+ pep.p_rt = @r_times.find {|i| i >= (p_rt * max_rt)}
51
+ pep.p_rt_i = @r_times.index(pep.p_rt)
52
+ end
53
+
39
54
  if pep.p_rt == nil
40
55
  puts "\n\n\t#{pep} TIME-> #{p_rt*max_rt} :: Peptide not predicted in time range: try increasing run time\n\n."
41
- else
42
-
43
- #Give peptide retention times
44
- head_length = nil
45
- tail_length = nil
46
- if one_d
47
- head_length = 300.0
48
- tail_length = 701
49
- else
50
- head_length = 100.0
51
- tail_length = 300
52
- end
53
-
54
- a = @r_times.find {|i| i >= (pep.p_rt-head_length)}
55
- b = @r_times.find {|i| i >= (pep.p_rt+tail_length)}
56
- a = @r_times.index(a)
57
- b = @r_times.index(b)
58
-
59
- if a == nil
60
- a = @r_times[0]
61
- end
62
-
63
- if b == nil
64
- b = @r_times[@r_times.length-1]
65
- end
66
-
67
- pep.set_rts(a,b)
56
+ else
68
57
 
69
- end
58
+ #Give peptide retention times
59
+ head_length = nil
60
+ tail_length = nil
61
+ if one_d
62
+ head_length = 300.0
63
+ tail_length = 701
64
+ else
65
+ head_length = 100.0 * sx
66
+ tail_length = 300 * sx
67
+ end
68
+
69
+ a = @r_times.find {|i| i >= (pep.p_rt-head_length)}
70
+ b = @r_times.find {|i| i >= (pep.p_rt+tail_length)}
71
+ a = @r_times.index(a)
72
+ b = @r_times.index(b)
73
+
74
+ if a == nil
75
+ a = @r_times[0]
76
+ end
77
+
78
+ if b == nil
79
+ b = @r_times[@r_times.length-1]
80
+ end
81
+
82
+ pep.set_rts(a,b)
83
+
84
+ end
70
85
  end
71
86
  #-----------------------------------------------------------------
72
-
73
-
74
- Progress.progress("Generating retention times:",100,Time.now-@start)
75
- puts ""
76
-
87
+ prog.finish!
88
+
77
89
  return peptides
78
-
90
+
79
91
  end
80
92
  end
81
93
  end
data/lib/ms/rt/weka.rb CHANGED
@@ -3,7 +3,7 @@ require 'csv'
3
3
 
4
4
  module MS
5
5
  module Weka
6
- #James Dalg
6
+ #James Dalg
7
7
  module_function
8
8
  def predict_rts(peptides)
9
9
  #mz,charge,intensity,rt,A,R,N,D,B,C,E,Q,Z,G,H,I,L,K,M,F,P,S,T,W,Y,V,J,mass,hydro,pi
@@ -13,12 +13,12 @@ module MS
13
13
  data<<pep.aa_counts
14
14
  end
15
15
  arff = make_rt_arff(Time.now.nsec.to_s,data)
16
-
16
+
17
17
  path = Gem.bin_path('mspire-simulator', 'mspire-simulator').split(/\//)
18
18
  dir = path[0..path.size-3].join("/")
19
19
  system("java weka.classifiers.functions.MultilayerPerceptron -T #{arff} -l #{dir}/lib/weka/M5Rules.model -p 24 > #{arff}.out")
20
20
  system("rm #{arff}")
21
-
21
+
22
22
  #extract what was predicted by weka model
23
23
  file = File.open("#{arff}.out","r")
24
24
  count = 0
@@ -31,38 +31,38 @@ module MS
31
31
  system("rm #{arff}.out")
32
32
  return peptides
33
33
  end
34
-
35
-
36
-
34
+
35
+
36
+
37
37
  def predict_ints(peptides)
38
38
  data = []
39
39
  peptides.each do |pep|
40
- array = []
41
- array<<pep.mono_mz<<pep.charge<<pep.mass<<pep.p_rt
40
+ array = []
41
+ array<<pep.mono_mz<<pep.charge<<pep.mass<<pep.p_rt
42
42
  data << array.concat(pep.aa_counts)
43
43
  end
44
44
  arff = make_int_arff(Time.now.nsec.to_s,data)
45
-
45
+
46
46
  path = Gem.bin_path('mspire-simulator', 'mspire-simulator').split(/\//)
47
47
  dir = path[0..path.size-3].join("/")
48
48
  system("java weka.classifiers.trees.M5P -T #{arff} -l #{dir}/lib/weka/M5P.model -p 27 > #{arff}.out")
49
49
  system("rm #{arff}")
50
-
50
+
51
51
  #extract what was predicted by weka model
52
52
  file = File.open("#{arff}.out","r")
53
53
  count = 0
54
54
  while line = file.gets
55
55
  if line =~ /(\d*\.\d{0,3}){1}/
56
- peptides[count].p_int = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
56
+ peptides[count].p_int = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
57
57
  count += 1
58
58
  end
59
59
  end
60
60
  system("rm #{arff}.out")
61
61
  return peptides
62
62
  end
63
-
64
-
65
-
63
+
64
+
65
+
66
66
  #James Dalg
67
67
  def make_rt_arff(sourcefile, training)
68
68
  sourcefile<<".arff"
@@ -105,9 +105,9 @@ module MS
105
105
  end
106
106
  return sourcefile
107
107
  end
108
-
109
-
110
- #James Dalg
108
+
109
+
110
+ #James Dalg
111
111
  def make_int_arff(sourcefile, training)
112
112
  sourcefile<<".arff"
113
113
  File.open(sourcefile, "wb") do |f| # need to cite f.puts (not %Q)? if so http://www.devdaily.com/blog/post/ruby/how-write-text-to-file-ruby-example