mspire-simulator 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ms/merger.rb CHANGED
@@ -33,65 +33,78 @@ class Merger
33
33
  b = weights.flatten.inject(:+)
34
34
  return a/b
35
35
  end
36
-
36
+
37
37
  def self.merge(spectra,half_range)
38
- @start = Time.now
39
38
  new_data = {}
40
39
  total = spectra.size
41
40
  k = 0
41
+ prog = Progress.new("Merging Overlaps:")
42
42
  spectra.each do |rt,val|
43
- Progress.progress("Merging Overlaps:",(((k/total)*100).to_i))
43
+ if k.even?
44
+ num = (((k/total)*100).to_i)
45
+ prog.update(num)
46
+ end
44
47
  peaks = val.transpose
45
- peaks.sort_by!{|a| a[0]}
48
+ peaks.sort_by!{|a| a[0]} #mz
46
49
  peaks = peaks.transpose
47
50
  mzs = peaks[0]
48
51
  ints = peaks[1]
49
52
  mzs.each_with_index do |mz,i|
50
- next if mz.class == Hash
51
- o_mz = mz
52
- mz = mz.keys[0][0] if mz.class == Hash
53
- range = (mz..mz+half_range)
54
- if range.include?(mzs[i+1])
55
- metaA_mz = [o_mz, mzs[i+1]]
56
- meta_int = [ints[i],ints[i+1]]
57
- sum = meta_int.flatten.inject(:+).to_f
58
- i1 = ints[i]
59
- i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
60
- frac1 = (i1/sum) * 100
61
- frac2 = (ints[i+1]/sum) * 100
62
- metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
63
-
64
- mzs[i] = nil; mzs[i+1] = metaB_mz
65
- ints[i] = nil; ints[i+1] = meta_int
66
- end
53
+ next if mz.class == Hash
54
+ o_mz = mz
55
+ mz = mz.keys[0][0] if mz.class == Hash
56
+ range = (mz..mz+half_range)
57
+ if range.include?(mzs[i+1])
58
+ metaA_mz = [o_mz, mzs[i+1]]
59
+ meta_int = [ints[i],ints[i+1]]
60
+ sum = meta_int.flatten.inject(:+).to_f
61
+ i1 = ints[i]
62
+ i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
63
+ frac1 = (i1/sum) * 100
64
+ frac2 = (ints[i+1]/sum) * 100
65
+ metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
66
+
67
+ mzs[i] = nil; mzs[i+1] = metaB_mz
68
+ ints[i] = nil; ints[i+1] = meta_int
69
+ end
67
70
  end
68
- new_data[rt] = [mzs.compact,ints.compact]
71
+ spec = [mzs.compact,ints.compact]
72
+ spec.ms_level = val.ms_level
73
+ spec.ms2 = val.ms2
74
+ new_data[rt] = spec
69
75
  k += 1
70
76
  end
71
- Progress.progress("Merging Overlaps:",100,Time.now-@start)
72
- puts ''
77
+ prog.finish!
73
78
  return new_data
74
79
  end
75
-
80
+
76
81
  def self.compact(spectra)
77
82
  @start = Time.now
78
83
  total = spectra.size
79
84
  k = 0
85
+ num = 0
86
+ prog = Progress.new("Merge Finishing:")
87
+ step = total/100.0
80
88
  spectra.each do |rt,val|
81
- Progress.progress("Merge Finishing:",(((k/total)*100).to_i))
89
+ if k > step * (num + 1)
90
+ num = (((k/total)*100).to_i)
91
+ prog.update(num)
92
+ end
82
93
  mzs = val[0]
83
94
  ints = val[1]
84
95
  mzs.each_with_index do |m,i|
85
- if m.class == Hash
86
- mzs[i] = m.keys[0][0]
87
- ints[i] = ints[i].flatten.inject(:+)
88
- end
96
+ if m.class == Hash
97
+ mzs[i] = m.keys[0][0]
98
+ ints[i] = ints[i].flatten.inject(:+)
99
+ end
89
100
  end
90
- spectra[rt] = [mzs,ints]
101
+ spec = [mzs,ints]
102
+ spec.ms_level = val.ms_level
103
+ spec.ms2 = val.ms2
104
+ spectra[rt] = spec
91
105
  k += 1
92
106
  end
93
- Progress.progress("Merge Finishing:",100,Time.now-@start)
94
- puts ''
107
+ prog.finish!
95
108
  return spectra
96
109
  end
97
110
  end
@@ -6,42 +6,87 @@ require 'mspire/mzml'
6
6
  class Mzml_Wrapper
7
7
 
8
8
  def initialize(spectra)
9
- #spectra is a Hash rt=>[[mzs],[ints]]
10
- @start = Time.now
11
-
12
-
9
+ #spectra is a Hash rt=>[[mzs],[ints]]
10
+ ms2_count = 0
13
11
  count = 0.0
14
12
  scan_number = 1
15
13
  specs = []
16
- spectra.each do |rt,data|
17
- Progress.progress("Converting to mzml:",(((count/spectra.size)*100).to_i))
14
+ prog = Progress.new("Converting to mzml:")
15
+ num = 0
16
+ total = spectra.size
17
+ step = total/100
18
+ spec_id = nil
19
+ t_rt = 0
20
+ spectra.sort.map do |rt,data|
21
+ if count > step * (num + 1)
22
+ num = (((count/total)*100).to_i)
23
+ prog.update(num)
24
+ end
25
+ if t_rt > rt
26
+ puts "OUT of ORDER"
27
+ end
28
+ t_rt = rt
29
+
30
+ ms_level = data.ms_level # method added to array class
18
31
 
19
32
  spc = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
20
- spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
21
- spec.data_arrays = [
22
- Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
23
- Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
24
- ]
25
- spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
26
- scan = Mspire::Mzml::Scan.new do |scan|
27
- scan.describe! 'MS:1000016', rt, 'UO:0000010'
28
- end
29
- sl << scan
30
- end
33
+ spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
34
+ spec.data_arrays = [
35
+ Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
36
+ Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
37
+ ]
38
+ spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
39
+ scan = Mspire::Mzml::Scan.new do |scan|
40
+ scan.describe! 'MS:1000016', rt, 'UO:0000010'
41
+ end
42
+ sl << scan
43
+ end
44
+ end
45
+ specs<<spc
46
+ if ms_level == 2
47
+ #[rt,[mzs],[ints]]
48
+ ms2 = data.ms2
49
+ ms2.each do |data|
50
+ ms2_count += 1
51
+ scan_number += 1
52
+ spc2 = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
53
+ spec.describe_many!(['MS:1000127', ['MS:1000511', 2]])
54
+ spec.data_arrays = [
55
+ Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000514'),
56
+ Mspire::Mzml::DataArray.new(data[2]).describe!('MS:1000515')
57
+ ]
58
+ spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
59
+ scan = Mspire::Mzml::Scan.new do |scan|
60
+ scan.describe! 'MS:1000016', data[0], 'UO:0000010'
61
+ end
62
+ sl << scan
63
+ end
64
+ precursor = Mspire::Mzml::Precursor.new( spc.id )
65
+ si = Mspire::Mzml::SelectedIon.new
66
+ # the selected ion m/z:
67
+ si.describe! "MS:1000744", data.pre_mz
68
+ # the selected ion charge state
69
+ si.describe! "MS:1000041", data.pre_charge
70
+ # the selected ion intensity
71
+ si.describe! "MS:1000042", data.pre_int
72
+ precursor.selected_ions = [si]
73
+ spec.precursors = [precursor]
74
+ end
75
+ specs<<spc2
76
+ end
31
77
  end
32
78
  count += 1
33
79
  scan_number += 1
34
- specs<<spc
35
80
  end
36
-
37
-
38
-
81
+
82
+
83
+
39
84
  @mzml = Mspire::Mzml.new do |mzml|
40
- mzml.id = 'ms1'
85
+ mzml.id = 'ms1_and_ms2'
41
86
  mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
42
87
  mzml.file_description = Mspire::Mzml::FileDescription.new do |fd|
43
- fd.file_content = Mspire::Mzml::FileContent.new
44
- fd.source_files << Mspire::Mzml::SourceFile.new
88
+ fd.file_content = Mspire::Mzml::FileContent.new
89
+ fd.source_files << Mspire::Mzml::SourceFile.new
45
90
  end
46
91
  default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
47
92
  mzml.instrument_configurations << default_instrument_config
@@ -50,15 +95,15 @@ class Mzml_Wrapper
50
95
  default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
51
96
  mzml.data_processing_list << default_data_processing
52
97
  mzml.run = Mspire::Mzml::Run.new("simulated_run", default_instrument_config) do |run|
53
- spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
54
- run.spectrum_list = spectrum_list
98
+ spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
99
+ run.spectrum_list = spectrum_list
55
100
  end
56
101
  end
57
- Progress.progress("Converting to mzml:",100,Time.now-@start)
58
- puts ''
102
+ prog.finish!
103
+ puts "ms2 written = #{ms2_count}"
59
104
  return @mzml
60
105
  end
61
-
106
+
62
107
  def to_xml(file)
63
108
  return @mzml.to_xml(file)
64
109
  end
data/lib/ms/noise.rb CHANGED
@@ -5,47 +5,47 @@ require 'ms/rt/rt_helper'
5
5
  module MS
6
6
  module Noise
7
7
  module_function
8
- def noiseify(density,max_mz)
9
- # spectra is {rt => [[mzs],[ints]]}
10
- @start = Time.now
8
+ def noiseify(opts,max_mz)
9
+ # spectra is {rt => [[mzs],[ints]]}
10
+ desity = opts[:noise_density]
11
+ max_int = opts[:noiseMaxInt]
12
+ min_int = opts[:noiseMinInt]
11
13
  @noise = {}
12
14
  r_times = Sim_Spectra.r_times
13
-
14
- count = 0.0
15
+ count = 0
16
+ prog = Progress.new("Adding noise:")
17
+ num = 0
18
+ total = r_times.size
19
+ step = total/100.0
15
20
  r_times.each do |rt|
16
-
17
- Progress.progress("Adding noise:",(((count/r_times.size)*100).to_i))
18
-
19
- nmzs = []
20
- nints = []
21
-
22
- density.times do
23
- rmz = RThelper.RandomFloat(0.0,max_mz)
24
- rint = RThelper.RandomFloat(50,1000)
25
-
26
- nmzs<<rmz
27
- nints<<rint
28
- end
29
- @noise[rt] = [nmzs,nints]
30
- count += 1
21
+ if count > step * (num + 1)
22
+ num = (((count/total)*100.0).to_i)
23
+ prog.update(num)
24
+ end
25
+ nmzs = []
26
+ nints = []
27
+ density.times do
28
+ rmz = RThelper.RandomFloat(0.0,max_mz)
29
+ rint = RThelper.RandomFloat(min_int,max_int)
30
+ nmzs<<rmz
31
+ nints<<rint
32
+ end
33
+ @noise[rt] = [nmzs,nints]
34
+ count += 1
31
35
  end
32
-
33
- Progress.progress("Adding noise:",100,Time.now-@start)
34
- puts ''
35
-
36
+ prog.finish!
36
37
  return @noise
37
38
  end
38
-
39
-
39
+
40
40
  def spec_drops(drop_percentage)
41
41
  r_times = Sim_Spectra.r_times
42
42
  l = r_times.length
43
43
  num_drops = drop_percentage * l
44
44
  num_drops.to_i.times do
45
- r_times.delete_at(rand(l+1))
45
+ r_times.delete_at(rand(l+1))
46
46
  end
47
47
  return r_times
48
48
  end
49
-
49
+
50
50
  end
51
51
  end
@@ -1,6 +1,6 @@
1
1
 
2
2
  module RThelper
3
-
3
+
4
4
  module_function
5
5
  def normalized_gaussian(x,mu,sd)
6
6
  x = x.to_f
@@ -8,7 +8,7 @@ module RThelper
8
8
  sd = sd.to_f
9
9
  return ((1/(Math.sqrt(2*(Math::PI)*(sd**2))))*(Math.exp(-(((x-mu)**2)/((2*sd)**2)))))
10
10
  end
11
-
11
+
12
12
  module_function
13
13
  def gaussian(x,mu,sd,h)
14
14
  x = x.to_f
@@ -17,7 +17,7 @@ module RThelper
17
17
  h = h.to_f
18
18
  return h*Math.exp(-(x-mu)**2/(sd**2))
19
19
  end
20
-
20
+
21
21
  module_function
22
22
  def RandomFloat(a,b)
23
23
  a = a.to_f
@@ -8,74 +8,86 @@ require 'ms/rt/rt_helper'
8
8
 
9
9
  module MS
10
10
  module Rtgenerator
11
-
11
+
12
12
  module_function
13
13
  def generateRT(peptides, one_d)
14
-
15
- @start = Time.now
14
+
16
15
  @r_times = Sim_Spectra.r_times
17
-
16
+
18
17
  # Gets retention times from the weka model
19
18
  peptides = MS::Weka.predict_rts(peptides)
20
19
  MS::Weka.predict_ints(peptides)
20
+
21
+
22
+ #-----------------------------------------------------------------
23
+ prog = Progress.new("Generating retention times:")
24
+ num = 0
25
+ total = peptides.size
26
+ step = total/100.0
21
27
 
28
+ max_rt = 4*(@r_times.max/5)
29
+ r_end = max_rt + (@r_times.max/5)/2
30
+ r_start = @r_times.max/5
22
31
 
23
- #-----------------------------------------------------------------
24
32
  peptides.each_with_index do |pep,ind|
25
- Progress.progress("Generating retention times:",(((ind+1)/peptides.size.to_f)*100).to_i)
26
-
27
-
28
- #Fit retention times into scan times
29
- max_rt = @r_times.max
30
- p_rt = pep.p_rt * 10**-2
31
- if p_rt > 1
32
- pep.p_rt = @r_times.max
33
- pep.p_rt_i = @r_times.index(pep.p_rt)
34
- else
35
- pep.p_rt = @r_times.find {|i| i >= (p_rt * max_rt)}
36
- pep.p_rt_i = @r_times.index(pep.p_rt)
33
+ if ind > step * (num + 1)
34
+ num = (((ind+1)/total.to_f)*100).to_i
35
+ prog.update(num)
37
36
  end
37
+
38
+
39
+ #Fit retention times into scan times
40
+ p_rt = pep.p_rt * 10**-2
41
+ percent_time = p_rt
42
+ sx = RThelper.gaussian(percent_time,0.5,0.45,1.0) * Math.sqrt(pep.abu) #need to figure out what these values should be
43
+ pep.sx = sx
38
44
 
45
+
46
+ if p_rt > 1
47
+ pep.p_rt = @r_times.find {|i| i >= r_end}
48
+ pep.p_rt_i = @r_times.index(pep.p_rt)
49
+ else
50
+ pep.p_rt = @r_times.find {|i| i >= (p_rt * max_rt)}
51
+ pep.p_rt_i = @r_times.index(pep.p_rt)
52
+ end
53
+
39
54
  if pep.p_rt == nil
40
55
  puts "\n\n\t#{pep} TIME-> #{p_rt*max_rt} :: Peptide not predicted in time range: try increasing run time\n\n."
41
- else
42
-
43
- #Give peptide retention times
44
- head_length = nil
45
- tail_length = nil
46
- if one_d
47
- head_length = 300.0
48
- tail_length = 701
49
- else
50
- head_length = 100.0
51
- tail_length = 300
52
- end
53
-
54
- a = @r_times.find {|i| i >= (pep.p_rt-head_length)}
55
- b = @r_times.find {|i| i >= (pep.p_rt+tail_length)}
56
- a = @r_times.index(a)
57
- b = @r_times.index(b)
58
-
59
- if a == nil
60
- a = @r_times[0]
61
- end
62
-
63
- if b == nil
64
- b = @r_times[@r_times.length-1]
65
- end
66
-
67
- pep.set_rts(a,b)
56
+ else
68
57
 
69
- end
58
+ #Give peptide retention times
59
+ head_length = nil
60
+ tail_length = nil
61
+ if one_d
62
+ head_length = 300.0
63
+ tail_length = 701
64
+ else
65
+ head_length = 100.0 * sx
66
+ tail_length = 300 * sx
67
+ end
68
+
69
+ a = @r_times.find {|i| i >= (pep.p_rt-head_length)}
70
+ b = @r_times.find {|i| i >= (pep.p_rt+tail_length)}
71
+ a = @r_times.index(a)
72
+ b = @r_times.index(b)
73
+
74
+ if a == nil
75
+ a = @r_times[0]
76
+ end
77
+
78
+ if b == nil
79
+ b = @r_times[@r_times.length-1]
80
+ end
81
+
82
+ pep.set_rts(a,b)
83
+
84
+ end
70
85
  end
71
86
  #-----------------------------------------------------------------
72
-
73
-
74
- Progress.progress("Generating retention times:",100,Time.now-@start)
75
- puts ""
76
-
87
+ prog.finish!
88
+
77
89
  return peptides
78
-
90
+
79
91
  end
80
92
  end
81
93
  end
data/lib/ms/rt/weka.rb CHANGED
@@ -3,7 +3,7 @@ require 'csv'
3
3
 
4
4
  module MS
5
5
  module Weka
6
- #James Dalg
6
+ #James Dalg
7
7
  module_function
8
8
  def predict_rts(peptides)
9
9
  #mz,charge,intensity,rt,A,R,N,D,B,C,E,Q,Z,G,H,I,L,K,M,F,P,S,T,W,Y,V,J,mass,hydro,pi
@@ -13,12 +13,12 @@ module MS
13
13
  data<<pep.aa_counts
14
14
  end
15
15
  arff = make_rt_arff(Time.now.nsec.to_s,data)
16
-
16
+
17
17
  path = Gem.bin_path('mspire-simulator', 'mspire-simulator').split(/\//)
18
18
  dir = path[0..path.size-3].join("/")
19
19
  system("java weka.classifiers.functions.MultilayerPerceptron -T #{arff} -l #{dir}/lib/weka/M5Rules.model -p 24 > #{arff}.out")
20
20
  system("rm #{arff}")
21
-
21
+
22
22
  #extract what was predicted by weka model
23
23
  file = File.open("#{arff}.out","r")
24
24
  count = 0
@@ -31,38 +31,38 @@ module MS
31
31
  system("rm #{arff}.out")
32
32
  return peptides
33
33
  end
34
-
35
-
36
-
34
+
35
+
36
+
37
37
  def predict_ints(peptides)
38
38
  data = []
39
39
  peptides.each do |pep|
40
- array = []
41
- array<<pep.mono_mz<<pep.charge<<pep.mass<<pep.p_rt
40
+ array = []
41
+ array<<pep.mono_mz<<pep.charge<<pep.mass<<pep.p_rt
42
42
  data << array.concat(pep.aa_counts)
43
43
  end
44
44
  arff = make_int_arff(Time.now.nsec.to_s,data)
45
-
45
+
46
46
  path = Gem.bin_path('mspire-simulator', 'mspire-simulator').split(/\//)
47
47
  dir = path[0..path.size-3].join("/")
48
48
  system("java weka.classifiers.trees.M5P -T #{arff} -l #{dir}/lib/weka/M5P.model -p 27 > #{arff}.out")
49
49
  system("rm #{arff}")
50
-
50
+
51
51
  #extract what was predicted by weka model
52
52
  file = File.open("#{arff}.out","r")
53
53
  count = 0
54
54
  while line = file.gets
55
55
  if line =~ /(\d*\.\d{0,3}){1}/
56
- peptides[count].p_int = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
56
+ peptides[count].p_int = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
57
57
  count += 1
58
58
  end
59
59
  end
60
60
  system("rm #{arff}.out")
61
61
  return peptides
62
62
  end
63
-
64
-
65
-
63
+
64
+
65
+
66
66
  #James Dalg
67
67
  def make_rt_arff(sourcefile, training)
68
68
  sourcefile<<".arff"
@@ -105,9 +105,9 @@ module MS
105
105
  end
106
106
  return sourcefile
107
107
  end
108
-
109
-
110
- #James Dalg
108
+
109
+
110
+ #James Dalg
111
111
  def make_int_arff(sourcefile, training)
112
112
  sourcefile<<".arff"
113
113
  File.open(sourcefile, "wb") do |f| # need to cite f.puts (not %Q)? if so http://www.devdaily.com/blog/post/ruby/how-write-text-to-file-ruby-example