mspire-simulator 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2011 Brigham Young University
2
+ Authors: Andrew Noyce, Nozumo Okuda, James Dagliesh, John Prince
3
+ Under the Guidance of: Dr. John Prince
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = mspire-simulator
2
+
3
+ Description:
4
+ Simulates MS runs given amino acid .fasta files. Outputs a .mzML file.
5
+
6
+ == Install
7
+ gem install mspire-simulator
8
+ Dependencies:
9
+ ruby 1.9*
10
+ weka 3.6.0
11
+ fftw 3.2.2 - Tested in Linux Mint 12 and Ubuntu Oneiric Ocelot
12
+ == Examples
13
+
14
+ == Copyright
15
+
16
+ See LICENSE.txt for further details.
17
+
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'jeweler'
5
# Gem packaging tasks (Jeweler) and spec tasks (RSpec) for mspire-simulator.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "mspire-simulator"
  gem.homepage = "http://dl.dropbox.com/u/42836826/Ms_Sim_Homepage.html"
  gem.license = "MIT"
  gem.summary = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.}
  gem.description = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.
Can simulate specific data if given an MZML file containing a single isolated peptide peak.}
  gem.email = "andrewbnoyce@gmail.com"
  gem.authors = ["anoyce"]

  # Runtime dependencies, every one pinned to an exact version.
  # ("0.8.2" and "= 0.8.2" mean the same to RubyGems; the operator is spelled
  # out everywhere so the list reads consistently.)
  [
    ["mspire",       "= 0.8.2"],
    ["rubyvis",      "= 0.5.2"],
    ["nokogiri",     "= 1.5.2"],
    ["ffi",          "= 1.0.11"],
    ["ffi-inliner",  "= 0.2.4"],
    ["fftw3",        "= 0.3"],
    ["distribution", "= 0.7.0"],
    ["pony",         "= 1.4"],
    ["obo",          "= 0.1.0"],
    ["trollop",      "= 1.16.2"]
  ].each do |name, requirement|
    gem.add_dependency name, requirement
  end

  gem.executables = ["mspire-simulator"]

  # Files and patterns kept out of the packaged gem.
  [
    "elution_curvefit.svg",
    "intensity_var_curvefit.svg",
    "lib/pool.rb",
    "mz_var_curvefit.svg",
    "single.mzML",
    "test.mzml",
    "test.mzml_truth.csv",
    "test.mzml_truth.xml",
    "testFiles/*"
  ].each do |pattern|
    gem.files.exclude pattern
  end
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with the task's rcov flag switched on.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH << './lib'
3
+
4
+ require 'time'
5
+ require 'progress'
6
+ require 'nokogiri'
7
+ require 'mspire/digester'
8
+ require 'mspire/tagged_peak'
9
+ require 'mspire'
10
+ require 'ms/sim_peptide'
11
+ require 'ms/rt/rtgenerator'
12
+ require 'ms/sim_spectra'
13
+ require 'ms/noise'
14
+ require 'ms/mzml_wrapper'
15
+ require 'trollop'
16
+ require 'ms/tr_file_writer'
17
+ require 'ms/isoelectric_calc'
18
+ require 'ms/sim_digester'
19
+ require 'ms/sim_trollop'
20
+ require 'ms/merger'
21
+
22
# Command-line driver for the simulator.
#
# Reads the options produced by MS::Troll, digests every file named in ARGV,
# builds the simulated spectra, optionally adds noise and writes a truth file,
# merges overlapping peaks and finally writes the spectra to opts[:out_file]
# through Mzml_Wrapper. When an e-mail address was given, bin/sim_mail.rb is
# invoked with a success or failure message.
module MspireSimulator
  @opts = MS::Troll.new.get

  module_function

  # Options parsed from the command line (the value returned by MS::Troll#get).
  def opts; @opts end

  noise = @opts[:noise]
  truth = @opts[:truth]
  out_file = @opts[:out_file]
  email = @opts[:email]
  # Option values arrive as strings; only the literal "true" enables 1-D mode.
  one_d = (@opts[:one_d] == "true")

  # Sends a status mail via bin/sim_mail.rb unless no address was supplied
  # (the option value is the string "nil" in that case). The multi-argument
  # form of `system` bypasses the shell, so the address cannot inject commands.
  send_status = lambda do |message|
    next if email.nil? || email == "nil"
    system "ruby", "bin/sim_mail.rb", email.to_s, message
  end

  begin

    @start = Time.now

    #------------------------Digest-----------------------------------------------
    peptides = []
    digester = MS::Sim_Digester.new(@opts[:digestor],@opts[:pH])
    ARGV.each do |file|
      peptides << digester.digest(file)
    end
    # flatten!/uniq! return nil when they change nothing, so they must not be chained.
    peptides.flatten!
    peptides.uniq!
    #-----------------------------------------------------------------------------


    #------------------------Create Spectrum--------------------------------------
    spectra = MS::Sim_Spectra.new(peptides, @opts, one_d)
    data = spectra.data

    # When noise is requested, `noise` is replaced by the result of noiseify;
    # otherwise the raw option string is what reaches the CSV truth writer.
    if noise == 'true'
      noise = spectra.noiseify
    end
    #-----------------------------------------------------------------------------


    #------------------------Merge Overlaps---------------------------------------
    spectra.spectra = Merger.merge(spectra.spectra,@opts[:overlapRange].to_f)
    #-----------------------------------------------------------------------------


    #------------------------Truth Files------------------------------------------
    case truth
    when "xml"
      MS::Txml_file_writer.write(spectra.features,spectra.spectra,out_file)
    when "csv"
      MS::Tcsv_file_writer.write(spectra.spectra,data,noise,spectra.features,out_file)
    end
    #-----------------------------------------------------------------------------


    #-----------------------Merge Finish------------------------------------------
    spectra.spectra = Merger.compact(spectra.spectra)
    #-----------------------------------------------------------------------------


    #-----------------------Clean UP----------------------------------------------
    spectra.features.each { |fe| fe.delete }
    peptides.clear
    #-----------------------------------------------------------------------------


    #-----------------------MZML--------------------------------------------------
    data = spectra.spectra
    mzml = Mzml_Wrapper.new(data)
    puts "Writing to file..."
    mzml.to_xml(out_file)
    puts "Done."
    #-----------------------------------------------------------------------------

  rescue Exception => e # Clean up after any failure, including interrupts
    puts e.message
    puts e.backtrace
    if digester && File.exist?(digester.digested_file)
      File.delete(digester.digested_file)
    end
    spectra.features.each { |fe| fe.delete } if spectra
    # The list may still hold one nested array per input file if the failure
    # happened before flattening, so flatten before deleting each peptide.
    peptides.flatten.each { |pep| pep.delete } if peptides
    puts "Exception - Simulation Failed"

    send_status.call("Exception - Simulation Failed")
    # Exit requests, signals and other non-StandardError conditions still get
    # the cleanup above but must not be swallowed.
    raise unless e.is_a?(StandardError)
  else
    send_status.call("Success! - Simulation Complete")
  end
end
data/bin/sim_mail.rb ADDED
@@ -0,0 +1,26 @@
1
+
2
+ require 'pony'
3
+
4
# Sends a status e-mail through Gmail's SMTP server using Pony.
#
# Usage: ruby sim_mail.rb ADDRESS WORD [WORD ...]
#   ARGV[0]  - recipient address
#   ARGV[1..] - message words; each one is appended to the body preceded by a space
begin
  address = ARGV[0]
  msgbody = ARGV.drop(1).map { |word| " #{word}" }.join

  # SECURITY: the account credentials were hard-coded in this script. They can
  # now be overridden through the environment; the previous values remain as
  # fallbacks so existing installs keep working, but they should be rotated
  # and removed from the source.
  smtp_user = ENV.fetch('MSPIRE_SIM_SMTP_USER', 'mspire.simulator')
  smtp_password = ENV.fetch('MSPIRE_SIM_SMTP_PASSWORD', 'chromatography')

  Pony.mail(:to => address, :via => :smtp, :via_options => {
      :address => 'smtp.gmail.com',
      :port => '587',
      :enable_starttls_auto => true,
      :user_name => smtp_user,
      :password => smtp_password,
      :authentication => :plain,
      :domain => "localhost.localdomain"
    },
    :subject => 'Mspire-Simulator', :body => msgbody
  )
rescue StandardError => e
  puts "Email function failed. Check email address and internet connection."
  # Report the underlying cause on stderr instead of discarding it.
  warn "#{e.class}: #{e.message}"
end
Binary file
Binary file
data/bin/weka/weka.jar ADDED
Binary file
@@ -0,0 +1,152 @@
1
+ require 'time'
2
+ require_relative 'fit_graph'
3
+
4
+
5
# Small statistics helpers mixed into every Enumerable.
module Enumerable
  # Only supply +sum+ when the running Ruby does not already provide one, so
  # the built-in version (with its initial-value and block forms) is never
  # replaced by a less capable implementation.
  unless method_defined?(:sum)
    # Adds up the elements (or the block's result for each element),
    # starting from +init+.
    def sum(init = 0)
      inject(init) { |accum, i| accum + (block_given? ? yield(i) : i) }
    end
  end

  # Arithmetic mean of the elements, as a Float.
  # Uses +count+ rather than +length+ so it also works on Ranges, Sets, etc.
  def mean
    sum / count.to_f
  end

  # Unbiased sample variance (divides by n - 1) around the supplied +mean+.
  # With a single element the divisor is 0.0, so the result is NaN.
  def sample_variance(mean)
    squared_error = inject(0) { |accum, i| accum + (i - mean)**2 }
    squared_error / (count - 1).to_f
  end

  # Sample standard deviation; +mean+ defaults to the collection's own mean.
  def standard_deviation(mean = self.mean)
    Math.sqrt(sample_variance(mean))
  end
end
24
+
25
# Simple evolutionary curve fitter.
#
# A population of parameter sets is kept; each set is an Array holding
# +paramsize+ parameters followed by its fitness (the RMSD between the input
# points and the curve produced by +function+). Every generation one randomly
# chosen individual is cloned and mutated, and the clone replaces the worst
# individual when it fits better.
class GenCurvefit
  attr_reader :function
  attr_accessor :paramsize, :mutation_limits, :population, :generations, :popsize

  # pts_in          - Array of [x, y] points to fit.
  # function        - callable taking (parameter_set, x) and returning y.
  # paramsize       - number of parameters in each set.
  # mutation_limits - one [low, high] pair per parameter; used both for the
  #                   initial random value and for the size of each mutation.
  # popsize         - number of individuals created by init_population.
  # generations     - number of iterations performed by #fit.
  #
  # The population is built right away only when everything it needs was given.
  def initialize(pts_in, function = nil, paramsize = nil, mutation_limits = nil, popsize = 0, generations = nil)
    @pts_in = pts_in
    @function = function
    @paramsize = paramsize
    @mutation_limits = mutation_limits
    @popsize = popsize
    @generations = generations
    @population = []
    ready = [@paramsize, @mutation_limits, @function].none? { |v| v.nil? }
    init_population if @popsize != 0 && ready
  end

  # Appends +popsize+ random individuals (parameters plus fitness) to the population.
  def init_population
    @popsize.times do
      set = Array.new(@paramsize) do |i|
        low, high = @mutation_limits[i]
        random_float(low, high)
      end
      set << fitness(set, @pts_in)
      @population << set
    end
  end

  # Sets the function being fitted; see #initialize for its signature.
  def set_fit_function(func)
    @function = func
  end

  # Adds a random offset to one randomly chosen parameter of +set+ (in place).
  # The last element of +set+ is its fitness and is never picked.
  def mutate(set)
    index = rand(set.size - 1)
    low, high = @mutation_limits[index]
    set[index] += random_float(low, high)
  end

  # Seven-point moving average of +arr+. The result starts and ends with three
  # nils so that, for inputs of at least seven elements, it lines up with +arr+.
  def self.smoothave(arr)
    averages = arr.each_cons(7).map { |window| window.inject(:+) / window.size }
    [nil, nil, nil] + averages + [nil, nil, nil]
  end

  # Rescales +arr+ in place so that its maximum becomes 100; returns +arr+.
  def self.normalize(arr)
    max = arr.max
    arr.map! { |i| (i.to_f / max) * 100 }
  end

  # Sorts the population in place, best (lowest RMSD) first.
  def sort_by_fitness
    @population.sort_by! { |set| set.last }
  end

  # Random Float r with a <= r < b (for a < b).
  def random_float(a, b)
    a = a.to_f
    b = b.to_f
    a + rand * (b - a)
  end

  # Root of the mean squared distance between corresponding points of +v+ and +w+.
  def rmsd(v, w)
    n = v.size
    sum = 0.0
    n.times { |i| sum += ((v[i][0] - w[i][0])**2.0 + (v[i][1] - w[i][1])**2.0) }
    Math.sqrt(sum / n.to_f)
  end

  # Evaluates parameter +set+ against +pts_in+.
  # Returns the RMSD, or the fitted [x, y] points themselves when +plot+ is true.
  def fitness(set, pts_in, plot = false)
    pts = pts_in.map do |point|
      x = point[0]
      [x, function.call(set, x)]
    end
    return pts if plot

    rmsd(pts_in, pts)
  end

  # Runs +generations+ iterations and returns the best individual found
  # (parameters followed by fitness). With zero generations the best member of
  # the current population is returned.
  def fit
    @start = Time.now
    # "Worst" below means the last element, so the population must be ordered first.
    sort_by_fitness
    @generations.times do |i|
      report_progress("Generation #{i+1}:", ((i / @generations.to_f) * 100).to_i)
      # Generate a mutation of a randomly chosen individual
      candidate = @population[rand(@population.size)].clone
      mutate(candidate)
      candidate[@paramsize] = fitness(candidate, @pts_in)

      # Replace the worst individual when the mutant fits better, then re-sort
      if candidate.last < @population.last.last
        @population[-1] = candidate
        sort_by_fitness
      end
    end
    @best = @population.first
    report_progress("Generations Done, printing graph:", 100, Time.now - @start)
    @best
  end

  # Plots the input points against the best fit into +file+ (SVG) via Fit_plot.
  # +labels+ is an optional [x_label, y_label] pair. Requires #fit to have run.
  def plot(file, labels = nil)
    pts = fitness(@best, @pts_in, true)
    # Let Fit_plot apply its own default labels instead of handing it nil.
    if labels.nil?
      Fit_plot.plot(@pts_in, pts, file)
    else
      Fit_plot.plot(@pts_in, pts, file, labels)
    end
    puts " Output File: #{file}"
  end

  private

  # Forwards to Progress.progress when that helper has been loaded. This file
  # does not require it itself, so the fitter stays usable without it.
  def report_progress(*args)
    Progress.progress(*args) if defined?(Progress)
  end
end
@@ -0,0 +1,84 @@
1
+ require 'rubyvis'
2
+
3
require 'ostruct' # OpenStruct is used for the plotted points below

# Renders a set of measured points and a fitted curve to an SVG file with Rubyvis.
class Fit_plot
  # pts    - Array of [x, y] points drawn as blue dots; also defines the axis ranges.
  # pts2   - Array of [x, y] points drawn as a red line.
  # file   - path of the SVG file to write.
  # labels - [x_axis_label, y_axis_label]; nil is treated like the default.
  def self.plot(pts,pts2,file,labels = ["",""])
    labels ||= ["",""] # callers may pass nil explicitly
    xlab = labels[0]
    ylab = labels[1]

    w = 600
    h = 300

    xmin = pts.min_by{|arr| arr[0]}[0]
    xmax = pts.max_by{|arr| arr[0]}[0]
    ymin = pts.min_by{|arr| arr[1]}[1]
    ymax = pts.max_by{|arr| arr[1]}[1]

    line1 = pts.map { |pt| OpenStruct.new({:x=> pt[0], :y=> pt[1]}) }
    line2 = pts2.map { |pt| OpenStruct.new({:x=> pt[0], :y=> pt[1]}) }

    # Linear scales mapping data coordinates onto the panel's pixel size
    x = pv.Scale.linear(xmin, xmax).range(0, w)
    y = pv.Scale.linear(ymin, ymax).range(0, h)


    vis = pv.Panel.new()
      .width(w)
      .height(h)
      .bottom(50)
      .left(40)
      .right(30)
      .top(5);

    # Measured points
    vis.add(pv.Dot).
      stroke_style('blue').
      data(line1).
      line_width(2).
      left(lambda {|d| x.scale(d.x)}).
      bottom(lambda {|d| y.scale(d.y)}).
      shape_size(1).
      anchor("bottom");

    # Fitted curve
    vis.add(pv.Line).
      stroke_style('red').
      data(line2).
      line_width(2).
      left(lambda {|d| x.scale(d.x)}).
      bottom(lambda {|d| y.scale(d.y)}).
      anchor("bottom");

    # X-axis tick labels
    vis.add(pv.Label)
      .data(x.ticks())
      .left(lambda {|d| x.scale(d)})
      .bottom(0)
      .text_baseline("top")
      .text_margin(5);

    # X-axis title
    vis.add(pv.Label)
      .bottom(-30)
      .text(xlab);

    # Y-axis title, rotated
    vis.add(pv.Label)
      .text_angle(-Math::PI/2.0)
      .left(-10)
      .text(ylab);

    # Horizontal rules at the y ticks, with a label on every other one
    vis.add(pv.Rule)
      .data(y.ticks())
      .bottom(lambda {|d| y.scale(d)})
      .stroke_style(lambda {|i| i!=0 ? pv.color("#ccc") : pv.color("black")})
      .anchor("right").add(pv.Label)
      .visible(lambda { (self.index & 1)==0})
      .text_margin(6);
    vis.render();

    # Block form closes the file even if writing raises
    File.open(file,"w") do |file_out|
      file_out.puts vis.to_svg
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'mspire'
2
+ require 'mspire/mzml'
3
+
4
# Extracts the raw MS1 peak data from an mzML file.
class Mzml_reader
  # Reads +file+ and returns three parallel Arrays: [mzs, retention_times,
  # intensities]. There is one entry per peak of every spectrum whose
  # ms_level is 1; each peak carries the retention time of its spectrum.
  def self.get_data(file)
    mzs_out = []
    rts_out = []
    ints_out = []

    # Block form guarantees the file handle is closed once reading is done.
    File.open(file) do |io|
      mzml = Mspire::Mzml.new(io)

      mzml.each do |spec|
        next unless spec.ms_level == 1
        ints = spec.intensities
        mzs = spec.mzs
        rt = spec.retention_time

        ints.each_with_index do |intensity, j|
          mzs_out << mzs[j]
          rts_out << rt
          ints_out << intensity
        end
      end
    end

    return mzs_out, rts_out, ints_out
  end
end
@@ -0,0 +1,120 @@
1
+
2
+ require 'ms/curvefit/mzml_reader'
3
+ require 'ms/curvefit/curve_fit_helper'
4
+
5
# Derives simulation parameters by fitting curves to the peaks of an mzML file.
class CurveFit
  # Reads the peaks from opts[:mzml], fits three curves with GenCurvefit
  # (each run for opts[:generations] generations) and stores the results in
  # +opts+, which is also returned:
  #
  #   :overlapRange        - sample variance of the m/z values * 10**6 / 4
  #   :sampling_rate       - number of peaks divided by the retention-time span
  #   :front, :tail, :mu   - parameters of the elution-profile fit
  #   :wobA, :wobB         - parameters of the m/z-variance fit
  #   :jagA, :jagC, :jagB  - parameters of the intensity-variance fit
  #
  # Each fit is also plotted to an SVG file in the current directory.
  def self.get_parameters(opts)
    mzs_in, rts_in, ints_in = Mzml_reader.get_data(opts[:mzml])
    generations = opts[:generations]

    @pts_int_var = []
    @pts_mz_var = []
    @pts_elut = []

    ints_in = GenCurvefit.normalize(ints_in)
    #-----------------------overlapRange--------------------------------------------
    mz_mean = mzs_in.inject(:+)/mzs_in.size
    opts[:overlapRange] = (mzs_in.sample_variance(mz_mean)*10**6)/4
    #-------------------------------------------------------------------------------


    #----------------------create points/curve to fit elution-----------------------
    ints_in.each_with_index do |intensity, i|
      @pts_elut << [rts_in[i], intensity]
    end
    opts[:sampling_rate] = rts_in.size/(rts_in.max - rts_in.min)

    # Position of the first peak carrying each retention time. Looked up once
    # here instead of scanning rts_in on every evaluation of the fit function.
    first_position = {}
    rts_in.each_with_index { |rt, i| first_position[rt] = i unless first_position.key?(rt) }

    elution = lambda do |a, rt|
      pos = first_position[rt]
      100.0*Math.exp(-((pos-a[2])**2)/((a[1]*pos+a[0])**2))
    end
    best = run_fit(@pts_elut, elution,
                   [[-5,5],[-1,1],[-rts_in.size/2,rts_in.size/2]],
                   generations, "elution_curvefit.svg",
                   ["retention time","normalized intensity"])
    opts[:front] = best[0]
    opts[:tail] = best[1]
    opts[:mu] = best[2]
    #-------------------------------------------------------------------------------


    #-----------------create points/curve to fit m/z variance-----------------------
    # Absolute deviation of every m/z value from the mean m/z
    wobs = mzs_in.map { |mz| (mz_mean-mz).abs }

    # Standard deviation over a trailing window of four peaks
    (3...ints_in.length).each do |d|
      @pts_mz_var << [ints_in[d], wobs[d-3..d].standard_deviation]
    end

    best = run_fit(@pts_mz_var, lambda{|a,i| a[0]*i**a[1]},
                   [[-1,1],[-1,1]],
                   generations, "mz_var_curvefit.svg",
                   ["normalized intensity","m/z variance"])
    opts[:wobA] = best[0]
    opts[:wobB] = best[1]
    #-------------------------------------------------------------------------------

    #--------------------create points/curve to fit intensity variance--------------
    smooth_ave = GenCurvefit.smoothave(ints_in)

    # Absolute deviation from the moving average; 0 where no average exists
    diff = smooth_ave.each_with_index.map do |s, i|
      s.nil? ? 0 : (s-ints_in[i]).abs
    end

    ints_in.each_with_index do |intensity, d|
      next if d < 3
      @pts_int_var << [intensity, diff[d-3..d].standard_deviation]
    end

    best = run_fit(@pts_int_var, lambda{|a,i| a[0]*(1-Math.exp(-a[2]*i))+a[1]},
                   [[-20,20],[-0.5,0.5],[-0.5,0.5]],
                   generations, "intensity_var_curvefit.svg",
                   ["normalized intensity","intensity variance"])
    opts[:jagA] = best[0]
    opts[:jagC] = best[1]
    opts[:jagB] = best[2]
    #-------------------------------------------------------------------------------

    return opts
  end

  # Fits +function+ to +points+ with a population of 10 and one parameter per
  # entry of +limits+, plots the result to +svg_file+ and returns the best
  # individual (parameters followed by its RMSD).
  def self.run_fit(points, function, limits, generations, svg_file, labels)
    fitter = GenCurvefit.new(points)
    fitter.set_fit_function(function)
    fitter.mutation_limits = limits
    fitter.popsize = 10
    fitter.paramsize = limits.size
    fitter.init_population
    fitter.generations = generations

    best = fitter.fit
    fitter.plot(svg_file, labels)
    best
  end
  private_class_method :run_fit
end