mspire-simulator 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2011 Brigham Young University
2
+ Authors: Andrew Noyce, Nozumo Okuda, James Dagliesh, John Prince
3
+ Under the Guidance of: Dr. John Prince
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = mspire-simulator
2
+
3
+ Description:
4
+ Simulates MS runs given amino acid .fasta files. Outputs a .mzML file.
5
+
6
+ == Install
7
+ gem install mspire-simulator
8
+ Dependencies:
9
+ ruby 1.9*
10
+ weka 3.6.0
11
+ fftw 3.2.2 - Tested in Linux Mint 12 and Ubuntu Oneiric Ocelot
12
+ == Examples
13
+
14
+ == Copyright
15
+
16
+ See LICENSE.txt for further details.
17
+
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'jeweler'
5
# Packaging: jeweler generates the gemspec from the settings in this block.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "mspire-simulator"
  gem.homepage = "http://dl.dropbox.com/u/42836826/Ms_Sim_Homepage.html"
  gem.license = "MIT"
  gem.summary = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.}
  gem.description = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.
    Can simulate specific data if given an MZML file containing a single isolated peptide peak.}
  gem.email = "andrewbnoyce@gmail.com"
  gem.authors = ["anoyce"]

  # Runtime dependencies, every one of them pinned to a single exact version.
  [
    ["mspire", "0.8.2"],
    ["rubyvis", "0.5.2"],
    ["nokogiri", "1.5.2"],
    ["ffi", "1.0.11"],
    ["ffi-inliner", "0.2.4"],
    ["fftw3", "0.3"],
    ["distribution", "0.7.0"],
    ["pony", "1.4"],
    ["obo", "0.1.0"],
    ["trollop", "1.16.2"]
  ].each do |dep_name, dep_version|
    gem.add_dependency dep_name, "= #{dep_version}"
  end

  gem.executables = ["mspire-simulator"]

  # Paths left out of the packaged gem.
  [
    "elution_curvefit.svg",
    "intensity_var_curvefit.svg",
    "lib/pool.rb",
    "mz_var_curvefit.svg",
    "single.mzML",
    "test.mzml",
    "test.mzml_truth.csv",
    "test.mzml_truth.xml",
    "testFiles/*"
  ].each do |pattern|
    gem.files.exclude pattern
  end
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file below spec/.
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov switched on.
RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

task :default => :spec
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH << './lib'
3
+
4
+ require 'time'
5
+ require 'progress'
6
+ require 'nokogiri'
7
+ require 'mspire/digester'
8
+ require 'mspire/tagged_peak'
9
+ require 'mspire'
10
+ require 'ms/sim_peptide'
11
+ require 'ms/rt/rtgenerator'
12
+ require 'ms/sim_spectra'
13
+ require 'ms/noise'
14
+ require 'ms/mzml_wrapper'
15
+ require 'trollop'
16
+ require 'ms/tr_file_writer'
17
+ require 'ms/isoelectric_calc'
18
+ require 'ms/sim_digester'
19
+ require 'ms/sim_trollop'
20
+ require 'ms/merger'
21
+
22
# Command-line driver. Runs every file named in ARGV through the digester,
# builds simulated spectra from the resulting peptides, optionally adds noise
# and writes a truth file, merges overlapping peaks and finally writes the
# spectra to `out_file` through Mzml_Wrapper.
#
# The code compares option values against the strings "true", "false" and
# "nil", so flags are handled as strings rather than booleans.
module MspireSimulator
  @opts = MS::Troll.new.get

  module_function

  # Options hash returned by MS::Troll#get.
  def opts; @opts end

  begin
    @start = Time.now

    noise = @opts[:noise]
    truth = @opts[:truth]
    out_file = @opts[:out_file]
    email = @opts[:email]
    one_d = @opts[:one_d] == "true"

    #------------------------Digest-----------------------------------------------
    peptides = []
    digester = MS::Sim_Digester.new(@opts[:digestor],@opts[:pH])
    ARGV.each do |file|
      peptides<<digester.digest(file)
    end
    # flatten! and uniq! return nil when they change nothing, so chaining
    # them can call uniq! on nil; run them as separate statements.
    peptides.flatten!
    peptides.uniq!
    #-----------------------------------------------------------------------------

    #------------------------Create Spectrum--------------------------------------
    spectra = MS::Sim_Spectra.new(peptides, @opts, one_d)
    data = spectra.data

    # When noise is requested the option string is replaced by the result of
    # noiseify; otherwise the original option value is passed on unchanged.
    if noise == 'true'
      noise = spectra.noiseify
    end
    #-----------------------------------------------------------------------------

    #------------------------Merge Overlaps---------------------------------------
    spectra.spectra = Merger.merge(spectra.spectra,@opts[:overlapRange].to_f)
    #-----------------------------------------------------------------------------

    #------------------------Truth Files------------------------------------------
    case truth
    when "xml"
      MS::Txml_file_writer.write(spectra.features,spectra.spectra,out_file)
    when "csv"
      MS::Tcsv_file_writer.write(spectra.spectra,data,noise,spectra.features,out_file)
    end
    #-----------------------------------------------------------------------------

    #-----------------------Merge Finish------------------------------------------
    spectra.spectra = Merger.compact(spectra.spectra)
    #-----------------------------------------------------------------------------

    #-----------------------Clean UP----------------------------------------------
    spectra.features.each{|fe| fe.delete}
    peptides.clear
    #-----------------------------------------------------------------------------

    #-----------------------MZML--------------------------------------------------
    data = spectra.spectra
    mzml = Mzml_Wrapper.new(data)
    puts "Writing to file..."
    mzml.to_xml(out_file)
    puts "Done."
    #-----------------------------------------------------------------------------

  rescue Exception => e # Exception (not StandardError) so the cleanup also runs on Interrupt/exit
    puts e.message
    puts e.backtrace
    # Every local may still be nil if the failure happened early.
    if digester && File.exist?(digester.digested_file)
      File.delete(digester.digested_file)
    end
    spectra.features.each{|fe| fe.delete} if spectra
    peptides.each{|pep| pep.delete} if peptides
    puts "Exception - Simulation Failed"

    # Argument-list form of system: the address never passes through a shell.
    if email && email != "nil"
      system("ruby", "bin/sim_mail.rb", email, "Exception - Simulation Failed")
    end

    # Signals and exit requests were only intercepted for cleanup; hand them on.
    raise unless e.is_a?(StandardError)
  else
    if email && email != "nil"
      system("ruby", "bin/sim_mail.rb", email, "Success! - Simulation Complete")
    end
  end
end
data/bin/sim_mail.rb ADDED
@@ -0,0 +1,26 @@
1
+
2
+ require 'pony'
3
+
4
# Sends a notification e-mail via smtp.gmail.com.
#
# Usage: ruby sim_mail.rb ADDRESS [WORD ...]
# ARGV[0] is the recipient; every later argument is appended to the body,
# each one preceded by a single space.
begin
  address = ARGV[0]
  msgbody = ARGV.drop(1).map { |word| " #{word}" }.join

  # FIXME: the account credentials are hard-coded in the source. They can be
  # overridden through the environment; the literals remain only as defaults
  # so existing installations keep working.
  smtp_user = ENV.fetch('MSPIRE_SIM_SMTP_USER', 'mspire.simulator')
  smtp_password = ENV.fetch('MSPIRE_SIM_SMTP_PASSWORD', 'chromatography')

  Pony.mail(
    :to => address,
    :via => :smtp,
    :via_options => {
      :address => 'smtp.gmail.com',
      :port => '587',
      :enable_starttls_auto => true,
      :user_name => smtp_user,
      :password => smtp_password,
      :authentication => :plain,
      :domain => "localhost.localdomain"
    },
    :subject => 'Mspire-Simulator',
    :body => msgbody
  )
rescue => e
  # Best effort: a failed notification must not abort the caller, but the
  # underlying reason is reported on stderr instead of being discarded.
  puts "Email function failed. Check email address and internet connection."
  warn "#{e.class}: #{e.message}"
end
+ end
Binary file
Binary file
data/bin/weka/weka.jar ADDED
Binary file
@@ -0,0 +1,152 @@
1
+ require 'time'
2
+ require_relative 'fit_graph'
3
+
4
+
5
# Small statistics helpers added to every Enumerable.
module Enumerable
  # Sum of all elements, starting from 0.
  #
  # Defined only when the running Ruby has no Enumerable#sum of its own, so
  # the built-in version (which also accepts an initial value and a block)
  # is never replaced by this simpler one.
  unless method_defined?(:sum)
    def sum
      inject(0) { |accum, i| accum + i }
    end
  end

  # Arithmetic mean as a Float. Uses `count` rather than `length` so it works
  # for any Enumerable, not only Array. An empty collection yields NaN.
  def mean
    sum / count.to_f
  end

  # Sample variance (divides by n - 1) around the supplied mean.
  # With fewer than two elements the division by zero yields NaN.
  #
  # @param mean [Numeric] the mean to measure deviations from
  # @return [Float]
  def sample_variance(mean)
    squares = inject(0) { |accum, i| accum + (i - mean)**2 }
    squares / (count - 1).to_f
  end

  # Sample standard deviation: square root of #sample_variance.
  #
  # @param mean [Numeric] defaults to the collection's own mean
  # @return [Float]
  def standard_deviation(mean = self.mean)
    Math.sqrt(sample_variance(mean))
  end
end
24
+
25
# Evolutionary curve fitter.
#
# A population of parameter sets is kept; each set is an array of
# `paramsize` parameters followed by one extra slot holding its fitness
# (the RMSD between the input points and the fitted curve, lower is better).
# #fit repeatedly mutates a random member and lets the mutant replace the
# worst member whenever it scores better.
class GenCurvefit
  attr_reader :function
  attr_accessor :paramsize, :mutation_limits, :population, :generations, :popsize

  # @param pts_in [Array<Array>] input points as [x, y] pairs
  # @param function [#call] called as function.call(set, x), returns the fitted y
  # @param paramsize [Integer] number of parameters in a set
  # @param mutation_limits [Array<Array>] one [low, high] pair per parameter
  # @param popsize [Integer] number of sets in the population
  # @param generations [Integer] number of iterations #fit performs
  #
  # The population is created right away only when function, paramsize,
  # mutation_limits and a non-zero popsize are all supplied; otherwise call
  # #init_population after setting them.
  def initialize(pts_in,function = nil,paramsize = nil,mutation_limits = nil,popsize = 0,generations = nil)
    @pts_in = pts_in
    @function = function
    @paramsize = paramsize
    @mutation_limits = mutation_limits
    @popsize = popsize
    @generations = generations
    @population = []
    if @popsize != 0 && !@paramsize.nil? && !@mutation_limits.nil? && !@function.nil?
      init_population
    end
  end

  # Appends `popsize` random sets (parameters drawn from their mutation
  # limits, fitness in the last slot) to the population.
  def init_population
    @popsize.times do
      set = Array.new(@paramsize) do |i|
        low, high = @mutation_limits[i]
        random_float(low, high)
      end
      set << fitness(set, @pts_in)
      @population << set
    end
  end

  def set_fit_function(func)
    @function = func
  end

  # Adds a random offset, drawn from that parameter's mutation limits, to one
  # randomly chosen parameter of `set` (modified in place).
  def mutate(set)
    # set.size - 1 keeps the trailing fitness slot out of the draw.
    index = rand(set.size - 1)
    low, high = @mutation_limits[index]
    set[index] += random_float(low, high)
  end

  # Moving average over a 7-element window. Three nils are placed before and
  # after the averaged values, so an input of at least 7 elements gives a
  # result of the same length; shorter input gives six nils.
  def self.smoothave(arr)
    smooth_ave = [nil, nil, nil]
    window = []
    arr.each do |value|
      window.push(value)
      window.shift if window.size > 7
      smooth_ave << window.inject(:+) / window.size if window.size == 7
    end
    smooth_ave.concat([nil, nil, nil])
  end

  # Rescales `arr` IN PLACE so that its maximum becomes 100.0 and returns it.
  def self.normalize(arr)
    max = arr.max
    arr.map! { |i| (i.to_f / max) * 100 }
  end

  # Sorts the population in place, best (lowest fitness) first.
  def sort_by_fitness
    @population.sort_by! { |set| set.last }
  end

  # Random Float in [a, b).
  def random_float(a,b)
    a = a.to_f
    b = b.to_f
    a + rand * (b - a)
  end

  # Root-mean-square deviation between two equally long lists of [x, y]
  # points; both the x and the y differences contribute.
  def rmsd(v,w)
    n = v.size
    sum = 0.0
    n.times { |i| sum += ((v[i][0]-w[i][0])**2.0 + (v[i][1]-w[i][1])**2.0) }
    Math.sqrt((1/n.to_f) * sum)
  end

  # Evaluates the fit function with `set` at the x of every input point.
  #
  # @return [Float] the RMSD against pts_in, or, when `plot` is true, the
  #   fitted [x, y] points themselves
  def fitness(set,pts_in,plot = false)
    pts = pts_in.map do |pt|
      x = pt[0]
      [x, function.call(set, x)]
    end
    return pts if plot

    rmsd(pts_in, pts)
  end

  # Runs `generations` mutate-and-replace steps and returns the best set.
  def fit
    @start = Time.now
    # Order the population first so that `last` really is the worst member
    # from the very first comparison on.
    sort_by_fitness
    @generations.times do |i|
      Progress.progress("Generation #{i+1}:",((i/@generations.to_f)*100).to_i)
      #Generate mutations
      candidate = @population[rand(@population.size)].clone
      mutate(candidate)
      candidate[@paramsize] = fitness(candidate,@pts_in)

      # The candidate is measured against the worst member, so that is the
      # member it must replace; the population size never changes.
      if candidate.last < @population.last.last
        @population[-1] = candidate
        #Re-sort
        sort_by_fitness
      end
    end
    # Taken after the loop so it is also set when generations is 0.
    @best = @population.first
    Progress.progress("Generations Done, printing graph:",100,Time.now-@start)
    @best
  end

  # Writes a plot of the input points and the best fit (requires a previous
  # call to #fit) to `file`.
  #
  # @param labels [Array<String>, nil] x and y axis labels; nil means none
  def plot(file,labels = nil)
    pts = fitness(@best,@pts_in,true)
    Fit_plot.plot(@pts_in,pts,file,labels || ["",""])
    puts " Output File: #{file}"
  end
end
@@ -0,0 +1,84 @@
1
+ require 'rubyvis'
2
+
3
# Renders a set of points together with a fitted curve as an SVG file,
# using rubyvis.
class Fit_plot
  # Plain x/y record given to rubyvis as mark data. A Struct is used so the
  # class does not depend on OpenStruct having been loaded elsewhere.
  Point = Struct.new(:x, :y)

  # Draws `pts` as blue dots and `pts2` as a red line and writes the SVG to
  # `file`. The axis ranges are taken from `pts` only.
  #
  # @param pts [Array<Array>] [x, y] pairs drawn as dots
  # @param pts2 [Array<Array>] [x, y] pairs drawn as a line
  # @param file [String] output path
  # @param labels [Array<String>, nil] x and y axis labels; nil means none
  def self.plot(pts,pts2,file,labels = ["",""])
    # Callers may pass nil explicitly, which bypasses the default above.
    xlab, ylab = labels || ["", ""]

    w = 600
    h = 300

    xmin, xmax = pts.map { |pt| pt[0] }.minmax
    ymin, ymax = pts.map { |pt| pt[1] }.minmax

    line1 = pts.map { |pt| Point.new(pt[0], pt[1]) }
    line2 = pts2.map { |pt| Point.new(pt[0], pt[1]) }

    x = pv.Scale.linear(xmin, xmax).range(0, w)
    y = pv.Scale.linear(ymin, ymax).range(0, h)

    vis = pv.Panel.new()
      .width(w)
      .height(h)
      .bottom(50)
      .left(40)
      .right(30)
      .top(5)

    # Input points.
    vis.add(pv.Dot)
      .stroke_style('blue')
      .data(line1)
      .line_width(2)
      .left(lambda {|d| x.scale(d.x)})
      .bottom(lambda {|d| y.scale(d.y)})
      .shape_size(1)
      .anchor("bottom")

    # Fitted curve.
    vis.add(pv.Line)
      .stroke_style('red')
      .data(line2)
      .line_width(2)
      .left(lambda {|d| x.scale(d.x)})
      .bottom(lambda {|d| y.scale(d.y)})
      .anchor("bottom")

    # x-axis tick labels.
    vis.add(pv.Label)
      .data(x.ticks())
      .left(lambda {|d| x.scale(d)})
      .bottom(0)
      .text_baseline("top")
      .text_margin(5)

    # Axis titles.
    vis.add(pv.Label)
      .bottom(-30)
      .text(xlab)

    vis.add(pv.Label)
      .text_angle(-Math::PI/2.0)
      .left(-10)
      .text(ylab)

    # Horizontal rules at the y ticks, labelled on every other tick.
    vis.add(pv.Rule)
      .data(y.ticks())
      .bottom(lambda {|d| y.scale(d)})
      .stroke_style(lambda {|i| i!=0 ? pv.color("#ccc") : pv.color("black")})
      .anchor("right").add(pv.Label)
      .visible(lambda { (self.index & 1)==0})
      .text_margin(6)
    vis.render()

    # Block form closes the file even if writing raises.
    File.open(file,"w") { |file_out| file_out.puts vis.to_svg }
  end
end
@@ -0,0 +1,28 @@
1
+ require 'mspire'
2
+ require 'mspire/mzml'
3
+
4
# Flattens the MS level 1 spectra of an mzML file into three parallel arrays.
class Mzml_reader
  # Reads `file` with Mspire::Mzml and, for every spectrum whose ms_level is
  # 1, emits one entry per intensity value.
  #
  # @param file [String] path of the mzML file
  # @return [Array(Array, Array, Array)] m/z values, retention times and
  #   intensities; index k of each array describes the same peak, and every
  #   peak of a spectrum carries that spectrum's retention time
  def self.get_data(file)
    mzs_out = []
    rts_out = []
    ints_out = []

    # Block form so the handle is closed once reading finishes or fails.
    File.open(file) do |io|
      Mspire::Mzml.new(io).each do |spec|
        next unless spec.ms_level == 1
        mzs = spec.mzs
        rt = spec.retention_time

        spec.intensities.each_with_index do |intensity, j|
          mzs_out << mzs[j]
          rts_out << rt
          ints_out << intensity
        end
      end
    end

    return mzs_out, rts_out, ints_out
  end
end
@@ -0,0 +1,120 @@
1
+
2
+ require 'ms/curvefit/mzml_reader'
3
+ require 'ms/curvefit/curve_fit_helper'
4
+
5
# Derives simulation parameters from an example mzML file by fitting three
# curves with GenCurvefit.
class CurveFit
  # Reads opts[:mzml], fits the elution profile, the m/z variance and the
  # intensity variance, stores the results in `opts` and writes one SVG plot
  # per fit into the current directory.
  #
  # Keys written: :overlapRange, :sampling_rate, :front, :tail, :mu,
  # :wobA, :wobB, :jagA, :jagC, :jagB.
  #
  # @param opts [Hash] must contain :mzml (path) and :generations
  # @return [Hash] the same `opts` object, updated in place
  def self.get_parameters(opts)
    mzs_in, rts_in, ints_in = Mzml_reader.get_data(opts[:mzml])
    generations = opts[:generations]

    @pts_int_var = []
    @pts_mz_var = []
    @pts_elut = []

    ints_in = GenCurvefit.normalize(ints_in)
    #-----------------------overlapRange--------------------------------------------
    mean = mzs_in.inject(:+)/mzs_in.size
    opts[:overlapRange] = (mzs_in.sample_variance(mean)*10**6)/4
    #-------------------------------------------------------------------------------


    #----------------------create points/curve to fit elution-----------------------
    ints_in.each_with_index do |s,i|
      @pts_elut<<[rts_in[i],s]
    end
    opts[:sampling_rate] = rts_in.size/(rts_in.max - rts_in.min)

    # Position of the first occurrence of each retention time, built once so
    # the fit function does not scan rts_in for every point it evaluates.
    rt_position = {}
    rts_in.each_with_index { |rt, idx| rt_position[rt] ||= idx }

    a_fit = GenCurvefit.new(@pts_elut)
    a_fit.set_fit_function(lambda{|a,i|
      pos = rt_position[i]
      100.0*Math.exp(-(pos-a[2])**2/((a[1]*pos+a[0])**2))
    })
    a_fit.mutation_limits = [[-5,5],[-1,1],[-rts_in.size/2,rts_in.size/2]]
    a_fit.popsize = 10
    a_fit.paramsize = 3
    a_fit.init_population
    a_fit.generations = generations

    best = a_fit.fit
    opts[:front] = best[0]
    opts[:tail] = best[1]
    opts[:mu] = best[2]
    #puts "RMSD = #{best[3]}"
    a_fit.plot("elution_curvefit.svg",["retention time","normalized intensity"])
    #-------------------------------------------------------------------------------


    #-----------------create points/curve to fit m/z variance-----------------------
    # Absolute distance of every m/z value from the mean m/z.
    wobs = mzs_in.map { |mz| (mean-mz).abs }

    # Standard deviation over a trailing window of four values, paired with
    # the intensity at the window's last index.
    (3...ints_in.length).each do |d|
      @pts_mz_var<<[ints_in[d],wobs[d-3..d].standard_deviation]
    end

    b_fit = GenCurvefit.new(@pts_mz_var)
    b_fit.set_fit_function(lambda{|a,i| a[0]*i**a[1]})
    b_fit.mutation_limits = [[-1,1],[-1,1]]
    b_fit.popsize = 10
    b_fit.paramsize = 2
    b_fit.init_population
    b_fit.generations = generations

    best = b_fit.fit
    opts[:wobA] = best[0]
    opts[:wobB] = best[1]
    #puts "RMSD = #{best[2]}"
    b_fit.plot("mz_var_curvefit.svg",["normalized intensity","m/z variance"])
    #-------------------------------------------------------------------------------

    #--------------------create points/curve to fit intensity variance--------------
    smooth_ave = GenCurvefit.smoothave(ints_in)

    # Absolute distance of each intensity from its moving average; positions
    # without an average (nil) count as 0.
    diff = smooth_ave.each_with_index.map do |s,i|
      s.nil? ? 0 : (s-ints_in[i]).abs
    end

    (3...ints_in.length).each do |d|
      @pts_int_var<<[ints_in[d],diff[d-3..d].standard_deviation]
    end

    c_fit = GenCurvefit.new(@pts_int_var)
    c_fit.set_fit_function(lambda{|a,i| a[0]*(1-Math.exp(-a[2]*i))+a[1]})
    c_fit.mutation_limits = [[-20,20],[-0.5,0.5],[-0.5,0.5]]
    c_fit.popsize = 10
    c_fit.paramsize = 3
    c_fit.init_population
    c_fit.generations = generations

    best = c_fit.fit
    opts[:jagA] = best[0]
    opts[:jagC] = best[1]
    opts[:jagB] = best[2]
    #puts "RMSD = #{best[3]}"
    c_fit.plot("intensity_var_curvefit.svg",["normalized intensity","intensity variance"])
    #-------------------------------------------------------------------------------

    return opts
  end
end