mspire-simulator 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +22 -0
- data/README.rdoc +17 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/mspire-simulator +125 -0
- data/bin/sim_mail.rb +26 -0
- data/bin/weka/M5P.model +0 -0
- data/bin/weka/M5Rules.model +0 -0
- data/bin/weka/weka.jar +0 -0
- data/lib/ms/curvefit/curve_fit_helper.rb +152 -0
- data/lib/ms/curvefit/fit_graph.rb +84 -0
- data/lib/ms/curvefit/mzml_reader.rb +28 -0
- data/lib/ms/curvefit.rb +120 -0
- data/lib/ms/isoelectric_calc.rb +122 -0
- data/lib/ms/merger.rb +101 -0
- data/lib/ms/mzml_wrapper.rb +67 -0
- data/lib/ms/noise.rb +51 -0
- data/lib/ms/rt/rt_helper.rb +31 -0
- data/lib/ms/rt/rtgenerator.rb +81 -0
- data/lib/ms/rt/weka.rb +150 -0
- data/lib/ms/sim_digester.rb +92 -0
- data/lib/ms/sim_feature.rb +175 -0
- data/lib/ms/sim_peptide.rb +182 -0
- data/lib/ms/sim_spectra.rb +70 -0
- data/lib/ms/sim_trollop.rb +68 -0
- data/lib/ms/tr_file_writer.rb +175 -0
- data/lib/progress.rb +24 -0
- data/mspire-simulator.gemspec +103 -0
- data/spec/file_writer_spec.rb +74 -0
- data/spec/merger_spec.rb +23 -0
- data/spec/ms-simulate_spec.rb +9 -0
- data/spec/peptide_spec.rb +16 -0
- data/spec/progress_spec.rb +22 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/spectra_spec.rb +111 -0
- data/testFiles/contam/hum_keratin.fasta +11 -0
- metadata +246 -0
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2011 Brigham Young University
|
2
|
+
Authors: Andrew Noyce, Nozumo Okuda, James Dagliesh, John Prince
|
3
|
+
Under the Guidance of: Dr. John Prince
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= ms-simulate
|
2
|
+
|
3
|
+
Description:
|
4
|
+
Simulates MS runs given amino acid .fasta files. Outputs a .mzML file.
|
5
|
+
|
6
|
+
== Install
|
7
|
+
gem install mspire-simulator
|
8
|
+
Dependencies:
|
9
|
+
ruby 1.9*
|
10
|
+
weka 3.6.0
|
11
|
+
fftw 3.2.2 - Tested in Linux Mint 12 and Ubuntu Oneiric Ocelot
|
12
|
+
== Examples
|
13
|
+
|
14
|
+
== Copyright
|
15
|
+
|
16
|
+
See LICENSE.txt for further details.
|
17
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'jeweler'
|
5
|
+
# Gem packaging definition: Jeweler builds the gemspec from the metadata,
# pinned runtime dependencies and file exclusions declared in this block.
Jeweler::Tasks.new do |gem|
|
6
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
7
|
+
gem.name = "mspire-simulator"
|
8
|
+
gem.homepage = "http://dl.dropbox.com/u/42836826/Ms_Sim_Homepage.html"
|
9
|
+
gem.license = "MIT"
|
10
|
+
gem.summary = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.}
|
11
|
+
gem.description = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.
|
12
|
+
Can simulate specific data if given an MZML file containing a single isolated peptide peak.}
|
13
|
+
gem.email = "andrewbnoyce@gmail.com"
|
14
|
+
gem.authors = ["anoyce"]
|
15
|
+
|
16
|
+
# Runtime dependencies, each pinned to one exact version.
gem.add_dependency "mspire", "0.8.2"
|
17
|
+
gem.add_dependency "rubyvis", "= 0.5.2"
|
18
|
+
gem.add_dependency "nokogiri", "= 1.5.2"
|
19
|
+
gem.add_dependency "ffi", "= 1.0.11"
|
20
|
+
gem.add_dependency "ffi-inliner", "= 0.2.4"
|
21
|
+
gem.add_dependency "fftw3", "= 0.3"
|
22
|
+
gem.add_dependency "distribution", "= 0.7.0"
|
23
|
+
gem.add_dependency "pony", "= 1.4"
|
24
|
+
gem.add_dependency "obo", "= 0.1.0"
|
25
|
+
gem.add_dependency "trollop", "= 1.16.2"
|
26
|
+
|
27
|
+
# Only the mspire-simulator script is installed as an executable.
gem.executables = ["mspire-simulator"]
|
28
|
+
# The paths below are kept out of the packaged gem.
gem.files.exclude "elution_curvefit.svg"
|
29
|
+
gem.files.exclude "intensity_var_curvefit.svg"
|
30
|
+
gem.files.exclude "lib/pool.rb"
|
31
|
+
gem.files.exclude "mz_var_curvefit.svg"
|
32
|
+
gem.files.exclude "single.mzML"
|
33
|
+
gem.files.exclude "test.mzml"
|
34
|
+
gem.files.exclude "test.mzml_truth.csv"
|
35
|
+
gem.files.exclude "test.mzml_truth.xml"
|
36
|
+
gem.files.exclude "testFiles/*"
|
37
|
+
end
|
38
|
+
Jeweler::RubygemsDotOrgTasks.new
|
39
|
+
|
40
|
+
require 'rspec/core'
|
41
|
+
require 'rspec/core/rake_task'
|
42
|
+
# `rake spec`: runs every file matching spec/**/*_spec.rb.
RSpec::Core::RakeTask.new(:spec) do |spec|
|
43
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
44
|
+
end
|
45
|
+
|
46
|
+
# `rake rcov`: same spec pattern as :spec, with the rcov flag switched on.
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
47
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
48
|
+
spec.rcov = true
|
49
|
+
end
|
50
|
+
|
51
|
+
# Running bare `rake` executes the :spec task.
task :default => :spec
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH << './lib'
|
3
|
+
|
4
|
+
require 'time'
|
5
|
+
require 'progress'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'mspire/digester'
|
8
|
+
require 'mspire/tagged_peak'
|
9
|
+
require 'mspire'
|
10
|
+
require 'ms/sim_peptide'
|
11
|
+
require 'ms/rt/rtgenerator'
|
12
|
+
require 'ms/sim_spectra'
|
13
|
+
require 'ms/noise'
|
14
|
+
require 'ms/mzml_wrapper'
|
15
|
+
require 'trollop'
|
16
|
+
require 'ms/tr_file_writer'
|
17
|
+
require 'ms/isoelectric_calc'
|
18
|
+
require 'ms/sim_digester'
|
19
|
+
require 'ms/sim_trollop'
|
20
|
+
require 'ms/merger'
|
21
|
+
|
22
|
+
# Command-line driver for a simulation run.
#
# Reads the parsed command-line options, digests every FASTA file named in
# ARGV into peptides, builds the simulated spectra, optionally adds noise,
# merges overlapping peaks, optionally writes a truth file (xml or csv) and
# finally writes the mzML output. On failure it removes intermediate
# artifacts; if an e-mail address was supplied it reports the outcome via
# bin/sim_mail.rb.
module MspireSimulator
  @opts = MS::Troll.new.get
  begin

    @start = Time.now

    noise = @opts[:noise]
    truth = @opts[:truth]
    out_file = @opts[:out_file]
    email = @opts[:email]

    # Options arrive as strings; only the literal "true" enables 1-D mode.
    one_d = (@opts[:one_d] == "true")

    module_function
    # Exposes the parsed options as MspireSimulator.opts.
    def opts; @opts end

    #------------------------Digest-----------------------------------------------
    peptides = []
    digester = MS::Sim_Digester.new(@opts[:digestor],@opts[:pH])
    ARGV.each do |file|
      peptides << digester.digest(file)
    end
    # flatten!/uniq! return nil when they change nothing, so they must not be
    # chained: with no nested arrays `flatten!.uniq!` raised NoMethodError.
    peptides.flatten!
    peptides.uniq!
    #-----------------------------------------------------------------------------


    #------------------------Create Spectrum--------------------------------------
    spectra = MS::Sim_Spectra.new(peptides, @opts, one_d)
    data = spectra.data

    # From here on `noise` holds whatever noiseify returned rather than the
    # option string; the csv truth writer receives that value.
    noise = spectra.noiseify if noise == 'true'
    #-----------------------------------------------------------------------------


    #------------------------Merge Overlaps---------------------------------------
    spectra.spectra = Merger.merge(spectra.spectra,@opts[:overlapRange].to_f)
    #-----------------------------------------------------------------------------


    #------------------------Truth Files------------------------------------------
    # Any value other than "xml" or "csv" (including "false") writes nothing.
    if truth == "xml"
      MS::Txml_file_writer.write(spectra.features,spectra.spectra,out_file)
    elsif truth == "csv"
      MS::Tcsv_file_writer.write(spectra.spectra,data,noise,spectra.features,out_file)
    end
    #-----------------------------------------------------------------------------


    #-----------------------Merge Finish------------------------------------------
    spectra.spectra = Merger.compact(spectra.spectra)
    #-----------------------------------------------------------------------------


    #-----------------------Clean UP----------------------------------------------
    spectra.features.each { |fe| fe.delete }
    peptides.clear
    #-----------------------------------------------------------------------------


    #-----------------------MZML--------------------------------------------------
    data = spectra.spectra
    mzml = Mzml_Wrapper.new(data)
    puts "Writing to file..."
    mzml.to_xml(out_file)
    puts "Done."
    #-----------------------------------------------------------------------------

  rescue Exception => e #Clean up if exception
    # NOTE(review): Exception (not StandardError) is kept from the original,
    # presumably so that an interrupt also triggers this cleanup - confirm.
    puts e.message
    puts e.backtrace
    # Locals assigned after the failure point are still nil here, so every
    # cleanup step checks for presence first.
    if digester
      digested = digester.digested_file
      File.delete(digested) if digested && File.exist?(digested)
    end
    spectra.features.each { |fe| fe.delete } if spectra
    peptides.each { |pep| pep.delete } if peptides
    puts "Exception - Simulation Failed"

    # Argument-list form of system: the address never passes through a shell,
    # so shell metacharacters in it cannot run commands.
    if email && email != "nil"
      system("ruby", "bin/sim_mail.rb", email.to_s, "Exception - Simulation Failed")
    end
  else
    if email && email != "nil"
      system("ruby", "bin/sim_mail.rb", email.to_s, "Success! - Simulation Complete")
    end
  end
end
|
data/bin/sim_mail.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
require 'pony'
|
3
|
+
|
4
|
+
# Sends a notification e-mail through Gmail's SMTP server.
#
# Usage: ruby sim_mail.rb ADDRESS WORD [WORD ...]
# ARGV[0] is the recipient; every remaining argument becomes part of the
# message body, each one prefixed with a single space.
begin
  address = ARGV[0]
  # drop(1) yields [] for an empty ARGV (ARGV[1..-1] would yield nil).
  msgbody = ARGV.drop(1).map { |word| " #{word}" }.join

  # SECURITY NOTE(review): the SMTP account name and password are hard-coded
  # below and ship with the gem. They are left unchanged so existing
  # behaviour is preserved; consider moving them to external configuration.
  Pony.mail(:to => address, :via => :smtp, :via_options => {
      :address => 'smtp.gmail.com',
      :port => '587',
      :enable_starttls_auto => true,
      :user_name => 'mspire.simulator',
      :password => 'chromatography',
      :authentication => :plain,
      :domain => "localhost.localdomain"
    },
    :subject => 'Mspire-Simulator', :body => msgbody
  )
rescue => e
  puts "Email function failed. Check email address and internet connection."
  # Report the underlying cause instead of discarding it.
  puts "  (#{e.class}: #{e.message})"
end
|
data/bin/weka/M5P.model
ADDED
Binary file
|
Binary file
|
data/bin/weka/weka.jar
ADDED
Binary file
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'time'
|
2
|
+
require_relative 'fit_graph'
|
3
|
+
|
4
|
+
|
5
|
+
# Small statistics helpers mixed into every Enumerable.
#
# They rely only on #inject and #count, so they work on any Enumerable
# (Array, Range, Set, ...) and not only on classes that define #length.
module Enumerable
  # Sum of all elements, starting from 0.
  # Defined only when the running Ruby has no Enumerable#sum of its own, so
  # the built-in implementation is never replaced.
  unless method_defined?(:sum)
    def sum
      inject(0) { |accum, i| accum + i }
    end
  end

  # Arithmetic mean as a Float (NaN for an empty collection of numbers).
  def mean
    sum / count.to_f
  end

  # Sample variance (divides by n - 1) around the supplied mean.
  #
  # @param mean [Numeric] the value the deviations are measured from
  # @return [Float]
  def sample_variance(mean)
    squared_deviations = inject(0) { |accum, i| accum + (i - mean)**2 }
    squared_deviations / (count - 1).to_f
  end

  # Sample standard deviation: the square root of #sample_variance.
  #
  # @param mean [Numeric] defaults to the collection's own mean
  # @return [Float]
  def standard_deviation(mean = self.mean)
    Math.sqrt(sample_variance(mean))
  end
end
|
24
|
+
|
25
|
+
# Minimal genetic-algorithm curve fitter.
#
# An individual ("set") is an Array holding +paramsize+ parameters followed
# by one extra element: its fitness, the RMSD between the input points and
# the curve produced by +function+ (lower is better).
#
# +function+ is called as function.call(set, x) and must return the fitted
# y value for x. +mutation_limits+ holds one [min, max] pair per parameter;
# it bounds both the initial random value and each mutation step.
class GenCurvefit
  # @param pts_in [Array<Array>] the [x, y] points to fit
  # @param function [#call, nil] fit function, see class comment
  # @param paramsize [Integer, nil] number of parameters per individual
  # @param mutation_limits [Array<Array>, nil] [min, max] per parameter
  # @param popsize [Integer] number of individuals
  # @param generations [Integer, nil] iterations performed by #fit
  #
  # The population is created immediately only when function, paramsize,
  # mutation_limits and a non-zero popsize are all supplied; otherwise set
  # them afterwards and call #init_population.
  def initialize(pts_in,function = nil,paramsize = nil,mutation_limits = nil,popsize = 0,generations = nil)
    @pts_in = pts_in
    @function = function
    @paramsize = paramsize
    @mutation_limits = mutation_limits
    @popsize = popsize
    @generations = generations
    @population = []
    if @popsize != 0 and @paramsize != nil and @mutation_limits != nil and @function != nil
      init_population
    end
  end

  attr_reader :function
  attr_accessor :paramsize, :mutation_limits, :population, :generations, :popsize

  # Appends +popsize+ random individuals (each with its fitness) to the
  # population.
  def init_population
    @popsize.times do
      set = []
      @paramsize.times do |i|
        limits = @mutation_limits[i]
        set << random_float(limits[0],limits[1])
      end
      set << fitness(set,@pts_in)
      @population << set
    end
  end

  # Replaces the fit function.
  def set_fit_function(func)
    @function = func
  end

  # Adds a random offset, drawn from that parameter's mutation limits, to one
  # randomly chosen parameter of +set+ (in place).
  def mutate(set)
    # set.size - 1 leaves the trailing fitness slot out of the draw.
    index = rand(set.size-1)
    limits = @mutation_limits[index]
    set[index] += random_float(limits[0],limits[1])
  end

  # Moving average over a 7-element window.
  #
  # @return [Array] three nils, one average per full window, three nils;
  #   the result lines up index-for-index with +arr+ when arr has at least
  #   seven elements
  def self.smoothave(arr)
    averages = arr.each_cons(7).map { |window| window.inject(:+)/window.size }
    [nil,nil,nil] + averages + [nil,nil,nil]
  end

  # Rescales +arr+ in place so its maximum becomes 100.0; returns +arr+.
  def self.normalize(arr)
    max = arr.max
    arr.map!{|i| (i.to_f/max) * 100}
  end

  # Sorts the population in place, best (lowest fitness) first.
  def sort_by_fitness
    @population.sort_by!{|set| set.last}
  end

  # Random Float in [a, b).
  def random_float(a,b)
    a = a.to_f
    b = b.to_f
    # Kernel#rand truncates a Float maximum to an Integer, so this draws an
    # integer below 2**31 - 1 and scales it into [0, 1).
    random = rand(2147483647.0) / 2147483647.0
    a + random * (b - a)
  end

  # Root of the mean squared point-to-point distance between two equally
  # long lists of [x, y] points.
  def rmsd(v,w)
    n = v.size
    sum = 0.0
    n.times{|i| sum += ((v[i][0]-w[i][0])**2.0 + (v[i][1]-w[i][1])**2.0) }
    Math.sqrt( (1/n.to_f) * sum )
  end

  # Evaluates the fit function for +set+ at every x of +pts_in+.
  #
  # @param plot [Boolean] when true, return the fitted [x, y] points
  # @return [Float, Array] the RMSD against +pts_in+, or the fitted points
  def fitness(set,pts_in,plot = false)
    xs = pts_in.transpose[0]
    pts = xs.map { |x| [x, function.call(set,x)] }
    return pts if plot

    rmsd(pts_in,pts)
  end

  # Runs +generations+ rounds of mutate-and-select.
  #
  # Each round clones a random individual, mutates it, and lets it take the
  # place of the worst individual when it scores better.
  #
  # @return [Array] the best individual (parameters followed by fitness)
  def fit
    @start = Time.now
    # Sort first so that "last" really is the worst individual from the
    # first generation on.
    sort_by_fitness
    @generations.times do |i|
      Progress.progress("Generation #{i+1}:",((i/@generations.to_f)*100).to_i)
      #Generate mutations
      index = rand(@popsize)
      clone = @population[index].clone
      mutate(clone)
      clone[@paramsize] = fitness(clone,@pts_in)

      # Replace the very individual the mutant was compared with. The
      # previous index, size - (paramsize - 1), hit a different slot for
      # paramsize 3 and lay past the end of the array for paramsize 1, which
      # made the population grow.
      @population[-1] = clone if clone.last < @population.last.last

      #Re-sort
      sort_by_fitness
    end
    @best = @population.first
    Progress.progress("Generations Done, printing graph:",100,Time.now-@start)
    return @best
  end

  # Writes an SVG comparing the input points with the best fit found by #fit.
  #
  # @param file [String] output path
  # @param labels [Array<String>, nil] [x label, y label]; blank when nil
  def plot(file,labels = nil)
    pts = fitness(@best,@pts_in,true)
    # Fit_plot.plot indexes into labels, so never forward nil.
    Fit_plot.plot(@pts_in,pts,file,labels || ["",""])
    puts " Output File: #{file}"
  end

end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'rubyvis'
|
2
|
+
|
3
|
+
# Renders a scatter of input points plus a fitted curve to an SVG file
# using rubyvis.
class Fit_plot
  # @param pts [Array<Array>] the [x, y] input points, drawn as blue dots;
  #   the axis ranges are taken from these points only
  # @param pts2 [Array<Array>] the [x, y] fitted points, drawn as a red line
  # @param file [String] path of the SVG file to write
  # @param labels [Array<String>, nil] [x-axis label, y-axis label]; blank
  #   labels are used when omitted or nil
  def self.plot(pts,pts2,file,labels = ["",""])
    # Callers may pass nil explicitly; fall back to blank labels.
    labels ||= ["",""]
    xlab = labels[0]
    ylab = labels[1]

    w = 600
    h = 300

    xmin = pts.min_by{|arr| arr[0]}[0]
    xmax = pts.max_by{|arr| arr[0]}[0]
    ymin = pts.min_by{|arr| arr[1]}[1]
    ymax = pts.max_by{|arr| arr[1]}[1]

    line1 = pts.map { |pt| OpenStruct.new({:x=> pt[0], :y=> pt[1]}) }
    line2 = pts2.map { |pt| OpenStruct.new({:x=> pt[0], :y=> pt[1]}) }

    x = pv.Scale.linear(xmin, xmax).range(0, w)
    y = pv.Scale.linear(ymin, ymax).range(0, h)


    vis = pv.Panel.new()
      .width(w)
      .height(h)
      .bottom(50)
      .left(40)
      .right(30)
      .top(5);

    vis.add(pv.Dot).
      stroke_style('blue').
      data(line1).
      line_width(2).
      left(lambda {|d| x.scale(d.x)}).
      bottom(lambda {|d| y.scale(d.y)}).
      shape_size(1).
      anchor("bottom");

    vis.add(pv.Line).
      stroke_style('red').
      data(line2).
      line_width(2).
      left(lambda {|d| x.scale(d.x)}).
      bottom(lambda {|d| y.scale(d.y)}).
      anchor("bottom");

    vis.add(pv.Label)
      .data(x.ticks())
      .left(lambda {|d| x.scale(d)})
      .bottom(0)
      .text_baseline("top")
      .text_margin(5);

    vis.add(pv.Label)
      .bottom(-30)
      .text(xlab);

    vis.add(pv.Label)
      .text_angle(-Math::PI/2.0)
      .left(-10)
      .text(ylab);

    vis.add(pv.Rule)
      .data(y.ticks())
      .bottom(lambda {|d| y.scale(d)})
      .stroke_style(lambda {|i| i!=0 ? pv.color("#ccc") : pv.color("black")})
      .anchor("right").add(pv.Label)
      .visible(lambda { (self.index & 1)==0})
      .text_margin(6);
    vis.render();

    # Block form closes the file even if generating the SVG raises.
    File.open(file,"w") { |file_out| file_out.puts vis.to_svg }
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'mspire'
|
2
|
+
require 'mspire/mzml'
|
3
|
+
|
4
|
+
# Extracts the raw MS1 peak data from an mzML file.
class Mzml_reader
  # Reads every MS level 1 spectrum of +file+ and flattens its peaks into
  # three parallel arrays. The spectrum's retention time is repeated once
  # per peak so all three arrays share the same length.
  #
  # @param file [String] path to the mzML file
  # @return [Array(Array, Array, Array)] m/z values, retention times and
  #   intensities, one entry per peak
  def self.get_data(file)
    mzs_out = []
    rts_out = []
    ints_out = []

    # Block form guarantees the handle is closed, even when parsing raises.
    File.open(file) do |io|
      mzml = Mspire::Mzml.new(io)

      mzml.each do |spec|
        next unless spec.ms_level == 1
        ints = spec.intensities
        mzs = spec.mzs
        rt = spec.retention_time

        ints.each_with_index do |intensity,j|
          mzs_out << mzs[j]
          rts_out << rt
          ints_out << intensity
        end
      end
    end

    return mzs_out,rts_out,ints_out
  end
end
|
data/lib/ms/curvefit.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
|
2
|
+
require 'ms/curvefit/mzml_reader'
|
3
|
+
require 'ms/curvefit/curve_fit_helper'
|
4
|
+
|
5
|
+
# Derives simulation parameters from an mzML file by fitting three curves
# with GenCurvefit: the elution profile, the m/z variance and the intensity
# variance. Each fit is also plotted to an SVG file in the working directory.
class CurveFit
  # @param opts [Hash] must provide :mzml (path to the mzML file) and
  #   :generations (iterations for each fit)
  # @return [Hash] the same +opts+ hash, with these keys filled in:
  #   :overlapRange, :sampling_rate, :front, :tail, :mu, :wobA, :wobB,
  #   :jagA, :jagC and :jagB
  def self.get_parameters(opts)
    data = Mzml_reader.get_data(opts[:mzml])
    generations = opts[:generations]

    @pts_int_var = []
    @pts_mz_var = []
    @pts_elut = []

    mzs_in = data[0]
    rts_in = data[1]
    ints_in = data[2]

    ints_in = GenCurvefit.normalize(ints_in)
    #-----------------------overlapRange--------------------------------------------
    mean = mzs_in.inject(:+)/mzs_in.size
    opts[:overlapRange] = (mzs_in.sample_variance(mean)*10**6)/4
    #-------------------------------------------------------------------------------


    #----------------------create points/curve to fit elution-----------------------
    ints_in.each_with_index do |s,i|
      @pts_elut<<[rts_in[i],s]
    end
    opts[:sampling_rate] = rts_in.size/(rts_in.max - rts_in.min)

    # Position of the first occurrence of each retention time. Looking this
    # up replaces a linear rts_in.index scan on every evaluation of the fit
    # function, which runs once per point per generation.
    first_index = {}
    rts_in.each_with_index { |rt,idx| first_index[rt] = idx unless first_index.key?(rt) }

    elution = lambda do |a,i|
      idx = first_index[i]
      100.0*Math.exp(-(idx-a[2])**2/((a[1]*idx+a[0])**2))
    end
    best = run_fit(@pts_elut, elution,
                   [[-5,5],[-1,1],[-rts_in.size/2,rts_in.size/2]], 3, generations,
                   "elution_curvefit.svg", ["retention time","normalized intensity"])
    opts[:front] = best[0]
    opts[:tail] = best[1]
    opts[:mu] = best[2]
    #-------------------------------------------------------------------------------


    #-----------------create points/curve to fit m/z variance-----------------------
    # Absolute deviation of every m/z from the overall mean.
    wobs = mzs_in.map { |mz| (mean-mz).abs }

    # Standard deviation over a trailing window of four values.
    ints_in.length.times do |d|
      if d >= 3
        sd = wobs[d-3..d].standard_deviation
        @pts_mz_var<<[ints_in[d],sd]
      end
    end

    best = run_fit(@pts_mz_var, lambda{|a,i| a[0]*i**a[1]},
                   [[-1,1],[-1,1]], 2, generations,
                   "mz_var_curvefit.svg", ["normalized intensity","m/z variance"])
    opts[:wobA] = best[0]
    opts[:wobB] = best[1]
    #-------------------------------------------------------------------------------

    #--------------------create points/curve to fit intensity variance--------------
    smooth_ave = GenCurvefit.smoothave(ints_in)

    # Absolute deviation from the smoothed intensity; 0 where the moving
    # average is undefined (nil padding at both ends).
    diff = []
    smooth_ave.each_with_index do |s,i|
      diff << (s.nil? ? 0 : (s-ints_in[i]).abs)
    end

    ints_in.each_with_index do |i,d|
      if d >= 3
        sd = diff[d-3..d].standard_deviation
        @pts_int_var<<[i,sd]
      end
    end

    best = run_fit(@pts_int_var, lambda{|a,i| a[0]*(1-Math.exp(-a[2]*i))+a[1]},
                   [[-20,20],[-0.5,0.5],[-0.5,0.5]], 3, generations,
                   "intensity_var_curvefit.svg", ["normalized intensity","intensity variance"])
    opts[:jagA] = best[0]
    opts[:jagC] = best[1]
    opts[:jagB] = best[2]
    #-------------------------------------------------------------------------------

    return opts
  end

  # Configures a GenCurvefit with a population of 10, runs it, plots the
  # result to +svg_file+ and returns the best individual.
  def self.run_fit(points, function, mutation_limits, paramsize, generations, svg_file, labels)
    fitter = GenCurvefit.new(points)
    fitter.set_fit_function(function)
    fitter.mutation_limits = mutation_limits
    fitter.popsize = 10
    fitter.paramsize = paramsize
    fitter.init_population
    fitter.generations = generations

    best = fitter.fit
    fitter.plot(svg_file,labels)
    best
  end
  private_class_method :run_fit
end
|