mspire-simulator 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +22 -0
- data/README.rdoc +17 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/mspire-simulator +125 -0
- data/bin/sim_mail.rb +26 -0
- data/bin/weka/M5P.model +0 -0
- data/bin/weka/M5Rules.model +0 -0
- data/bin/weka/weka.jar +0 -0
- data/lib/ms/curvefit/curve_fit_helper.rb +152 -0
- data/lib/ms/curvefit/fit_graph.rb +84 -0
- data/lib/ms/curvefit/mzml_reader.rb +28 -0
- data/lib/ms/curvefit.rb +120 -0
- data/lib/ms/isoelectric_calc.rb +122 -0
- data/lib/ms/merger.rb +101 -0
- data/lib/ms/mzml_wrapper.rb +67 -0
- data/lib/ms/noise.rb +51 -0
- data/lib/ms/rt/rt_helper.rb +31 -0
- data/lib/ms/rt/rtgenerator.rb +81 -0
- data/lib/ms/rt/weka.rb +150 -0
- data/lib/ms/sim_digester.rb +92 -0
- data/lib/ms/sim_feature.rb +175 -0
- data/lib/ms/sim_peptide.rb +182 -0
- data/lib/ms/sim_spectra.rb +70 -0
- data/lib/ms/sim_trollop.rb +68 -0
- data/lib/ms/tr_file_writer.rb +175 -0
- data/lib/progress.rb +24 -0
- data/mspire-simulator.gemspec +103 -0
- data/spec/file_writer_spec.rb +74 -0
- data/spec/merger_spec.rb +23 -0
- data/spec/ms-simulate_spec.rb +9 -0
- data/spec/peptide_spec.rb +16 -0
- data/spec/progress_spec.rb +22 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/spectra_spec.rb +111 -0
- data/testFiles/contam/hum_keratin.fasta +11 -0
- metadata +246 -0
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2011 Brigham Young University
|
2
|
+
Authors: Andrew Noyce, Nozumo Okuda, James Dagliesh, John Prince
|
3
|
+
Under the Guidance of: Dr. John Prince
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= mspire-simulator
|
2
|
+
|
3
|
+
Description:
|
4
|
+
Simulates MS runs given amino acid .fasta files. Outputs a .mzML file.
|
5
|
+
|
6
|
+
== Install
|
7
|
+
gem install mspire-simulator
|
8
|
+
Dependencies:
|
9
|
+
ruby 1.9*
|
10
|
+
weka 3.6.0
|
11
|
+
fftw 3.2.2 - Tested in Linux Mint 12 and Ubuntu Oneiric Ocelot
|
12
|
+
== Examples
|
13
|
+
|
14
|
+
== Copyright
|
15
|
+
|
16
|
+
See LICENSE.txt for further details.
|
17
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'jeweler'
|
5
|
+
# Gem specification, consumed by Jeweler to generate the gemspec and the
# packaging/release rake tasks.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "mspire-simulator"
  gem.homepage = "http://dl.dropbox.com/u/42836826/Ms_Sim_Homepage.html"
  gem.license = "MIT"
  gem.summary = "Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file."
  gem.description = "Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.\n" \
                    "Can simulate specific data if given an MZML file containing a single isolated peptide peak."
  gem.email = "andrewbnoyce@gmail.com"
  gem.authors = ["anoyce"]

  # Runtime dependencies, pinned to the exact versions listed here.
  [
    ["mspire", "0.8.2"],
    ["rubyvis", "= 0.5.2"],
    ["nokogiri", "= 1.5.2"],
    ["ffi", "= 1.0.11"],
    ["ffi-inliner", "= 0.2.4"],
    ["fftw3", "= 0.3"],
    ["distribution", "= 0.7.0"],
    ["pony", "= 1.4"],
    ["obo", "= 0.1.0"],
    ["trollop", "= 1.16.2"]
  ].each do |dep_name, requirement|
    gem.add_dependency dep_name, requirement
  end

  gem.executables = ["mspire-simulator"]

  # Generated output and local test data that must not ship in the gem.
  [
    "elution_curvefit.svg",
    "intensity_var_curvefit.svg",
    "lib/pool.rb",
    "mz_var_curvefit.svg",
    "single.mzML",
    "test.mzml",
    "test.mzml_truth.csv",
    "test.mzml_truth.xml",
    "testFiles/*"
  ].each do |pattern|
    gem.files.exclude pattern
  end
end
|
38
|
+
Jeweler::RubygemsDotOrgTasks.new
|
39
|
+
|
40
|
+
require 'rspec/core'
|
41
|
+
require 'rspec/core/rake_task'
|
42
|
+
# `rake spec` runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }

# `rake rcov` runs the same specs with the task's rcov flag switched on.
RSpec::Core::RakeTask.new(:rcov) do |t|
  t.rcov = true
  t.pattern = 'spec/**/*_spec.rb'
end

# A bare `rake` runs the specs.
task :default => :spec
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH << './lib'
|
3
|
+
|
4
|
+
require 'time'
|
5
|
+
require 'progress'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'mspire/digester'
|
8
|
+
require 'mspire/tagged_peak'
|
9
|
+
require 'mspire'
|
10
|
+
require 'ms/sim_peptide'
|
11
|
+
require 'ms/rt/rtgenerator'
|
12
|
+
require 'ms/sim_spectra'
|
13
|
+
require 'ms/noise'
|
14
|
+
require 'ms/mzml_wrapper'
|
15
|
+
require 'trollop'
|
16
|
+
require 'ms/tr_file_writer'
|
17
|
+
require 'ms/isoelectric_calc'
|
18
|
+
require 'ms/sim_digester'
|
19
|
+
require 'ms/sim_trollop'
|
20
|
+
require 'ms/merger'
|
21
|
+
|
22
|
+
# Command-line entry point of the simulator.
#
# Reads the options parsed by MS::Troll, digests every file named on the
# command line, builds the simulated spectra, optionally adds noise and writes
# truth files, and finally writes the result through Mzml_Wrapper#to_xml.
# When the e-mail option is not the string "nil", bin/sim_mail.rb is invoked
# to report success or failure.
module MspireSimulator
  @opts = MS::Troll.new.get

  module_function

  # Parsed command-line options (whatever MS::Troll#get returned).
  def opts; @opts end

  begin

    @start = Time.now

    # Options are compared against the strings "true"/"false"/"nil" below.
    one_d = @opts[:one_d] == "true"
    noise = @opts[:noise]
    truth = @opts[:truth]
    out_file = @opts[:out_file]
    email = @opts[:email]

    #------------------------Digest-----------------------------------------------
    peptides = []
    digester = MS::Sim_Digester.new(@opts[:digestor],@opts[:pH])
    ARGV.each do |file|
      peptides << digester.digest(file)
    end
    # The bang methods return nil when they change nothing, so they must not
    # be chained (e.g. when no input file was given).
    peptides.flatten!
    peptides.uniq!
    #-----------------------------------------------------------------------------


    #------------------------Create Spectrum--------------------------------------
    spectra = MS::Sim_Spectra.new(peptides, @opts, one_d)
    data = spectra.data

    # From here on `noise` holds the result of noiseify instead of the option.
    noise = spectra.noiseify if noise == 'true'
    #-----------------------------------------------------------------------------


    #------------------------Merge Overlaps---------------------------------------
    spectra.spectra = Merger.merge(spectra.spectra,@opts[:overlapRange].to_f)
    #-----------------------------------------------------------------------------


    #------------------------Truth Files------------------------------------------
    # Any value other than "xml" or "csv" (including "false") writes nothing.
    case truth
    when "xml"
      MS::Txml_file_writer.write(spectra.features,spectra.spectra,out_file)
    when "csv"
      MS::Tcsv_file_writer.write(spectra.spectra,data,noise,spectra.features,out_file)
    end
    #-----------------------------------------------------------------------------


    #-----------------------Merge Finish------------------------------------------
    spectra.spectra = Merger.compact(spectra.spectra)
    #-----------------------------------------------------------------------------


    #-----------------------Clean UP----------------------------------------------
    spectra.features.each{|fe| fe.delete}
    peptides.clear
    #-----------------------------------------------------------------------------


    #-----------------------MZML--------------------------------------------------
    data = spectra.spectra
    mzml = Mzml_Wrapper.new(data)
    puts "Writing to file..."
    mzml.to_xml(out_file)
    puts "Done."
    #-----------------------------------------------------------------------------


  # NOTE(review): rescuing Exception also traps Interrupt and SystemExit.
  # Kept as-is because the cleanup below presumably has to run on Ctrl-C as
  # well -- confirm before narrowing this to StandardError.
  rescue Exception => e #Clean up if exception
    puts e.message
    puts e.backtrace
    # Locals assigned inside the begin block are nil if the failure happened
    # before their assignment, hence the nil guards.
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if digester && File.exist?(digester.digested_file)
      File.delete(digester.digested_file)
    end
    spectra.features.each{|fe| fe.delete} if spectra
    peptides.each{|pep| pep.delete} if peptides
    puts "Exception - Simulation Failed"

    # Multi-argument system bypasses the shell, so the address given on the
    # command line can no longer inject shell syntax. The message words are
    # passed separately, exactly as the shell used to split them.
    if email != "nil"
      system("ruby", "bin/sim_mail.rb", email.to_s, "Exception", "-", "Simulation", "Failed")
    end
  else
    if email != "nil"
      system("ruby", "bin/sim_mail.rb", email.to_s, "Success!", "-", "Simulation", "Complete")
    end
  end
end
|
data/bin/sim_mail.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
require 'pony'
|
3
|
+
|
4
|
+
# Sends a notification e-mail through Pony.
#
# Usage: ruby sim_mail.rb ADDRESS WORD [WORD ...]
# ARGV[0] is the recipient; every following argument becomes part of the
# message body.
begin
  address = ARGV[0]

  # Each word is preceded by a single space, so the body starts with a space.
  # ARGV.drop(1) is an empty list when only the address (or nothing) is given.
  msgbody = ARGV.drop(1).map { |word| " #{word}" }.join

  # NOTE(review): the SMTP account name and password are hard-coded in the
  # source below; consider loading them from the environment or a config file.
  Pony.mail(:to => address, :via => :smtp, :via_options => {
      :address => 'smtp.gmail.com',
      :port => '587',
      :enable_starttls_auto => true,
      :user_name => 'mspire.simulator',
      :password => 'chromatography',
      :authentication => :plain,
      :domain => "localhost.localdomain"
    },
    :subject => 'Mspire-Simulator', :body => msgbody
  )
rescue StandardError => e
  puts "Email function failed. Check email address and internet connection."
  # Report the underlying cause on stderr instead of discarding it.
  warn "#{e.class}: #{e.message}"
end
|
data/bin/weka/M5P.model
ADDED
Binary file
|
Binary file
|
data/bin/weka/weka.jar
ADDED
Binary file
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'time'
|
2
|
+
require_relative 'fit_graph'
|
3
|
+
|
4
|
+
|
5
|
+
# Small statistics helpers added to every Enumerable (Array, Range, Hash, ...).
module Enumerable
  # Sum of all elements, starting from 0.
  #
  # Only defined when the running Ruby has no Enumerable#sum of its own
  # (Ruby < 2.4). Redefining it unconditionally would shadow the built-in
  # version and break its initial-value and block forms for every Enumerable
  # that inherits it.
  unless method_defined?(:sum)
    def sum
      inject(0) { |accum, i| accum + i }
    end
  end

  # Arithmetic mean as a Float (NaN for an empty collection).
  # Uses #count rather than #length so it also works on Enumerables that do
  # not define #length, such as Range.
  def mean
    sum / count.to_f
  end

  # Sample variance (n - 1 denominator) around the supplied mean.
  #
  # @param mean [Numeric] the mean to measure deviations from
  # @return [Float]
  def sample_variance(mean)
    squared_error = inject(0) { |accum, i| accum + (i - mean)**2 }
    squared_error / (count - 1).to_f
  end

  # Sample standard deviation; the mean is computed when not supplied.
  #
  # @param mean [Numeric] the mean to measure deviations from
  # @return [Float]
  def standard_deviation(mean = self.mean)
    Math.sqrt(sample_variance(mean))
  end
end
|
24
|
+
|
25
|
+
# Simple evolutionary curve fitter.
#
# A population of parameter sets is kept; each set is an Array holding
# +paramsize+ parameters followed by its fitness (the RMSD between the input
# points and the points produced by the fit function). Lower fitness is
# better. #fit repeatedly mutates a random member and lets the mutant replace
# the worst member whenever the mutant is better.
class GenCurvefit
  attr_reader :function
  attr_accessor :paramsize, :mutation_limits, :population, :generations, :popsize

  # @param pts_in [Array<Array>] the [x, y] points to fit
  # @param function [#call, nil] called as function.call(params, x), returns y
  # @param paramsize [Integer, nil] number of parameters in each set
  # @param mutation_limits [Array<Array>, nil] one [low, high] pair per parameter
  # @param popsize [Integer] number of sets in the population
  # @param generations [Integer, nil] number of iterations performed by #fit
  #
  # The population is created immediately only when function, paramsize,
  # mutation_limits and a non-zero popsize are all given; otherwise call
  # #init_population once they have been set.
  def initialize(pts_in,function = nil,paramsize = nil,mutation_limits = nil,popsize = 0,generations = nil)
    @pts_in = pts_in
    @function = function
    @paramsize = paramsize
    @mutation_limits = mutation_limits
    @popsize = popsize
    @generations = generations
    @population = []
    if @popsize != 0 && !@paramsize.nil? && !@mutation_limits.nil? && !@function.nil?
      init_population
    end
  end

  # Appends +popsize+ random parameter sets (each with its fitness as the
  # last element) to the population.
  def init_population
    @popsize.times do
      set = []
      @paramsize.times do |i|
        limits = @mutation_limits[i]
        set << random_float(limits[0],limits[1])
      end
      set << fitness(set,@pts_in)
      @population << set
    end
  end

  # Sets the function used to evaluate a parameter set.
  def set_fit_function(func)
    @function = func
  end

  # Adds a random offset, drawn from that parameter's limits, to one randomly
  # chosen parameter of +set+ (in place). The trailing fitness slot is never
  # picked because the index is drawn from 0...(set.size - 1).
  def mutate(set)
    index = rand(set.size-1)
    limits = @mutation_limits[index]
    set[index] += random_float(limits[0],limits[1])
  end

  # Centered 7-point moving average. The result is padded with three nils on
  # each side, so it has the same length as +arr+ when arr has at least seven
  # elements.
  def self.smoothave(arr)
    averages = arr.each_cons(7).map { |window| window.inject(:+) / window.size }
    [nil, nil, nil] + averages + [nil, nil, nil]
  end

  # Rescales +arr+ in place so that its maximum becomes 100.0.
  def self.normalize(arr)
    max = arr.max
    arr.map! { |i| (i.to_f / max) * 100 }
  end

  # Sorts the population in place, best (lowest fitness) first.
  def sort_by_fitness
    @population.sort_by! { |set| set.last }
  end

  # Random Float between a and b.
  def random_float(a,b)
    a = a.to_f
    b = b.to_f
    random = rand(2147483647.0) / 2147483647.0
    a + random * (b - a)
  end

  # Root-mean-square deviation between two equally long lists of [x, y]
  # points; both coordinates contribute to the distance.
  def rmsd(v,w)
    n = v.size
    sum = 0.0
    n.times { |i| sum += ((v[i][0]-w[i][0])**2.0 + (v[i][1]-w[i][1])**2.0) }
    Math.sqrt((1/n.to_f) * sum)
  end

  # Evaluates the fit function with parameters +set+ at every x of +pts_in+.
  #
  # @return [Float] the RMSD against pts_in, or, when +plot+ is true, the
  #   computed [x, y] points themselves
  def fitness(set,pts_in,plot = false)
    # map (rather than transpose[0]) also copes with an empty point list.
    pts = pts_in.map do |pt|
      x = pt[0]
      [x, function.call(set,x)]
    end

    return pts if plot

    rmsd(pts_in,pts)
  end

  # Runs +generations+ mutate/replace iterations.
  #
  # @return [Array] the best parameter set (parameters followed by fitness)
  def fit
    @start = Time.now
    # The population is built unsorted; sort first so that "last" really is
    # the worst member from the first generation on.
    sort_by_fitness
    @generations.times do |i|
      Progress.progress("Generation #{i+1}:",((i/@generations.to_f)*100).to_i)
      #Generate mutations
      candidate = @population[rand(@popsize)].clone
      mutate(candidate)
      candidate[@paramsize] = fitness(candidate,@pts_in)

      # A better candidate replaces the worst member. The slot used to be
      # computed from paramsize, which overwrote a non-worst member or, for
      # paramsize == 1, appended and grew the population.
      @population[-1] = candidate if candidate.last < @population.last.last

      #Re-sort
      sort_by_fitness
    end
    # Set after the loop so a result is returned even for zero generations.
    @best = @population.first
    Progress.progress("Generations Done, printing graph:",100,Time.now-@start)
    @best
  end

  # Plots the input points against the best fit found by #fit into +file+.
  #
  # @param file [String] path of the output file
  # @param labels [Array<String>, nil] x and y axis labels
  def plot(file,labels = nil)
    pts = fitness(@best,@pts_in,true)
    # Fit_plot.plot indexes into labels, so never hand it nil.
    Fit_plot.plot(@pts_in,pts,file,labels || ["",""])
    puts " Output File: #{file}"
  end

end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'rubyvis'
|
2
|
+
|
3
|
+
# Renders a scatter of data points together with a fitted curve as an SVG
# file, using rubyvis.
class Fit_plot
  # @param pts [Array<Array>] [x, y] data points, drawn as blue dots; they
  #   also determine the axis ranges
  # @param pts2 [Array<Array>] [x, y] points of the fitted curve, drawn as a
  #   red line
  # @param file [String] path of the SVG file to write
  # @param labels [Array<String>, nil] x and y axis labels; nil is treated
  #   like the default of two empty labels
  def self.plot(pts,pts2,file,labels = ["",""])
    # Callers may pass nil explicitly, which bypasses the parameter default.
    labels ||= ["",""]
    xlab = labels[0]
    ylab = labels[1]

    w = 600
    h = 300

    xmin = pts.min_by{|arr| arr[0]}[0]
    xmax = pts.max_by{|arr| arr[0]}[0]
    ymin = pts.min_by{|arr| arr[1]}[1]
    ymax = pts.max_by{|arr| arr[1]}[1]

    line1 = pts.map { |pt| OpenStruct.new({:x=> pt[0], :y=> pt[1]}) }
    line2 = pts2.map { |pt| OpenStruct.new({:x=> pt[0], :y=> pt[1]}) }

    x = pv.Scale.linear(xmin, xmax).range(0, w)
    y = pv.Scale.linear(ymin, ymax).range(0, h)


    vis = pv.Panel.new()
      .width(w)
      .height(h)
      .bottom(50)
      .left(40)
      .right(30)
      .top(5);

    # Data points.
    vis.add(pv.Dot).
      stroke_style('blue').
      data(line1).
      line_width(2).
      left(lambda {|d| x.scale(d.x)}).
      bottom(lambda {|d| y.scale(d.y)}).
      shape_size(1).
      anchor("bottom");

    # Fitted curve.
    vis.add(pv.Line).
      stroke_style('red').
      data(line2).
      line_width(2).
      left(lambda {|d| x.scale(d.x)}).
      bottom(lambda {|d| y.scale(d.y)}).
      anchor("bottom");

    # X axis tick labels.
    vis.add(pv.Label)
      .data(x.ticks())
      .left(lambda {|d| x.scale(d)})
      .bottom(0)
      .text_baseline("top")
      .text_margin(5);

    # X axis title.
    vis.add(pv.Label)
      .bottom(-30)
      .text(xlab);

    # Y axis title, rotated.
    vis.add(pv.Label)
      .text_angle(-Math::PI/2.0)
      .left(-10)
      .text(ylab);

    # Horizontal rules with a label on every other tick.
    vis.add(pv.Rule)
      .data(y.ticks())
      .bottom(lambda {|d| y.scale(d)})
      .stroke_style(lambda {|i| i!=0 ? pv.color("#ccc") : pv.color("black")})
      .anchor("right").add(pv.Label)
      .visible(lambda { (self.index & 1)==0})
      .text_margin(6);
    vis.render();

    # Block form closes the file even if writing raises.
    File.open(file,"w") do |file_out|
      file_out.puts vis.to_svg
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'mspire'
|
2
|
+
require 'mspire/mzml'
|
3
|
+
|
4
|
+
# Flattens the MS1 spectra of an mzML file into three parallel arrays.
class Mzml_reader
  # Reads every spectrum with ms_level 1 from +file+ and emits one entry per
  # peak.
  #
  # @param file [String] path of the mzML file
  # @return [Array(Array, Array, Array)] m/z values, retention times and
  #   intensities; index i of each array describes the same peak, and the
  #   spectrum's retention time is repeated for each of its peaks
  def self.get_data(file)
    mzs_out = []
    rts_out = []
    ints_out = []

    # Block form guarantees the handle is closed once reading is finished.
    File.open(file) do |io|
      mzml = Mspire::Mzml.new(io)

      mzml.each do |spec|
        next unless spec.ms_level == 1
        ints = spec.intensities
        mzs = spec.mzs
        rt = spec.retention_time

        # Spectra without peaks contribute nothing.
        ints.each_with_index do |intensity,j|
          mzs_out << mzs[j]
          rts_out << rt
          ints_out << intensity
        end
      end
    end

    return mzs_out,rts_out,ints_out
  end
end
|
data/lib/ms/curvefit.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
|
2
|
+
require 'ms/curvefit/mzml_reader'
|
3
|
+
require 'ms/curvefit/curve_fit_helper'
|
4
|
+
|
5
|
+
# Derives simulation parameters from a reference mzML file by fitting three
# curves with GenCurvefit.
class CurveFit
  # Reads the peaks of opts[:mzml] and stores the fitted values in +opts+:
  #
  # * :overlapRange  - from the sample variance of the m/z values
  # * :sampling_rate - number of peaks divided by the retention-time span
  # * :front, :tail, :mu   - parameters of the elution-profile fit
  # * :wobA, :wobB         - parameters of the m/z-variance fit
  # * :jagA, :jagC, :jagB  - parameters of the intensity-variance fit
  #
  # Each fit is also plotted, to elution_curvefit.svg, mz_var_curvefit.svg
  # and intensity_var_curvefit.svg in the current directory.
  #
  # @param opts [Hash] options; reads :mzml and :generations
  # @return [Hash] the same +opts+ object, updated in place
  def self.get_parameters(opts)
    data = Mzml_reader.get_data(opts[:mzml])
    generations = opts[:generations]

    @pts_int_var = []
    @pts_mz_var = []
    @pts_elut = []

    # Mzml_reader has already read the file, so no handle is opened here.
    mzs_in = data[0]
    rts_in = data[1]
    ints_in = data[2]

    ints_in = GenCurvefit.normalize(ints_in)
    #-----------------------overlapRange--------------------------------------------
    # Mean m/z; reused by the m/z variance section below.
    mean = mzs_in.inject(:+)/mzs_in.size
    opts[:overlapRange] = (mzs_in.sample_variance(mean)*10**6)/4
    #-------------------------------------------------------------------------------


    #----------------------create points/curve to fit elution-----------------------
    ints_in.each_with_index do |s,i|
      @pts_elut<<[rts_in[i],s]
    end
    opts[:sampling_rate] = rts_in.size/(rts_in.max - rts_in.min)

    a_fit = GenCurvefit.new(@pts_elut)
    # The curve is evaluated on the position of the retention time within
    # rts_in (its first occurrence), not on the retention time itself.
    a_fit.set_fit_function(lambda{|a,i| 100.0*Math.exp(-(rts_in.index(i)-a[2])**2/((a[1]*rts_in.index(i)+a[0])**2))})
    a_fit.mutation_limits = [[-5,5],[-1,1],[-rts_in.size/2,rts_in.size/2]]
    a_fit.popsize = 10
    a_fit.paramsize = 3
    a_fit.init_population
    a_fit.generations = generations

    best = a_fit.fit
    opts[:front] = best[0]
    opts[:tail] = best[1]
    opts[:mu] = best[2]
    #puts "RMSD = #{best[3]}"
    labels = ["retention time","normalized intensity"]
    a_fit.plot("elution_curvefit.svg",labels)
    #-------------------------------------------------------------------------------


    #-----------------create points/curve to fit m/z variance-----------------------
    # Absolute deviation of every m/z value from the mean m/z.
    wobs = []
    mzs_in.each do |mz|
      wobs<<(mean-mz).abs
    end

    # Standard deviation over a sliding window of four deviations, paired
    # with the normalized intensity at the end of the window.
    ints_in.length.times do |d|
      if d >= 3
        sd = wobs[d-3..d].standard_deviation
        @pts_mz_var<<[ints_in[d],sd]
      end
    end

    b_fit = GenCurvefit.new(@pts_mz_var)
    b_fit.set_fit_function(lambda{|a,i| a[0]*i**a[1]})
    b_fit.mutation_limits = [[-1,1],[-1,1]]
    b_fit.popsize = 10
    b_fit.paramsize = 2
    b_fit.init_population
    b_fit.generations = generations

    best = b_fit.fit
    opts[:wobA] = best[0]
    opts[:wobB] = best[1]
    #puts "RMSD = #{best[2]}"
    labels = ["normalized intensity","m/z variance"]
    b_fit.plot("mz_var_curvefit.svg",labels)
    #-------------------------------------------------------------------------------

    #--------------------create points/curve to fit intensity variance--------------
    smooth_ave = GenCurvefit.smoothave(ints_in)

    # Absolute difference between the smoothed and the raw intensity; 0 where
    # the moving average has no value.
    diff = []
    smooth_ave.each_with_index do |s,i|
      if s == nil
        diff<<0
      else
        diff<<(s-ints_in[i]).abs
      end
    end


    ints_in.each_with_index do |i,d|
      if d >= 3
        sd = diff[d-3..d].standard_deviation
        @pts_int_var<<[i,sd]
      end
    end

    c_fit = GenCurvefit.new(@pts_int_var)
    c_fit.set_fit_function(lambda{|a,i| a[0]*(1-Math.exp(-a[2]*i))+a[1]})
    c_fit.mutation_limits = [[-20,20],[-0.5,0.5],[-0.5,0.5]]
    c_fit.popsize = 10
    c_fit.paramsize = 3
    c_fit.init_population
    c_fit.generations = generations

    best = c_fit.fit
    # Note the order: the second fitted parameter is stored as :jagC and the
    # third as :jagB.
    opts[:jagA] = best[0]
    opts[:jagC] = best[1]
    opts[:jagB] = best[2]
    #puts "RMSD = #{best[3]}"
    labels = ["normalized intensity","intensity variance"]
    c_fit.plot("intensity_var_curvefit.svg",labels)
    #-------------------------------------------------------------------------------

    return opts
  end
end
|