RubyGems - mspire-simulator - Versions diffs - 0.1.2 → 0.2.0 - Mend

mspire-simulator 0.1.2 → 0.2.0

Files changed (32) hide show

data/README.rdoc +46 -3
data/Rakefile +1 -1
data/VERSION +1 -1
data/bin/mspire-simulator +8 -0
data/bin/sim_mail +2 -2
data/lib/cv_parser.rb +7 -0
data/lib/ms/curvefit/curve_fit_helper.rb +26 -20
data/lib/ms/curvefit/mzml_reader.rb +1 -1
data/lib/ms/curvefit.rb +25 -8
data/lib/ms/isoelectric_calc.rb +162 -103
data/lib/ms/merger.rb +46 -33
data/lib/ms/mzml_wrapper.rb +74 -29
data/lib/ms/noise.rb +28 -28
data/lib/ms/rt/rt_helper.rb +3 -3
data/lib/ms/rt/rtgenerator.rb +63 -51
data/lib/ms/rt/weka.rb +17 -17
data/lib/ms/sim_digester.rb +45 -26
data/lib/ms/sim_feature.rb +180 -122
data/lib/ms/sim_peptide.rb +58 -55
data/lib/ms/sim_spectra.rb +22 -23
data/lib/ms/sim_trollop.rb +36 -32
data/lib/ms/tr_file_writer.rb +111 -98
data/lib/progress.rb +21 -20
data/mspire-simulator.gemspec +5 -5
data/spec/file_writer_spec.rb +2 -1
data/spec/merger_spec.rb +2 -1
data/spec/ms-simulate_spec.rb +1 -1
data/spec/peptide_spec.rb +2 -1
data/spec/spec_helper.rb +8 -3
data/spec/spectra_spec.rb +4 -3
metadata +5 -5
data/spec/progress_spec.rb +0 -22

data/README.rdoc CHANGED Viewed

@@ -1,7 +1,7 @@
-= ms-simulate
+= mspire-simulator
 Description:
-	Simulates MS runs given amino acid .fasta files. Outputs a .mzML file.
+	Simulates MS runs given amino acid FASTA files. Outputs a .mzML file.
 == Install
   gem install mspire-simulator
@@ -10,8 +10,51 @@ Dependencies:
   weka 3.6.0 - May need to add to CLASSPATH see: http://weka.wikispaces.com/CLASSPATH+problems
   fftw 3.2.2 - Tested in Linux Mint 12 and Ubuntu Oneiric Ocelot
 == Examples
+The simplest way to run mspire-simulator is to give it an mzML file
+with a single centroided elution profile, from which the simulator
+can extract the needed parameters, including:
+- Elution parameters: front, tail, and mu
+- Overlap range (for merging signals)
+- Sampling rate
+- m/z wobble parameters: wobA, wobB
+- Intensity variance parameters: jagA, jagB, jagC
+    $ mspire-simulator --mzml input.mzml [options] <.fasta file>
+Alternatively, all parameters can be specified on the command line:
+    $ mspire-simulator -r 3000 -s 1.0 -n false ...
+To see all the available options:
+    $ mspire-simulator --help
+=== Charge State Calculator
+  $ ruby lib/ms/isoelectric_calc.rb --ph 2 --distribution DRVYIHPFHL DRVYIHPF RVYIHPF VYIHPF
+will return:
+DRVYIHPFHL @ pH 2.0:	+3, 29.040854; +4, 70.959146
+DRVYIHPF @ pH 2.0:	+2, 29.045885; +3, 70.954115
+RVYIHPF @ pH 2.0:	+2, 37.364123; +3, 62.635877
+VYIHPF @ pH 2.0:	+1, 40.341305; +2, 59.658695
+To see all the available options:
+    $ ruby lib/ms/isoelectric_calc.rb --help
+== TODO
+Because of the many options and parameters to specify, we will be moving
+to a .init file format with a .init file editor.
+Other improvements to mspire-simulator are also pending.
 == Copyright
 See LICENSE.txt for further details.

data/Rakefile CHANGED Viewed

@@ -5,7 +5,7 @@ require 'jeweler'
 Jeweler::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
   gem.name = "mspire-simulator"
-  gem.homepage = "http://dl.dropbox.com/u/42836826/Ms_Sim_Homepage.html"
+  gem.homepage = "https://github.com/princelab/mspire-simulator"
   gem.license = "MIT"
   gem.summary = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.}
   gem.description = %Q{Simulates MS1 runs given amino acid FASTA files. Outputs an MZML file.

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.1.2
1	+ 0.2.0

data/bin/mspire-simulator CHANGED Viewed

@@ -19,6 +19,8 @@ require 'ms/sim_digester'
 require 'ms/sim_trollop'
 require 'ms/merger'
 module MspireSimulator
 @opts = MS::Troll.new.get
   begin
@@ -40,6 +42,10 @@ module MspireSimulator
     module_function
     def opts; @opts end
+    SampleLoad = 1.0 # Instrument dependent scaling, for an Orbitrap, assumed to be 1 ug
+    # TODO define an option for sample loading, and a scaling function to define the peak intensities
     #------------------------Digest-----------------------------------------------
     peptides = []
@@ -80,10 +86,12 @@ module MspireSimulator
     #-----------------------------------------------------------------------------
     #-----------------------Merge Finish------------------------------------------
     spectra.spectra = Merger.compact(spectra.spectra)
     #-----------------------------------------------------------------------------
     #-----------------------Clean UP----------------------------------------------
     spectra.features.each{|fe| fe.delete}

data/bin/sim_mail CHANGED Viewed

@@ -18,9 +18,9 @@ begin
     :password             => 'chromatography',
     :authentication       => :plain,
     :domain               => "localhost.localdomain"
-    },
+  },
     :subject => 'Mspire-Simulator', :body => msgbody
-  )
+           )
 rescue
   puts "Email function failed. Check email address and internet connection."
 end

data/lib/cv_parser.rb ADDED Viewed

@@ -0,0 +1,7 @@
+#require 'ms/modifications/cv'
+string = DATA.read
+puts string
+__END__
+Hello Ryan

data/lib/ms/curvefit/curve_fit_helper.rb CHANGED Viewed

@@ -35,10 +35,10 @@ class GenCurvefit
       init_population
     end
   end
   attr_reader :function, :paramsize, :mutation_limits, :population, :generations, :popsize
   attr_writer :paramsize, :mutation_limits, :population, :generations, :popsize
   def init_population
     @popsize.times do
       set = []
@@ -50,24 +50,24 @@ class GenCurvefit
       @population<<set
     end
   end
   def set_fit_function(func)
     @function = func
   end
   def mutate(set)
     index = rand(set.size-1)
     limits = @mutation_limits[index]
     set[index] += random_float(limits[0],limits[1])
   end
   def self.smoothave(arr)
     smooth_ave = [nil,nil,nil]
     queue = []
     arr.each do |i|
       queue.push(i)
       if queue.size > 7
-	queue.shift
+        queue.shift
       end
       smooth_ave<<queue.inject(:+)/queue.size if queue.size == 7
     end
@@ -76,16 +76,16 @@ class GenCurvefit
     end
     return smooth_ave
   end
   def self.normalize(arr)
     max = arr.max
     arr.map!{|i| (i.to_f/max) * 100}
   end
   def sort_by_fitness
     @population.sort_by!{|set| set.last}
   end
   def random_float(a,b)
     a = a.to_f
     b = b.to_f
@@ -94,15 +94,15 @@ class GenCurvefit
     r = random * diff
     return a + r
   end
   def rmsd(v,w)
     n = v.size
     sum = 0.0
     n.times{|i| sum += ((v[i][0]-w[i][0])**2.0 + (v[i][1]-w[i][1])**2.0) }
     return Math.sqrt( (1/n.to_f) * sum )
   end
   def fitness(set,pts_in,plot = false)
     pts = []
     xs = pts_in.transpose[0]
@@ -110,18 +110,24 @@ class GenCurvefit
       fit_pt = function.call(set,x)
       pts<<[x,fit_pt]
     end
     if plot
       return pts
     end
     return rmsd(pts_in,pts)
   end
   def fit
-    @start = Time.now
+    prog = Progress.new("Generation")
+    num = 0
+    total = @generations
+    step = total/100
     @generations.times do |i|
-      Progress.progress("Generation #{i+1}:",((i/@generations.to_f)*100).to_i)
+      if i > step * (num + 1)
+	num = ((i/total.to_f)*100).to_i
+	prog.update(num," #{i+1}:")
+      end
       #Generate mutations
       index = rand(@popsize)
       clone = @population[index].clone
@@ -139,14 +145,14 @@ class GenCurvefit
         @best = @population.first
       end
     end
-    Progress.progress("Generations Done, printing graph:",100,Time.now-@start)
+    prog.finish!
     return @best
   end
   def plot(file,labels = nil)
     pts = fitness(@best,@pts_in,true)
     Fit_plot.plot(@pts_in,pts,file,labels)
     puts "  Output File: #{file}"
   end
 end

data/lib/ms/curvefit/mzml_reader.rb CHANGED Viewed

@@ -14,7 +14,7 @@ class Mzml_reader
       ints = spec.intensities
       mzs = spec.mzs
       rt = spec.retention_time
       if ints.empty?;else
         ints.each_with_index do |i,j|
           mzs_out<<mzs[j]

data/lib/ms/curvefit.rb CHANGED Viewed

@@ -1,12 +1,15 @@
+require 'progress'
 require 'ms/curvefit/mzml_reader'
 require 'ms/curvefit/curve_fit_helper'
+@@avg_mz = 0
+@@avg_rt = 0
 class CurveFit
   def self.get_parameters(opts)
     data = Mzml_reader.get_data(opts[:mzml])
     generations = opts[:generations]
     @pts_int_var = []
     @pts_mz_var = []
     @pts_elut = []
@@ -17,13 +20,16 @@ class CurveFit
     rts_in = data[1]
     ints_in = data[2]
+    @@avg_mz = mzs_in.inject(:+)/mzs_in.size.to_f
+    @@avg_rt = rts_in.inject(:+)/rts_in.size.to_f
     ints_in = GenCurvefit.normalize(ints_in)
     #-----------------------overlapRange--------------------------------------------
     mean = mzs_in.inject(:+)/mzs_in.size
     opts[:overlapRange] = (mzs_in.sample_variance(mean)*10**6)/4
     #-------------------------------------------------------------------------------
     #----------------------create points/curve to fit elution-----------------------
     ints_in.each_with_index do |s,i|
       @pts_elut<<[rts_in[i],s]
@@ -46,8 +52,8 @@ class CurveFit
     labels = ["retention time","normalized intensity"]
     a_fit.plot("elution_curvefit.svg",labels)
     #-------------------------------------------------------------------------------
     #-----------------create points/curve to fit m/z variance-----------------------
     wobs = []
     mean = mzs_in.inject(:+)/mzs_in.size
@@ -77,7 +83,7 @@ class CurveFit
     labels = ["normalized intensity","m/z variance"]
     b_fit.plot("mz_var_curvefit.svg",labels)
     #-------------------------------------------------------------------------------
     #--------------------create points/curve to fit intensity variance--------------
     smooth_ave = GenCurvefit.smoothave(ints_in)
@@ -114,7 +120,18 @@ class CurveFit
     labels = ["normalized intensity","intensity variance"]
     c_fit.plot("intensity_var_curvefit.svg",labels)
     #-------------------------------------------------------------------------------
     return opts
   end
 end
+=begin
+out_file = File.open("mzvar_params.txt","w")
+out_file.puts "wobA\twobB\tavg_mz\tavg_rt"
+ARGV.each do |file|
+p file
+  opts = {:mzml => file, :generations => 30000}
+  opts = CurveFit.get_parameters(opts)
+  out_file.puts "#{opts[:wobA]}\t#{opts[:wobB]}\t#{@@avg_mz}\t#{@@avg_rt}"
+end
+out_file.close
+=end

data/lib/ms/isoelectric_calc.rb CHANGED Viewed

@@ -4,119 +4,178 @@
 Precision = 0.001
 ResidueTable = {
-	:K => [2.18,8.95,10.53],
-	:E => [2.19,9.67,4.25],
-	:D => [1.88,9.60,3.65],
-	:H => [1.82,9.17,6.00],
-	:R => [2.17,9.04,12.48],
-	:Q => [2.17,9.13,nil],
-	:N => [2.02,8.80,nil],
-	:C => [1.96,10.28,8.18],
-	:T => [2.11,9.62,nil],
-	:S => [2.21,9.15,nil],
-	:W => [2.38,9.39,nil],
-	:Y => [2.20,9.11,10.07],
-	:F => [1.83,9.13,nil],
-	:M => [2.28,9.21,nil],
-	:I => [2.36,9.68,nil],
-	:L => [2.36,9.60,nil],
-	:V => [2.32,9.62,nil],
-	:P => [1.99,10.96,nil],
-	:A => [2.34,9.69,nil],
-	:G => [2.34,9.60,nil],
-# These are the fringe cases... B and Z... Jerks, these are harder to calculate pIs
-	:B => [1.95,9.20,3.65],
-	:Z => [2.18,9.40,4.25],
-	:X => [2.20,9.40,nil],
-	:U => [1.96,10.28,5.20] # Unfortunately, I've only found the pKr for this... so I've used Cysteine's values.
+  :K => [2.18,8.95,10.53],
+  :E => [2.19,9.67,4.25],
+  :D => [1.88,9.60,3.65],
+  :H => [1.82,9.17,6.00],
+  :R => [2.17,9.04,12.48],
+  :Q => [2.17,9.13,nil],
+  :N => [2.02,8.80,nil],
+  :C => [1.96,10.28,8.18],
+  :T => [2.11,9.62,nil],
+  :S => [2.21,9.15,nil],
+  :W => [2.38,9.39,nil],
+  :Y => [2.20,9.11,10.07],
+  :F => [1.83,9.13,nil],
+  :M => [2.28,9.21,nil],
+  :I => [2.36,9.68,nil],
+  :L => [2.36,9.60,nil],
+  :V => [2.32,9.62,nil],
+  :P => [1.99,10.96,nil],
+  :A => [2.34,9.69,nil],
+  :G => [2.34,9.60,nil],
+  # These are the fringe cases... B and Z... Jerks, these are harder to calculate pIs
+  :B => [1.95,9.20,3.65],
+  :Z => [2.18,9.40,4.25],
+  :X => [2.20,9.40,nil],
+  :U => [1.96,10.28,5.20] # Unfortunately, I've only found the pKr for this... so I've used Cysteine's values.
 }
 PepCharges = Struct.new(:seq, :n_term, :c_term, :y_num, :c_num, :k_num, :h_num, :r_num, :d_num, :e_num, :u_num, :polar_num, :hydrophobic_num, :pi)
 def identify_potential_charges(str)
-	string = str.upcase
-	first = string[0]; last = string[-1]
-	puts string if first.nil? or last.nil?
-	begin
-		out = PepCharges.new(string, ResidueTable[first.to_sym][0], ResidueTable[last.to_sym][1], 0, 0, 0 ,0 ,0 ,0, 0, 0, 0, 0, 0)
-	rescue NoMethodError
-		abort string
-	end
-	string.chars.each do |letter|
-		case letter
-			when "Y"
-				out.y_num += 1
-			when "C"
-				out.c_num += 1
-			when "K"
-				out.k_num += 1
-			when "H"
-				out.h_num += 1
-			when "R"
-				out.r_num += 1
-			when "D"
-				out.d_num += 1
-			when "E"
-				out.e_num += 1
-			when "U"
-				out.u_num += 1
-			when "S", "T", "N", "Q"
-				out.polar_num += 1
-			when "A", "V", "I", "L", "M", "F", "W", "G", "P"
-				out.hydrophobic_num += 1
-		end
-	end
-	out
+  string = str.upcase
+  first = string[0]; last = string[-1]
+  puts string if first.nil? or last.nil?
+  begin
+    out = PepCharges.new(string, ResidueTable[first.to_sym][0], ResidueTable[last.to_sym][1], 0, 0, 0 ,0 ,0 ,0, 0, 0, 0, 0, 0)
+  rescue NoMethodError
+    abort string
+  end
+  string.chars.each do |letter|
+    case letter
+    when "Y"
+      out.y_num += 1
+    when "C"
+      out.c_num += 1
+    when "K"
+      out.k_num += 1
+    when "H"
+      out.h_num += 1
+    when "R"
+      out.r_num += 1
+    when "D"
+      out.d_num += 1
+    when "E"
+      out.e_num += 1
+    when "U"
+      out.u_num += 1
+    when "S", "T", "N", "Q"
+      out.polar_num += 1
+    when "A", "V", "I", "L", "M", "F", "W", "G", "P"
+      out.hydrophobic_num += 1
+    end
+  end
+  out
 end # Returns the PepCharges structure
 def charge_at_pH(pep_charges, pH)
-	charge = 0
-	charge += -1/(1+10**(pep_charges.c_term-pH))
-	charge += -pep_charges.d_num/(1+10**(ResidueTable[:D][2]-pH))
-	charge += -pep_charges.e_num/(1+10**(ResidueTable[:E][2]-pH))
-	charge += -pep_charges.c_num/(1+10**(ResidueTable[:C][2]-pH))
-	charge += -pep_charges.y_num/(1+10**(ResidueTable[:Y][2]-pH))
-	charge += 1/(1+10**(pH - pep_charges.n_term))
-	charge += pep_charges.h_num/(1+10**(pH-ResidueTable[:H][2]))
-	charge += pep_charges.k_num/(1+10**(pH-ResidueTable[:K][2]))
-	charge += pep_charges.r_num/(1+10**(pH-ResidueTable[:R][2]))
-	charge
+  charge = 0
+  charge += -1/(1+10**(pep_charges.c_term-pH))
+  charge += -pep_charges.d_num/(1+10**(ResidueTable[:D][2]-pH))
+  charge += -pep_charges.e_num/(1+10**(ResidueTable[:E][2]-pH))
+  charge += -pep_charges.c_num/(1+10**(ResidueTable[:C][2]-pH))
+  charge += -pep_charges.y_num/(1+10**(ResidueTable[:Y][2]-pH))
+  charge += 1/(1+10**(pH - pep_charges.n_term))
+  charge += pep_charges.h_num/(1+10**(pH-ResidueTable[:H][2]))
+  charge += pep_charges.k_num/(1+10**(pH-ResidueTable[:K][2]))
+  charge += pep_charges.r_num/(1+10**(pH-ResidueTable[:R][2]))
+  charge
 end
 def calc_PI(pep_charges)
-	pH = 8; pH_prev = 0.0; pH_next = 14.0
-	charge = charge_at_pH(pep_charges, pH)
-	while pH-pH_prev > Precision and pH_next-pH > Precision
-		if charge < 0.0
-			tmp = pH
-			pH = pH - ((pH-pH_prev)/2)
-			charge = charge_at_pH(pep_charges, pH)
-			pH_next = tmp
-		else
-			tmp = pH
-			pH = pH + ((pH_next - pH)/2)
-			charge = charge_at_pH(pep_charges, pH)
-			pH_prev = tmp
-		end
-	#	puts "charge: #{charge.round(2)}\tpH: #{pH.round(2)}\tpH_next: #{pH_next.round(2)}\tpH_prev: #{pH_prev.round(2)}"
-	end
-	pH
+  pH = 8; pH_prev = 0.0; pH_next = 14.0
+  charge = charge_at_pH(pep_charges, pH)
+  while pH-pH_prev > Precision and pH_next-pH > Precision
+    if charge < 0.0
+      tmp = pH
+      pH = pH - ((pH-pH_prev)/2)
+      charge = charge_at_pH(pep_charges, pH)
+      pH_next = tmp
+    else
+      tmp = pH
+      pH = pH + ((pH_next - pH)/2)
+      charge = charge_at_pH(pep_charges, pH)
+      pH_prev = tmp
+    end
+    #	puts "charge: #{charge.round(2)}\tpH: #{pH.round(2)}\tpH_next: #{pH_next.round(2)}\tpH_prev: #{pH_prev.round(2)}"
+  end
+  pH
 end
-#pepcharges =[]
-=begin
-#  RUN the ENTRY FILE HERE
-pi = []
-io = File.open(ARGV.shift, 'r')
-io.each_line do |line|
-	pi << calc_PI(identify_potential_charges(line[/^([A-Z]+):.*/]))
-end
-=end
-=begin
-pIes = []
-pepcharges.each do |a|
-	pIes << [a, calc_PI(a)]
+def distribution_from_charge(charge, normalization=100)
+  threshold = normalization.to_f
+  f = charge.floor
+  c = charge.ceil
+  charge_ratio = charge - f
+  num = charge_ratio*normalization
+  denom = normalization
+  while num + denom > threshold
+    factor = threshold/(num+denom)
+    num = num * factor
+    denom = denom * factor
+  end
+  [["+#{f}" + ", " + "%5f" % num],["+#{c}" + ", " + "%5f" % denom]]
 end
-=end
-#out_pi = pepcharges.map {|a| calc_PI(a)}
-#require 'yaml'
-#File.open('pi_list.yml', 'w') {|f| YAML.dump( pi, f) }
+#pepcharges =[]
+if $0 == __FILE__
+  VERBOSE = false
+  def putsv(object)
+    puts object if VERBOSE
+  end
+  def out(line, object)
+    line + ":\t" + object.to_s
+  end
+  require 'optparse'
+  options = {pi: true, distribution: false, ph: 7.0}
+  parser = OptionParser.new do |opts|
+    opts.banner = "Takes strings and outputs the PI, or charge distribution"
+    opts.on('-h','--help', "Displays this help message") do |h|
+      puts opts
+      exit
+    end
+    opts.on('-v','--verbose') {|v| VERBOSE = v}
+    opts.on("--[no]-pi", "Turns on (default) or off the pI output") do |p|
+      options[:pi] = p
+    end
+    opts.on("-d", "--distribution", "Output a string representation of the charge state distribution array") do |d|
+      options[:distribution] = true
+      options[:pi] = false
+    end
+    opts.on('--pH N', Float, "Takes a float value representing a pH at which to make the distribution. DEFAULT: 7.0") do |ph|
+      options[:ph] = ph
+    end
+    opts.on('-f', "--file FILENAME", String, "Takes an input file for parsing") do |f|
+      options[:in_file] = f
+    end
+  end
+  parser.parse!
+  #  RUN
+  pi = []
+  lines = []
+  if options[:in_file]
+    file_lines = File.readlines(options[:in_file]).map(&:chomp)
+    lines = file_lines.map {|line| line[/^([A-Z]+).*/] }.compact
+    outfile = File.join(File.dirname(options[:in_file]), 'pi_output_file.txt')
+    outputter = File.open(outfile,'w')
+  else
+    lines = ARGV
+    outputter = STDOUT
+  end
+  if options[:pi]
+    lines.each {|line| outputter.puts out(line, calc_PI(identify_potential_charges(line)) ) }
+  elsif options[:distribution]
+    lines.each do |line|
+      charge = charge_at_pH(identify_potential_charges(line), options[:ph])
+      charge_dist = distribution_from_charge(charge)
+      outputter.puts out(line + " @ pH #{options[:ph]}", charge_dist.join("; "))
+    end
+  end
+  if outfile
+    outputter.close
+    puts "OUTPUT in #{outfile}"
+  end
+end