RubyGems - mspire-simulator - Versions diffs - 0.1.2 → 0.2.0 - Mend

mspire-simulator 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

data/README.rdoc +46 -3
data/Rakefile +1 -1
data/VERSION +1 -1
data/bin/mspire-simulator +8 -0
data/bin/sim_mail +2 -2
data/lib/cv_parser.rb +7 -0
data/lib/ms/curvefit/curve_fit_helper.rb +26 -20
data/lib/ms/curvefit/mzml_reader.rb +1 -1
data/lib/ms/curvefit.rb +25 -8
data/lib/ms/isoelectric_calc.rb +162 -103
data/lib/ms/merger.rb +46 -33
data/lib/ms/mzml_wrapper.rb +74 -29
data/lib/ms/noise.rb +28 -28
data/lib/ms/rt/rt_helper.rb +3 -3
data/lib/ms/rt/rtgenerator.rb +63 -51
data/lib/ms/rt/weka.rb +17 -17
data/lib/ms/sim_digester.rb +45 -26
data/lib/ms/sim_feature.rb +180 -122
data/lib/ms/sim_peptide.rb +58 -55
data/lib/ms/sim_spectra.rb +22 -23
data/lib/ms/sim_trollop.rb +36 -32
data/lib/ms/tr_file_writer.rb +111 -98
data/lib/progress.rb +21 -20
data/mspire-simulator.gemspec +5 -5
data/spec/file_writer_spec.rb +2 -1
data/spec/merger_spec.rb +2 -1
data/spec/ms-simulate_spec.rb +1 -1
data/spec/peptide_spec.rb +2 -1
data/spec/spec_helper.rb +8 -3
data/spec/spectra_spec.rb +4 -3
metadata +5 -5
data/spec/progress_spec.rb +0 -22

data/lib/ms/merger.rb CHANGED Viewed

@@ -33,65 +33,78 @@ class Merger
     b = weights.flatten.inject(:+)
     return a/b
   end
   def self.merge(spectra,half_range)
-    @start = Time.now
     new_data = {}
     total = spectra.size
     k = 0
+    prog = Progress.new("Merging Overlaps:")
     spectra.each do |rt,val|
-      Progress.progress("Merging Overlaps:",(((k/total)*100).to_i))
+      if k.even?
+        num = (((k/total)*100).to_i)
+        prog.update(num)
+      end
       peaks = val.transpose
-      peaks.sort_by!{|a| a[0]}
+      peaks.sort_by!{|a| a[0]} #mz
       peaks = peaks.transpose
       mzs = peaks[0]
       ints = peaks[1]
       mzs.each_with_index do |mz,i|
-	next if mz.class == Hash
-	o_mz = mz
-	mz = mz.keys[0][0] if mz.class == Hash
-	range = (mz..mz+half_range)
-	if range.include?(mzs[i+1])
-	  metaA_mz = [o_mz, mzs[i+1]]
-	  meta_int = [ints[i],ints[i+1]]
-	  sum = meta_int.flatten.inject(:+).to_f
-	  i1 = ints[i]
-	  i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
-	  frac1 = (i1/sum) * 100
-	  frac2 = (ints[i+1]/sum) * 100
-	  metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
-	  mzs[i] = nil; mzs[i+1] = metaB_mz
-	  ints[i] = nil; ints[i+1] = meta_int
-	end
+        next if mz.class == Hash
+        o_mz = mz
+        mz = mz.keys[0][0] if mz.class == Hash
+        range = (mz..mz+half_range)
+        if range.include?(mzs[i+1])
+          metaA_mz = [o_mz, mzs[i+1]]
+          meta_int = [ints[i],ints[i+1]]
+          sum = meta_int.flatten.inject(:+).to_f
+          i1 = ints[i]
+          i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
+          frac1 = (i1/sum) * 100
+          frac2 = (ints[i+1]/sum) * 100
+          metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
+          mzs[i] = nil; mzs[i+1] = metaB_mz
+          ints[i] = nil; ints[i+1] = meta_int
+        end
       end
-      new_data[rt] = [mzs.compact,ints.compact]
+      spec = [mzs.compact,ints.compact]
+      spec.ms_level = val.ms_level
+      spec.ms2 = val.ms2
+      new_data[rt] = spec
       k += 1
     end
-    Progress.progress("Merging Overlaps:",100,Time.now-@start)
-    puts ''
+    prog.finish!
     return new_data
   end
   def self.compact(spectra)
     @start = Time.now
     total = spectra.size
     k = 0
+    num = 0
+    prog = Progress.new("Merge Finishing:")
+    step = total/100.0
     spectra.each do |rt,val|
-      Progress.progress("Merge Finishing:",(((k/total)*100).to_i))
+      if k > step * (num + 1)
+        num = (((k/total)*100).to_i)
+        prog.update(num)
+      end
       mzs = val[0]
       ints = val[1]
       mzs.each_with_index do |m,i|
-	if m.class == Hash
-	  mzs[i] = m.keys[0][0]
-	  ints[i] = ints[i].flatten.inject(:+)
-	end
+        if m.class == Hash
+          mzs[i] = m.keys[0][0]
+          ints[i] = ints[i].flatten.inject(:+)
+        end
       end
-      spectra[rt] = [mzs,ints]
+      spec = [mzs,ints]
+      spec.ms_level = val.ms_level
+      spec.ms2 = val.ms2
+      spectra[rt] = spec
       k += 1
     end
-    Progress.progress("Merge Finishing:",100,Time.now-@start)
-    puts ''
+    prog.finish!
     return spectra
   end
 end

data/lib/ms/mzml_wrapper.rb CHANGED Viewed

@@ -6,42 +6,87 @@ require 'mspire/mzml'
 class Mzml_Wrapper
   def initialize(spectra)
-  #spectra is a Hash rt=>[[mzs],[ints]]
-    @start = Time.now
+    #spectra is a Hash rt=>[[mzs],[ints]]
+    ms2_count = 0
     count = 0.0
     scan_number = 1
     specs = []
-    spectra.each do |rt,data|
-      Progress.progress("Converting to mzml:",(((count/spectra.size)*100).to_i))
+    prog = Progress.new("Converting to mzml:")
+    num = 0
+    total = spectra.size
+    step = total/100
+    spec_id = nil
+    t_rt = 0
+    spectra.sort.map do |rt,data|
+      if count > step * (num + 1)
+        num = (((count/total)*100).to_i)
+        prog.update(num)
+      end
+      if t_rt > rt
+	puts "OUT of ORDER"
+      end
+      t_rt = rt
+      ms_level = data.ms_level # method added to array class
       spc = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
-	spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
-	spec.data_arrays = [
-	  Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
-	  Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
-	]
-	spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
-	  scan = Mspire::Mzml::Scan.new do |scan|
-	    scan.describe! 'MS:1000016', rt, 'UO:0000010'
-	  end
-	  sl << scan
-	end
+        spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
+        spec.data_arrays = [
+          Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
+          Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
+        ]
+        spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
+          scan = Mspire::Mzml::Scan.new do |scan|
+            scan.describe! 'MS:1000016', rt, 'UO:0000010'
+          end
+          sl << scan
+        end
+      end
+      specs<<spc
+      if ms_level == 2
+        #[rt,[mzs],[ints]]
+        ms2 = data.ms2
+        ms2.each do |data|
+          ms2_count += 1
+          scan_number += 1
+          spc2 = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
+            spec.describe_many!(['MS:1000127', ['MS:1000511', 2]])
+            spec.data_arrays = [
+              Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000514'),
+              Mspire::Mzml::DataArray.new(data[2]).describe!('MS:1000515')
+            ]
+            spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
+              scan = Mspire::Mzml::Scan.new do |scan|
+                scan.describe! 'MS:1000016', data[0], 'UO:0000010'
+              end
+              sl << scan
+            end
+            precursor = Mspire::Mzml::Precursor.new( spc.id )
+            si = Mspire::Mzml::SelectedIon.new
+            # the selected ion m/z:
+            si.describe! "MS:1000744", data.pre_mz
+            # the selected ion charge state
+            si.describe! "MS:1000041", data.pre_charge
+            # the selected ion intensity
+            si.describe! "MS:1000042", data.pre_int
+            precursor.selected_ions = [si]
+            spec.precursors = [precursor]
+          end
+          specs<<spc2
+        end
       end
       count += 1
       scan_number += 1
-      specs<<spc
     end
     @mzml = Mspire::Mzml.new do |mzml|
-      mzml.id = 'ms1'
+      mzml.id = 'ms1_and_ms2'
       mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
       mzml.file_description = Mspire::Mzml::FileDescription.new  do |fd|
-	fd.file_content = Mspire::Mzml::FileContent.new
-	fd.source_files << Mspire::Mzml::SourceFile.new
+        fd.file_content = Mspire::Mzml::FileContent.new
+        fd.source_files << Mspire::Mzml::SourceFile.new
       end
       default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
       mzml.instrument_configurations << default_instrument_config
@@ -50,15 +95,15 @@ class Mzml_Wrapper
       default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
       mzml.data_processing_list << default_data_processing
       mzml.run = Mspire::Mzml::Run.new("simulated_run", default_instrument_config) do |run|
-	spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
-	run.spectrum_list = spectrum_list
+        spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
+        run.spectrum_list = spectrum_list
       end
     end
-    Progress.progress("Converting to mzml:",100,Time.now-@start)
-    puts ''
+    prog.finish!
+    puts "ms2 written = #{ms2_count}"
     return @mzml
   end
   def to_xml(file)
     return @mzml.to_xml(file)
   end

data/lib/ms/noise.rb CHANGED Viewed

@@ -5,47 +5,47 @@ require 'ms/rt/rt_helper'
 module MS
   module Noise
     module_function
-    def noiseify(density,max_mz)
-    # spectra is {rt => [[mzs],[ints]]}
-      @start = Time.now
+    def noiseify(opts,max_mz)
+      # spectra is {rt => [[mzs],[ints]]}
+      desity = opts[:noise_density]
+      max_int = opts[:noiseMaxInt]
+      min_int = opts[:noiseMinInt]
       @noise = {}
       r_times = Sim_Spectra.r_times
-      count = 0.0
+      count = 0
+      prog = Progress.new("Adding noise:")
+      num = 0
+      total = r_times.size
+      step = total/100.0
       r_times.each do |rt|
-	Progress.progress("Adding noise:",(((count/r_times.size)*100).to_i))
-	nmzs = []
-	nints = []
-	density.times do
-	  rmz = RThelper.RandomFloat(0.0,max_mz)
-	  rint = RThelper.RandomFloat(50,1000)
-	  nmzs<<rmz
-	  nints<<rint
-	end
-	@noise[rt] = [nmzs,nints]
-	count += 1
+        if count > step * (num + 1)
+          num = (((count/total)*100.0).to_i)
+          prog.update(num)
+        end
+        nmzs = []
+        nints = []
+        density.times do
+          rmz = RThelper.RandomFloat(0.0,max_mz)
+          rint = RThelper.RandomFloat(min_int,max_int)
+          nmzs<<rmz
+          nints<<rint
+        end
+        @noise[rt] = [nmzs,nints]
+        count += 1
       end
-      Progress.progress("Adding noise:",100,Time.now-@start)
-      puts ''
+      prog.finish!
       return @noise
     end
     def spec_drops(drop_percentage)
       r_times = Sim_Spectra.r_times
       l = r_times.length
       num_drops = drop_percentage * l
       num_drops.to_i.times do
-	r_times.delete_at(rand(l+1))
+        r_times.delete_at(rand(l+1))
       end
       return r_times
     end
   end
 end

data/lib/ms/rt/rt_helper.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module RThelper
   module_function
   def normalized_gaussian(x,mu,sd)
     x = x.to_f
@@ -8,7 +8,7 @@ module RThelper
     sd = sd.to_f
     return ((1/(Math.sqrt(2*(Math::PI)*(sd**2))))*(Math.exp(-(((x-mu)**2)/((2*sd)**2)))))
   end
   module_function
   def gaussian(x,mu,sd,h)
     x = x.to_f
@@ -17,7 +17,7 @@ module RThelper
     h = h.to_f
     return h*Math.exp(-(x-mu)**2/(sd**2))
   end
   module_function
   def RandomFloat(a,b)
     a = a.to_f

data/lib/ms/rt/rtgenerator.rb CHANGED Viewed

@@ -8,74 +8,86 @@ require 'ms/rt/rt_helper'
 module MS
   module Rtgenerator
     module_function
     def generateRT(peptides, one_d)
-      @start = Time.now
       @r_times = Sim_Spectra.r_times
       # Gets retention times from the weka model
       peptides = MS::Weka.predict_rts(peptides)
       MS::Weka.predict_ints(peptides)
+      #-----------------------------------------------------------------
+      prog = Progress.new("Generating retention times:")
+      num = 0
+      total = peptides.size
+      step = total/100.0
+      max_rt = 4*(@r_times.max/5)
+      r_end = max_rt + (@r_times.max/5)/2
+      r_start = @r_times.max/5
-      #-----------------------------------------------------------------
       peptides.each_with_index do |pep,ind|
-        Progress.progress("Generating retention times:",(((ind+1)/peptides.size.to_f)*100).to_i)
-	#Fit retention times into scan times
-	max_rt = @r_times.max
-	p_rt = pep.p_rt * 10**-2
-	if p_rt > 1
-	  pep.p_rt = @r_times.max
-	  pep.p_rt_i = @r_times.index(pep.p_rt)
-	else
-	  pep.p_rt = @r_times.find {|i| i >= (p_rt * max_rt)}
-	  pep.p_rt_i = @r_times.index(pep.p_rt)
+	if ind > step * (num + 1)
+	  num = (((ind+1)/total.to_f)*100).to_i
+	  prog.update(num)
 	end
+        #Fit retention times into scan times
+        p_rt = pep.p_rt * 10**-2
+	percent_time = p_rt
+	sx = RThelper.gaussian(percent_time,0.5,0.45,1.0) * Math.sqrt(pep.abu) #need to figure out what these values should be
+	pep.sx = sx
+        if p_rt > 1
+          pep.p_rt = @r_times.find {|i| i >= r_end}
+          pep.p_rt_i = @r_times.index(pep.p_rt)
+        else
+          pep.p_rt = @r_times.find {|i| i >= (p_rt * max_rt)}
+          pep.p_rt_i = @r_times.index(pep.p_rt)
+        end
         if pep.p_rt == nil
           puts "\n\n\t#{pep} TIME-> #{p_rt*max_rt} :: Peptide not predicted in time range: try increasing run time\n\n."
-	else
-	#Give peptide retention times
-	  head_length = nil
-	  tail_length = nil
-	  if one_d
-	    head_length = 300.0
-	    tail_length = 701
-	  else
-	    head_length = 100.0
-	    tail_length = 300
-	  end
-	  a = @r_times.find {|i| i >= (pep.p_rt-head_length)}
-	  b = @r_times.find {|i| i >= (pep.p_rt+tail_length)}
-	  a = @r_times.index(a)
-	  b = @r_times.index(b)
-	  if a == nil
-	    a = @r_times[0]
-	  end
-	  if b == nil
-	    b = @r_times[@r_times.length-1]
-	  end
-	  pep.set_rts(a,b)
+        else
-	end
+          #Give peptide retention times
+          head_length = nil
+          tail_length = nil
+          if one_d
+            head_length = 300.0
+            tail_length = 701
+          else
+            head_length = 100.0 * sx
+            tail_length = 300 * sx
+          end
+          a = @r_times.find {|i| i >= (pep.p_rt-head_length)}
+          b = @r_times.find {|i| i >= (pep.p_rt+tail_length)}
+          a = @r_times.index(a)
+          b = @r_times.index(b)
+          if a == nil
+            a = @r_times[0]
+          end
+          if b == nil
+            b = @r_times[@r_times.length-1]
+          end
+          pep.set_rts(a,b)
+        end
       end
       #-----------------------------------------------------------------
-      Progress.progress("Generating retention times:",100,Time.now-@start)
-      puts ""
+      prog.finish!
       return peptides
     end
   end
 end

data/lib/ms/rt/weka.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'csv'
 module MS
   module Weka
-  #James Dalg
+    #James Dalg
     module_function
     def predict_rts(peptides)
       #mz,charge,intensity,rt,A,R,N,D,B,C,E,Q,Z,G,H,I,L,K,M,F,P,S,T,W,Y,V,J,mass,hydro,pi
@@ -13,12 +13,12 @@ module MS
         data<<pep.aa_counts
       end
       arff = make_rt_arff(Time.now.nsec.to_s,data)
       path = Gem.bin_path('mspire-simulator', 'mspire-simulator').split(/\//)
       dir = path[0..path.size-3].join("/")
       system("java weka.classifiers.functions.MultilayerPerceptron -T #{arff} -l #{dir}/lib/weka/M5Rules.model -p 24 > #{arff}.out")
       system("rm #{arff}")
       #extract what was predicted by weka model
       file = File.open("#{arff}.out","r")
       count = 0
@@ -31,38 +31,38 @@ module MS
       system("rm #{arff}.out")
       return peptides
     end
     def predict_ints(peptides)
       data = []
       peptides.each do |pep|
-	array = []
-	array<<pep.mono_mz<<pep.charge<<pep.mass<<pep.p_rt
+        array = []
+        array<<pep.mono_mz<<pep.charge<<pep.mass<<pep.p_rt
         data << array.concat(pep.aa_counts)
       end
       arff = make_int_arff(Time.now.nsec.to_s,data)
       path = Gem.bin_path('mspire-simulator', 'mspire-simulator').split(/\//)
       dir = path[0..path.size-3].join("/")
       system("java weka.classifiers.trees.M5P -T #{arff} -l #{dir}/lib/weka/M5P.model -p 27 > #{arff}.out")
       system("rm #{arff}")
       #extract what was predicted by weka model
       file = File.open("#{arff}.out","r")
       count = 0
       while line = file.gets
         if line =~ /(\d*\.\d{0,3}){1}/
-	  peptides[count].p_int = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
+          peptides[count].p_int = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
           count += 1
         end
       end
       system("rm #{arff}.out")
       return peptides
     end
     #James Dalg
     def make_rt_arff(sourcefile, training)
       sourcefile<<".arff"
@@ -105,9 +105,9 @@ module MS
       end
       return sourcefile
     end
-        #James Dalg
+    #James Dalg
     def make_int_arff(sourcefile, training)
       sourcefile<<".arff"
       File.open(sourcefile, "wb") do |f| # need to cite f.puts (not %Q)? if so http://www.devdaily.com/blog/post/ruby/how-write-text-to-file-ruby-example