RubyGems - mspire-simulator - Versions diffs - 0.1.0 - Mend

mspire-simulator 0.1.0

Files changed (37) hide show

data/LICENSE.txt +22 -0
data/README.rdoc +17 -0
data/Rakefile +51 -0
data/VERSION +1 -0
data/bin/mspire-simulator +125 -0
data/bin/sim_mail.rb +26 -0
data/bin/weka/M5P.model +0 -0
data/bin/weka/M5Rules.model +0 -0
data/bin/weka/weka.jar +0 -0
data/lib/ms/curvefit/curve_fit_helper.rb +152 -0
data/lib/ms/curvefit/fit_graph.rb +84 -0
data/lib/ms/curvefit/mzml_reader.rb +28 -0
data/lib/ms/curvefit.rb +120 -0
data/lib/ms/isoelectric_calc.rb +122 -0
data/lib/ms/merger.rb +101 -0
data/lib/ms/mzml_wrapper.rb +67 -0
data/lib/ms/noise.rb +51 -0
data/lib/ms/rt/rt_helper.rb +31 -0
data/lib/ms/rt/rtgenerator.rb +81 -0
data/lib/ms/rt/weka.rb +150 -0
data/lib/ms/sim_digester.rb +92 -0
data/lib/ms/sim_feature.rb +175 -0
data/lib/ms/sim_peptide.rb +182 -0
data/lib/ms/sim_spectra.rb +70 -0
data/lib/ms/sim_trollop.rb +68 -0
data/lib/ms/tr_file_writer.rb +175 -0
data/lib/progress.rb +24 -0
data/mspire-simulator.gemspec +103 -0
data/spec/file_writer_spec.rb +74 -0
data/spec/merger_spec.rb +23 -0
data/spec/ms-simulate_spec.rb +9 -0
data/spec/peptide_spec.rb +16 -0
data/spec/progress_spec.rb +22 -0
data/spec/spec_helper.rb +11 -0
data/spec/spectra_spec.rb +111 -0
data/testFiles/contam/hum_keratin.fasta +11 -0
metadata +246 -0

data/lib/ms/isoelectric_calc.rb ADDED Viewed

@@ -0,0 +1,122 @@
+#!/usr/bin/env ruby
+# http://isoelectric.ovh.org/files/practise-isoelectric-point.html#mozTocId496531
+# Taken from Ryan's github repo
+# Convergence tolerance, in pH units, for the bisection search in calc_PI.
+Precision = 0.001
+# Per-residue pK values keyed by one-letter amino-acid symbol. Each entry is a
+# three-element array:
+#   [0] and [1] are read by identify_potential_charges as terminal pK values;
+#   [2] is read by charge_at_pH as the side-chain pK (nil when none is listed).
+# NOTE(review): the numbers look like the usual alpha-carboxyl / alpha-amino /
+# side-chain pKa columns, in that order -- confirm against the source table.
+ResidueTable = {
+	:K => [2.18,8.95,10.53],
+	:E => [2.19,9.67,4.25],
+	:D => [1.88,9.60,3.65],
+	:H => [1.82,9.17,6.00],
+	:R => [2.17,9.04,12.48],
+	:Q => [2.17,9.13,nil],
+	:N => [2.02,8.80,nil],
+	:C => [1.96,10.28,8.18],
+	:T => [2.11,9.62,nil],
+	:S => [2.21,9.15,nil],
+	:W => [2.38,9.39,nil],
+	:Y => [2.20,9.11,10.07],
+	:F => [1.83,9.13,nil],
+	:M => [2.28,9.21,nil],
+	:I => [2.36,9.68,nil],
+	:L => [2.36,9.60,nil],
+	:V => [2.32,9.62,nil],
+	:P => [1.99,10.96,nil],
+	:A => [2.34,9.69,nil],
+	:G => [2.34,9.60,nil],
+# Fringe cases: B, Z, X and U are non-standard one-letter codes, which makes
+# their pI contribution harder to calculate. B and Z reuse the side-chain
+# values listed above for D and E respectively.
+	:B => [1.95,9.20,3.65],
+	:Z => [2.18,9.40,4.25],
+	:X => [2.20,9.40,nil],
+	:U => [1.96,10.28,5.20] # Unfortunately, I've only found the pKr for this... so I've used Cysteine's values.
+}
+# Per-peptide tally: the upper-cased sequence, the two terminal pK values, one
+# counter per counted residue class, and a :pi slot that is initialised to 0 by
+# identify_potential_charges.
+PepCharges = Struct.new(:seq, :n_term, :c_term, :y_num, :c_num, :k_num, :h_num, :r_num, :d_num, :e_num, :u_num, :polar_num, :hydrophobic_num, :pi)
+def identify_potential_charges(str)
+	string = str.upcase
+	first = string[0]; last = string[-1]
+	puts string if first.nil? or last.nil?
+	begin
+		out = PepCharges.new(string, ResidueTable[first.to_sym][0], ResidueTable[last.to_sym][1], 0, 0, 0 ,0 ,0 ,0, 0, 0, 0, 0, 0)
+	rescue NoMethodError
+		abort string
+	end
+	string.chars.each do |letter|
+		case letter
+			when "Y"
+				out.y_num += 1
+			when "C"
+				out.c_num += 1
+			when "K"
+				out.k_num += 1
+			when "H"
+				out.h_num += 1
+			when "R"
+				out.r_num += 1
+			when "D"
+				out.d_num += 1
+			when "E"
+				out.e_num += 1
+			when "U"
+				out.u_num += 1
+			when "S", "T", "N", "Q"
+				out.polar_num += 1
+			when "A", "V", "I", "L", "M", "F", "W", "G", "P"
+				out.hydrophobic_num += 1
+		end
+	end
+	out
+end # Returns the PepCharges structure
+def charge_at_pH(pep_charges, pH)
+	charge = 0
+	charge += -1/(1+10**(pep_charges.c_term-pH))
+	charge += -pep_charges.d_num/(1+10**(ResidueTable[:D][2]-pH))
+	charge += -pep_charges.e_num/(1+10**(ResidueTable[:E][2]-pH))
+	charge += -pep_charges.c_num/(1+10**(ResidueTable[:C][2]-pH))
+	charge += -pep_charges.y_num/(1+10**(ResidueTable[:Y][2]-pH))
+	charge += 1/(1+10**(pH - pep_charges.n_term))
+	charge += pep_charges.h_num/(1+10**(pH-ResidueTable[:H][2]))
+	charge += pep_charges.k_num/(1+10**(pH-ResidueTable[:K][2]))
+	charge += pep_charges.r_num/(1+10**(pH-ResidueTable[:R][2]))
+	charge
+end
+def calc_PI(pep_charges)
+	pH = 8; pH_prev = 0.0; pH_next = 14.0
+	charge = charge_at_pH(pep_charges, pH)
+	while pH-pH_prev > Precision and pH_next-pH > Precision
+		if charge < 0.0
+			tmp = pH
+			pH = pH - ((pH-pH_prev)/2)
+			charge = charge_at_pH(pep_charges, pH)
+			pH_next = tmp
+		else
+			tmp = pH
+			pH = pH + ((pH_next - pH)/2)
+			charge = charge_at_pH(pep_charges, pH)
+			pH_prev = tmp
+		end
+	#	puts "charge: #{charge.round(2)}\tpH: #{pH.round(2)}\tpH_next: #{pH_next.round(2)}\tpH_prev: #{pH_prev.round(2)}"
+	end
+	pH
+end
+#pepcharges =[]
+=begin
+#  RUN the ENTRY FILE HERE
+pi = []
+io = File.open(ARGV.shift, 'r')
+io.each_line do |line|
+	pi << calc_PI(identify_potential_charges(line[/^([A-Z]+):.*/]))
+end
+=end
+=begin
+pIes = []
+pepcharges.each do |a|
+	pIes << [a, calc_PI(a)]
+end
+=end
+#out_pi = pepcharges.map {|a| calc_PI(a)}
+#require 'yaml'
+#File.open('pi_list.yml', 'w') {|f| YAML.dump( pi, f) }

data/lib/ms/merger.rb ADDED Viewed

@@ -0,0 +1,101 @@
+require_relative '../progress'
+class Merger
+  def self.mz_value(arr)
+    if arr.class == Hash
+      return arr.keys[0][0]
+    else
+      return arr
+    end
+  end
+  def self.int_value(arr)
+    if arr.class == Array
+      return arr.last + int_value(arr.first)
+    else
+      return arr
+    end
+  end
+  def self.w_avg(values,weights)
+    if values.class == hash
+      values = values.values.flatten
+    end
+    a = []
+    int = 0
+    mz = 0
+    values.each_with_index do |v,i|
+      mz = mz_value(v)
+      int = int_value(weights[i])
+      a<<mz*int
+    end
+    a = a.inject(:+)
+    b = weights.flatten.inject(:+)
+    return a/b
+  end
+  def self.merge(spectra,half_range)
+    @start = Time.now
+    new_data = {}
+    total = spectra.size
+    k = 0
+    spectra.each do |rt,val|
+      Progress.progress("Merging Overlaps:",(((k/total)*100).to_i))
+      peaks = val.transpose
+      peaks.sort_by!{|a| a[0]}
+      peaks = peaks.transpose
+      mzs = peaks[0]
+      ints = peaks[1]
+      mzs.each_with_index do |mz,i|
+	next if mz.class == Hash
+	o_mz = mz
+	mz = mz.keys[0][0] if mz.class == Hash
+	range = (mz..mz+half_range)
+	if range.include?(mzs[i+1])
+	  metaA_mz = [o_mz, mzs[i+1]]
+	  meta_int = [ints[i],ints[i+1]]
+	  sum = meta_int.flatten.inject(:+).to_f
+	  i1 = ints[i]
+	  i1 = ints[i].flatten.inject(:+) if ints[i].class == Array
+	  frac1 = (i1/sum) * 100
+	  frac2 = (ints[i+1]/sum) * 100
+	  metaB_mz = {[w_avg(metaA_mz,meta_int),frac1,frac2] => metaA_mz}
+	  mzs[i] = nil; mzs[i+1] = metaB_mz
+	  ints[i] = nil; ints[i+1] = meta_int
+	end
+      end
+      new_data[rt] = [mzs.compact,ints.compact]
+      k += 1
+    end
+    Progress.progress("Merging Overlaps:",100,Time.now-@start)
+    puts ''
+    return new_data
+  end
+  def self.compact(spectra)
+    @start = Time.now
+    total = spectra.size
+    k = 0
+    spectra.each do |rt,val|
+      Progress.progress("Merge Finishing:",(((k/total)*100).to_i))
+      mzs = val[0]
+      ints = val[1]
+      mzs.each_with_index do |m,i|
+	if m.class == Hash
+	  mzs[i] = m.keys[0][0]
+	  ints[i] = ints[i].flatten.inject(:+)
+	end
+      end
+      spectra[rt] = [mzs,ints]
+      k += 1
+    end
+    Progress.progress("Merge Finishing:",100,Time.now-@start)
+    puts ''
+    return spectra
+  end
+end
+#test
+#data = {1 => [[1.0,1.5,1.7,3.0,4.0,5.0,6.0,7.0,8.0,9.0],[10,9,8,7,6,5,4,3,2,1]], 2 => [[1,2,3,4,5,6,7,8,9],[9,8,7,6,5,4,3,2,1]]}
+#p Merger.merge(data,0.5)

data/lib/ms/mzml_wrapper.rb ADDED Viewed

@@ -0,0 +1,67 @@
+require 'nokogiri'
+require 'progress'
+require 'mspire/mzml'
+class Mzml_Wrapper
+  def initialize(spectra)
+  #spectra is a Hash rt=>[[mzs],[ints]]
+    @start = Time.now
+    count = 0.0
+    scan_number = 1
+    specs = []
+    spectra.each do |rt,data|
+      Progress.progress("Converting to mzml:",(((count/spectra.size)*100).to_i))
+      spc = Mspire::Mzml::Spectrum.new("scan=#{scan_number}") do |spec|
+	spec.describe_many!(['MS:1000127', ['MS:1000511', 1]])
+	spec.data_arrays = [
+	  Mspire::Mzml::DataArray.new(data[0]).describe!('MS:1000514'),
+	  Mspire::Mzml::DataArray.new(data[1]).describe!('MS:1000515')
+	]
+	spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
+	  scan = Mspire::Mzml::Scan.new do |scan|
+	    scan.describe! 'MS:1000016', rt, 'UO:0000010'
+	  end
+	  sl << scan
+	end
+      end
+      count += 1
+      scan_number += 1
+      specs<<spc
+    end
+    @mzml = Mspire::Mzml.new do |mzml|
+      mzml.id = 'ms1'
+      mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
+      mzml.file_description = Mspire::Mzml::FileDescription.new  do |fd|
+	fd.file_content = Mspire::Mzml::FileContent.new
+	fd.source_files << Mspire::Mzml::SourceFile.new
+      end
+      default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
+      mzml.instrument_configurations << default_instrument_config
+      software = Mspire::Mzml::Software.new
+      mzml.software_list << software
+      default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
+      mzml.data_processing_list << default_data_processing
+      mzml.run = Mspire::Mzml::Run.new("simulated_run", default_instrument_config) do |run|
+	spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, specs)
+	run.spectrum_list = spectrum_list
+      end
+    end
+    Progress.progress("Converting to mzml:",100,Time.now-@start)
+    puts ''
+    return @mzml
+  end
+  def to_xml(file)
+    return @mzml.to_xml(file)
+  end
+end

data/lib/ms/noise.rb ADDED Viewed

@@ -0,0 +1,51 @@
+require 'progress'
+require 'ms/rt/rt_helper'
+module MS
+  module Noise
+    module_function
+    def noiseify(density,max_mz)
+    # spectra is {rt => [[mzs],[ints]]}
+      @start = Time.now
+      @noise = {}
+      r_times = Sim_Spectra.r_times
+      count = 0.0
+      r_times.each do |rt|
+	Progress.progress("Adding noise:",(((count/r_times.size)*100).to_i))
+	nmzs = []
+	nints = []
+	density.times do
+	  rmz = RThelper.RandomFloat(0.0,max_mz)
+	  rint = RThelper.RandomFloat(50,1000)
+	  nmzs<<rmz
+	  nints<<rint
+	end
+	@noise[rt] = [nmzs,nints]
+	count += 1
+      end
+      Progress.progress("Adding noise:",100,Time.now-@start)
+      puts ''
+      return @noise
+    end
+    def spec_drops(drop_percentage)
+      r_times = Sim_Spectra.r_times
+      l = r_times.length
+      num_drops = drop_percentage * l
+      num_drops.to_i.times do
+	r_times.delete_at(rand(l+1))
+      end
+      return r_times
+    end
+  end
+end

data/lib/ms/rt/rt_helper.rb ADDED Viewed

@@ -0,0 +1,31 @@
+module RThelper
+  module_function
+  def normalized_gaussian(x,mu,sd)
+    x = x.to_f
+    mu = mu.to_f
+    sd = sd.to_f
+    return ((1/(Math.sqrt(2*(Math::PI)*(sd**2))))*(Math.exp(-(((x-mu)**2)/((2*sd)**2)))))
+  end
+  module_function
+  def gaussian(x,mu,sd,h)
+    x = x.to_f
+    mu = mu.to_f
+    sd = sd.to_f
+    h = h.to_f
+    return h*Math.exp(-(x-mu)**2/(sd**2))
+  end
+  module_function
+  def RandomFloat(a,b)
+    a = a.to_f
+    b = b.to_f
+    random = rand(2147483647.0) / 2147483647.0
+    diff = b - a
+    r = random * diff
+    return a + r
+  end
+end

data/lib/ms/rt/rtgenerator.rb ADDED Viewed

@@ -0,0 +1,81 @@
+require 'time'
+require 'progress'
+require 'ms/sim_feature'
+require 'ms/rt/weka'
+require 'ms/sim_peptide'
+require 'ms/rt/rt_helper'
+module MS
+  module Rtgenerator
+    module_function
+    def generateRT(peptides, one_d)
+      @start = Time.now
+      @r_times = Sim_Spectra.r_times
+      # Gets retention times from the weka model
+      peptides = MS::Weka.predict_rts(peptides)
+      MS::Weka.predict_ints(peptides)
+      #-----------------------------------------------------------------
+      peptides.each_with_index do |pep,ind|
+        Progress.progress("Generating retention times:",(((ind+1)/peptides.size.to_f)*100).to_i)
+	#Fit retention times into scan times
+	max_rt = @r_times.max
+	p_rt = pep.p_rt * 10**-2
+	if p_rt > 1
+	  pep.p_rt = @r_times.max
+	  pep.p_rt_i = @r_times.index(pep.p_rt)
+	else
+	  pep.p_rt = @r_times.find {|i| i >= (p_rt * max_rt)}
+	  pep.p_rt_i = @r_times.index(pep.p_rt)
+	end
+        if pep.p_rt == nil
+          puts "\n\n\t#{pep} TIME-> #{p_rt*max_rt} :: Peptide not predicted in time range: try increasing run time\n\n."
+	else
+	#Give peptide retention times
+	  head_length = nil
+	  tail_length = nil
+	  if one_d
+	    head_length = 300.0
+	    tail_length = 701
+	  else
+	    head_length = 100.0
+	    tail_length = 300
+	  end
+	  a = @r_times.find {|i| i >= (pep.p_rt-head_length)}
+	  b = @r_times.find {|i| i >= (pep.p_rt+tail_length)}
+	  a = @r_times.index(a)
+	  b = @r_times.index(b)
+	  if a == nil
+	    a = @r_times[0]
+	  end
+	  if b == nil
+	    b = @r_times[@r_times.length-1]
+	  end
+	  pep.set_rts(a,b)
+	end
+      end
+      #-----------------------------------------------------------------
+      Progress.progress("Generating retention times:",100,Time.now-@start)
+      puts ""
+      return peptides
+    end
+  end
+end

data/lib/ms/rt/weka.rb ADDED Viewed

@@ -0,0 +1,150 @@
+require 'csv'
+module MS
+  module Weka
+  #James Dalg
+    module_function
+    def predict_rts(peptides)
+      #mz,charge,intensity,rt,A,R,N,D,B,C,E,Q,Z,G,H,I,L,K,M,F,P,S,T,W,Y,V,J,mass,hydro,pi
+      #make arrf file to feed weka model
+      data = []
+      peptides.each do |pep|
+        data<<pep.aa_counts
+      end
+      arff = make_rt_arff(Time.now.nsec.to_s,data)
+      system("java weka.classifiers.functions.MultilayerPerceptron -T #{arff} -l bin/weka/M5Rules.model -p 24 > #{arff}.out")
+      system("rm #{arff}")
+      #extract what was predicted by weka model
+      file = File.open("#{arff}.out","r")
+      count = 0
+      while line = file.gets
+        if line =~ /(\d*\.\d{0,3}){1}/
+          peptides[count].p_rt = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
+          count += 1
+        end
+      end
+      system("rm #{arff}.out")
+      return peptides
+    end
+    def predict_ints(peptides)
+      data = []
+      peptides.each do |pep|
+	array = []
+	array<<pep.mono_mz<<pep.charge<<pep.mass<<pep.p_rt
+        data << array.concat(pep.aa_counts)
+      end
+      arff = make_int_arff(Time.now.nsec.to_s,data)
+      system("java weka.classifiers.trees.M5P -T #{arff} -l bin/weka/M5P.model -p 27 > #{arff}.out")
+      system("rm #{arff}")
+      #extract what was predicted by weka model
+      file = File.open("#{arff}.out","r")
+      count = 0
+      while line = file.gets
+        if line =~ /(\d*\.\d{0,3}){1}/
+	  peptides[count].p_int = line.match(/(\d*\.\d{0,3}){1}/)[0].to_f
+          count += 1
+        end
+      end
+      system("rm #{arff}.out")
+      return peptides
+    end
+    #James Dalg
+    def make_rt_arff(sourcefile, training)
+      sourcefile<<".arff"
+      File.open(sourcefile, "wb") do |f| # need to cite f.puts (not %Q)? if so http://www.devdaily.com/blog/post/ruby/how-write-text-to-file-ruby-example
+        f.puts %Q{%
+%
+       @RELATION molecularinfo
+       @ATTRIBUTE A    NUMERIC
+       @ATTRIBUTE R    NUMERIC
+       @ATTRIBUTE N    NUMERIC
+       @ATTRIBUTE D    NUMERIC
+       @ATTRIBUTE B    NUMERIC
+       @ATTRIBUTE C    NUMERIC
+       @ATTRIBUTE E    NUMERIC
+       @ATTRIBUTE Q    NUMERIC
+       @ATTRIBUTE Z    NUMERIC
+       @ATTRIBUTE G    NUMERIC
+       @ATTRIBUTE H    NUMERIC
+       @ATTRIBUTE I    NUMERIC
+       @ATTRIBUTE L    NUMERIC
+       @ATTRIBUTE K    NUMERIC
+       @ATTRIBUTE M    NUMERIC
+       @ATTRIBUTE F    NUMERIC
+       @ATTRIBUTE P    NUMERIC
+       @ATTRIBUTE S    NUMERIC
+       @ATTRIBUTE T    NUMERIC
+       @ATTRIBUTE W    NUMERIC
+       @ATTRIBUTE Y    NUMERIC
+       @ATTRIBUTE V    NUMERIC
+       @ATTRIBUTE J    NUMERIC
+       @ATTRIBUTE rt    NUMERIC
+       @DATA
+%
+%      }
+      end
+      training.each do |innerarray|
+        CSV.open(sourcefile, "a") do |csv| #derived from sample code http://www.ruby-doc.org/stdlib-1.9.3/libdoc/csv/rdoc/CSV.html
+          csv << innerarray #idea may be slightly attributable to http://www.ruby-forum.com/topic/299571
+        end
+      end
+      return sourcefile
+    end
+        #James Dalg
+    def make_int_arff(sourcefile, training)
+      sourcefile<<".arff"
+      File.open(sourcefile, "wb") do |f| # need to cite f.puts (not %Q)? if so http://www.devdaily.com/blog/post/ruby/how-write-text-to-file-ruby-example
+        f.puts %Q{%
+%
+       @RELATION molecularinfo
+       @ATTRIBUTE mz   	NUMERIC
+       @ATTRIBUTE charge   NUMERIC
+       @ATTRIBUTE mass 	NUMERIC
+       @ATTRIBUTE rt   NUMERIC
+       @ATTRIBUTE A    NUMERIC
+       @ATTRIBUTE R    NUMERIC
+       @ATTRIBUTE N    NUMERIC
+       @ATTRIBUTE D    NUMERIC
+       @ATTRIBUTE B    NUMERIC
+       @ATTRIBUTE C    NUMERIC
+       @ATTRIBUTE E    NUMERIC
+       @ATTRIBUTE Q    NUMERIC
+       @ATTRIBUTE Z    NUMERIC
+       @ATTRIBUTE G    NUMERIC
+       @ATTRIBUTE H    NUMERIC
+       @ATTRIBUTE I    NUMERIC
+       @ATTRIBUTE L    NUMERIC
+       @ATTRIBUTE K    NUMERIC
+       @ATTRIBUTE M    NUMERIC
+       @ATTRIBUTE F    NUMERIC
+       @ATTRIBUTE P    NUMERIC
+       @ATTRIBUTE S    NUMERIC
+       @ATTRIBUTE T    NUMERIC
+       @ATTRIBUTE W    NUMERIC
+       @ATTRIBUTE Y    NUMERIC
+       @ATTRIBUTE V    NUMERIC
+       @ATTRIBUTE intensity  NUMERIC
+       @DATA
+%
+%      }
+      end
+      training.each do |innerarray|
+        CSV.open(sourcefile, "a") do |csv| #derived from sample code http://www.ruby-doc.org/stdlib-1.9.3/libdoc/csv/rdoc/CSV.html
+          csv << innerarray #idea may be slightly attributable to http://www.ruby-forum.com/topic/299571
+        end
+      end
+      return sourcefile
+    end
+  end
+end

data/lib/ms/sim_digester.rb ADDED Viewed

@@ -0,0 +1,92 @@
+module MS
+  class Sim_Digester
+    attr_reader :digested_file
+    attr_writer :digested_file
+    def initialize(digestor,pH)
+      @digestor = digestor
+      @pH = pH
+      @digested_file = ".#{Time.now.nsec.to_s}"
+    end
+    def create_digested_file(file)
+      inFile = File.open(file,"r")
+      seq = ""
+      inFile.each_line do |sequence|
+        if sequence =~ />/ or sequence == "\n"
+          seq = seq<<";"
+        else
+          seq = seq<<sequence.chomp
+        end
+      end
+      inFile.close
+      proteins = seq.split(/;/).delete_if{|str| str == ""}
+      trypsin = Mspire::Digester[@digestor]
+      digested = []
+      d_file = File.open(@digested_file, "w")
+      proteins.each do |prot|
+        dig = trypsin.digest(prot)
+        dig.each do |d|
+          digested<<d
+        end
+      end
+      proteins.clear
+      digested.uniq!
+      trun_digested = []
+      if digested.length > 50000
+        50000.times do
+          trun_digested<<digested[rand(digested.length)]
+        end
+        digested.clear
+        digested = trun_digested
+      end
+      digested.each do |dig|
+        d_file.puts(dig)
+      end
+      d_file.close
+      num_digested = digested.size
+      digested.clear
+      puts "Number of peptides: #{num_digested}"
+      return num_digested
+    end
+    def digest(file)
+      start = Time.now
+      num_digested = create_digested_file(file)
+      d_file = File.open(@digested_file, "r")
+      i = 0
+      peptides = []
+      d_file.each_line do |peptide_seq|
+        peptide_seq.chomp!
+        Progress.progress("Creating peptides '#{file}':",((i/num_digested.to_f)*100.0).to_i)
+        charge_ratio = charge_at_pH(identify_potential_charges(peptide_seq), @pH)
+        charge_f = charge_ratio.floor
+        charge_c = charge_ratio.ceil
+        peptide_f = MS::Peptide.new(peptide_seq, charge_f) if charge_f != 0
+        peptide_c = MS::Peptide.new(peptide_seq, charge_c) if charge_c != 0
+        peptides<<peptide_f if charge_f != 0
+        peptides<<peptide_c if charge_c != 0
+        i += 1
+      end
+      d_file.close
+      File.delete(@digested_file)
+      Progress.progress("Creating peptides '#{file}':",100,Time.now-start)
+      puts ''
+      return peptides
+    end
+  end
+end