RubyGems - mspire - Versions diffs - 0.3.1 → 0.3.9 - Mend

mspire 0.3.1 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

data/Rakefile +2 -2
data/bin/bioworks_to_pepxml.rb +15 -3
data/bin/ms_to_lmat.rb +2 -1
data/bin/sqt_group.rb +26 -0
data/changelog.txt +36 -0
data/lib/ms/msrun.rb +3 -1
data/lib/ms/parser/mzdata/dom.rb +14 -14
data/lib/ms/scan.rb +3 -3
data/lib/mspire.rb +1 -1
data/lib/sample_enzyme.rb +39 -0
data/lib/spec_id.rb +18 -0
data/lib/spec_id/aa_freqs.rb +6 -9
data/lib/spec_id/digestor.rb +16 -17
data/lib/spec_id/mass.rb +63 -1
data/lib/spec_id/parser/proph.rb +101 -2
data/lib/spec_id/precision/filter.rb +3 -2
data/lib/spec_id/precision/filter/cmdline.rb +3 -1
data/lib/spec_id/precision/filter/output.rb +1 -0
data/lib/spec_id/precision/prob.rb +88 -21
data/lib/spec_id/precision/prob/cmdline.rb +28 -16
data/lib/spec_id/precision/prob/output.rb +8 -2
data/lib/spec_id/proph/pep_summary.rb +25 -12
data/lib/spec_id/sequest.rb +28 -0
data/lib/spec_id/sequest/pepxml.rb +142 -197
data/lib/spec_id/sqt.rb +349 -0
data/lib/spec_id/srf.rb +33 -23
data/lib/validator.rb +40 -57
data/lib/validator/aa.rb +3 -90
data/lib/validator/aa_est.rb +112 -0
data/lib/validator/cmdline.rb +163 -31
data/lib/validator/decoy.rb +15 -7
data/lib/validator/digestion_based.rb +5 -4
data/lib/validator/q_value.rb +32 -0
data/script/peps_per_bin.rb +67 -0
data/script/sqt_to_meta.rb +24 -0
data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
data/specs/bin/fasta_shaker_spec.rb +2 -2
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
data/specs/bin/filter_and_validate_spec.rb +25 -6
data/specs/bin/ms_to_lmat_spec.rb +2 -2
data/specs/bin/prob_validate_spec.rb +5 -3
data/specs/sample_enzyme_spec.rb +86 -1
data/specs/spec_helper.rb +11 -9
data/specs/spec_id/bioworks_spec.rb +2 -1
data/specs/spec_id/precision/filter_spec.rb +5 -5
data/specs/spec_id/precision/prob_spec.rb +0 -67
data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
data/specs/spec_id/protein_summary_spec.rb +4 -4
data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
data/specs/spec_id/sequest_spec.rb +38 -0
data/specs/spec_id/sqt_spec.rb +111 -3
data/specs/spec_id_spec.rb +2 -0
data/specs/transmem/phobius_spec.rb +3 -1
data/specs/transmem/toppred_spec.rb +1 -1
data/specs/validator/aa_est_spec.rb +66 -0
data/specs/validator/aa_spec.rb +1 -68
data/specs/validator/background_spec.rb +2 -0
data/specs/validator/bias_spec.rb +3 -27
data/specs/validator/decoy_spec.rb +2 -2
data/specs/validator/transmem_spec.rb +2 -1
data/test_files/small.sqt +87 -0
metadata +312 -293

data/lib/validator/decoy.rb CHANGED Viewed

@@ -3,10 +3,12 @@ require 'validator'
 class Validator::Decoy < Validator
   include Precision::Calculator::Decoy
+  # a Regexp (if concatenated) or a String (the filename of a separate run)
   attr_accessor :constraint
   attr_accessor :decoy_on_match
   attr_accessor :correct_wins
+  attr_accessor :decoy_to_target_ratio
   attr_accessor :last_pep_was_decoy
@@ -16,13 +18,19 @@ class Validator::Decoy < Validator
   attr_reader :normal_peps_just_submitted
-  def initialize(constraint=nil, decoy_on_match = true, correct_wins = true)
-    @decoy_on_match = decoy_on_match
-    @correct_wins = correct_wins
-    @constraint = constraint
+  DEFAULTS = {
+    :decoy_on_match => true,
+    :correct_wins => true,
+    :decoy_to_target_ratio => 1.0,
+  }
+  def initialize(opts={})
+    merged = DEFAULTS.merge(opts)
+    @constraint, @decoy_on_match, @correct_wins, @decoy_to_target_ratio = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :decoy_to_target_ratio)
   end
   # returns [normal, decoy] (the order in which pephit_precision destructures it)
+  # reads the full protein reference
   def partition(peps)
     if @decoy_on_match
       if @correct_wins
@@ -74,15 +82,15 @@ class Validator::Decoy < Validator
     @normal_peps_just_submitted = normal
     @increment_normal += normal.size
     @increment_decoy += decoy.size
-    calc_precision(@increment_normal, @increment_decoy)
+    calc_precision(@increment_normal, @increment_decoy, @decoy_to_target_ratio)
   end
   def pephit_precision(peps, separate_peps=nil)
     if separate_peps
-      calc_precision(peps.size, separate_peps.size)
+      calc_precision(peps.size, separate_peps.size, @decoy_to_target_ratio)
     else
       (norm, decoy) = partition(peps)
-      calc_precision(norm.size, decoy.size)
+      calc_precision(norm.size, decoy.size, @decoy_to_target_ratio)
     end
   end

data/lib/validator/digestion_based.rb CHANGED Viewed

@@ -6,7 +6,8 @@ require 'spec_id/sequest/params'
 # SpecID::Pep objects using the pephit_precision method.
 class Validator::DigestionBased < Validator
   DEFAULTS = {
-    :false_to_total_ratio => 1.0,
+    #:false_to_total_ratio => 1.0,  # disabled because this needs to be set
+    # explicitly
     :background => 0.0,
   }
@@ -42,13 +43,13 @@ class Validator::DigestionBased < Validator
   # returns [num_tps, num_fps]
   def calc_precision_prep(num_tps, num_fps)
     total_peps_passing_partition = num_tps + num_fps
-    num_fps = adjust_fps_for_background(num_tps, num_fps, @background)
+    num_fps = adjust_fps_for_background(num_tps, num_fps, background)
     ## we must use the false_to_total_ratio to estimate how many are really
     ## incorrect!
     # FALSE/TOTAL  = FALSE(found)/TOTAL(found)
     # TOTAL(found) = FALSE(found) * TOTAL/FALSE
     #              = FALSE(found) / (FALSE/TOTAL)
-    total_false = num_fps / @false_to_total_ratio
+    total_false = num_fps / false_to_total_ratio
     # NOTE: the partition algorithm drops peptides that are transmembrane
     # under certain options.  Thus, the total false estimate must be tempered
     # by this lower number of total peptides.
@@ -60,7 +61,7 @@ class Validator::DigestionBased < Validator
   # assumes partition returns (tps, fps)
   def set_false_to_total_ratio(peps)
     (tps, fps) = partition(peps)
-    @false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
+    self.false_to_total_ratio = fps.size.to_f / (tps.size + fps.size)
     self
   end

data/lib/validator/q_value.rb ADDED Viewed

@@ -0,0 +1,32 @@
+# from percolator
+# This is a trivial class (since q-values are so straightforward with regard
+# to precision), but it allows us to work with q-values using the same
+# interface as all other validators
+class Validator::QValue
+  # objs should respond_to :q_value
+  # q-values: 0.0 means no false discoveries, 0.5 means 50% false discoveries
+  # 1 - (the largest q value) is the precision
+  def precision(objs)
+    return 1.0 if objs.size == 0
+    largest_q_value = objs.map {|v| v.q_value }.max
+    prec = 1.0 - largest_q_value
+  end
+  # objs should respond_to :q_value
+  # These should be added from low q-value to high q-value
+  # The last q-value added determines the precision
+  def increment_precision(objs)
+    if objs.is_a?(SpecID::Pep) or objs.is_a?(SpecID::Prot)
+      objs = [objs]
+    end
+    precision(objs)
+  end
+  alias_method :pephit_precision, :precision
+  alias_method :prothit_precision, :precision
+  alias_method :increment_pephits_precision, :increment_precision
+end

data/script/peps_per_bin.rb ADDED Viewed

@@ -0,0 +1,67 @@
+#!/usr/bin/ruby -w
+require 'generator'
+require 'optparse'
+require 'fasta'
+require 'sample_enzyme'
+require 'spec_id/digestor'
+require 'spec_id/mass'
+require 'vec'
+opt = {}
+opt[:missed_cleavages] = 0 # number of missed cleavages allowed
+opt[:bin_size] = 0.001  # ~ parts per million
+opt[:min] = 300.0
+opt[:max] = 4500.0
+opt[:h_plus] = 1.0
+opts = OptionParser.new do |op|
+  op.banner = "usage: #{File.basename(__FILE__)} *.fasta"
+  op.separator "Outputs a close estimate of number of peptides per bin."
+  op.separator "Uses m+H+ as the peptide mass."
+  op.separator "[for speed, assumes that there is a peptide mass close to the extremes]"
+  op.on("-b", "--bin_size <F>", Float, "size of bins [#{opt[:bin_size]}]") {|v| opt[:bin_size] = v }
+  op.on("-x", "--max <F>", Float, "max mass to accept [#{opt[:max]}]") {|v| opt[:max] = v }
+  op.on("-n", "--min <F>", Float, "min mass to accept [#{opt[:min]}]") {|v| opt[:min] = v }
+  op.on("-h", "--h_plus <F>", Float, "value of H+ to use [#{opt[:h_plus]}]") {|v| opt[:h_plus] = v }
+  op.on("-m", "--missed_cleavages <N>", Integer, "num missed cleavages [#{opt[:missed_cleavages]}]") {|v| opt[:missed_cleavages] = v }
+end
+opts.parse!
+if ARGV.size == 0
+  puts opts.to_s
+  exit
+end
+min_mass = opt[:min]
+max_mass = opt[:max]
+ARGV.each do |file|
+  fasta = Fasta.new(file)
+  uniq_aaseqs = fasta.map do |prot|
+    SampleEnzyme.tryptic(prot.aaseq, opt[:missed_cleavages])
+  end.flatten.uniq
+  masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs)
+  passing_masses = Mass::Calculator.new(Mass::MONO, opt[:h_plus]).masses(uniq_aaseqs).select do |mh|
+    ((mh >= min_mass) and (mh <= max_mass))
+  end
+  ## warn if the masses aren't close to the end points
+  if (max_mass - passing_masses.max) > 1.0
+    warn "highest mass is not that close to max: #{passing_masses.max}"
+  end
+  if (passing_masses.min - min_mass) > 1.0
+    warn "lowest mass is not that close to min: #{passing_masses.min}"
+  end
+  num_bins = (max_mass - min_mass) / opt[:bin_size]
+  (bins, freqs) = VecD.new(passing_masses).histogram(num_bins)
+  # report
+  puts "#{file}: #{freqs.avg}"
+end

data/script/sqt_to_meta.rb ADDED Viewed

@@ -0,0 +1,24 @@
+#!/usr/bin/ruby -s
+require 'optparse'
+$outfile = 'meta.sqm'
+opts = OptionParser.new do |op|
+  op.banner = "usage: #{File.basename(__FILE__)} <file>.sqt ..."
+  op.separator "outputs meta.sqm (a sqt meta file)"
+  op.on("-o", "--outfile <file>", "currently: #{$outfile}") {|v| $outfile = v}
+end
+opts.parse!
+if ARGV.size == 0
+  puts opts.to_s
+  exit
+end
+File.open($outfile, 'w') do |out|
+  ARGV.each do |file|
+    out.puts File.expand_path(file)
+  end
+end

data/specs/bin/bioworks_to_pepxml_spec.rb CHANGED Viewed

@@ -41,7 +41,7 @@ describe 'bioworks_to_pepxml.rb' do
       cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path --copy_mzxml"
       ## FILES EXIST:
       prc = proc {|file|
-        file.should exist
+        file.exist_as_a_file?.should be_true
         beginning = IO.readlines(file)[0,50].join("\n")
         $XML_SANITY_LINES.each do |line|
           beginning.should include(line)
@@ -55,7 +55,7 @@ describe 'bioworks_to_pepxml.rb' do
       ## COPY MZXML:
       %w(000 020).each do |file|
         mzxml_file = File.join(@out_path, "#{file}.mzXML")
-        mzxml_file.should exist
+        mzxml_file.exist_as_a_file?.should be_true
       end
       ## CLEANUP:
       unless @no_delete then FileUtils.rm_rf(@out_path) end
@@ -68,7 +68,7 @@ describe 'bioworks_to_pepxml.rb' do
       db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
       IO.read(@tf_params).should =~ db_re
       prc = proc {|file|
-        file.should exist
+        file.exist_as_a_file?.should be_true
         IO.read(file).should_not =~ db_re
       }
       _basic(cmd, prc)

data/specs/bin/fasta_shaker_spec.rb CHANGED Viewed

@@ -200,13 +200,13 @@ EDITPEP
     end
   def fastalns(fn)
-    fn.should exist
+    fn.exist_as_a_file?.should be_true
     IO.read(fn).split("\n")
   end
   # returns the fasta object proteins
   def fastap(fn)
-    @f.should exist
+    @f.exist_as_a_file?.should be_true
     Fasta.new(fn).prots
   end

data/specs/bin/filter_and_validate__multiple_vals_helper.yaml CHANGED Viewed

@@ -2,10 +2,10 @@
 pephits_precision:
 - validator: decoy
   value: 0.992932862190813
-- validator: badAA
+- validator: badAAEst
   value: 0.178006237270664
-- validator: badAA
-  value: -0.0247654296463377
+- validator: badAAEst
+  value: -0.0247654296463379
 - validator: badAA
   value: 0.301413862599215
 - validator: bias
@@ -94,22 +94,19 @@ params:
     :decoy_on_match: true
     :correct_wins: true
   - :calculated_background: 0.127208480565371
-    :type: badAA
-    :class: Validator::AA
+    :type: badAAEst
+    :class: Validator::AAEst
     :background: 0.001
     :frequency: 0.0147528119278054
-    :false_to_total_ratio: 1.0
   - :calculated_background: 0.402826855123675
-    :type: badAA
-    :class: Validator::AA
+    :type: badAAEst
+    :class: Validator::AAEst
     :background: 0.0
     :frequency: 0.0463510332199843
-    :false_to_total_ratio: 1.0
   - :calculated_background: 0.127208480565371
     :type: badAA
     :class: Validator::AA
     :background: 0.001
-    :frequency:
     :false_to_total_ratio: 0.180662732637313
   - :calculated_background: 0.773851590106007
     :type: bias

data/specs/bin/filter_and_validate_spec.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+require 'yaml'
 require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
 require 'spec_id/precision/filter'
@@ -80,7 +82,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
       `#{run_normal}`
     end
     structs = [ht_file, hs_file].map do |file|
-      file.should exist
+      file.exist_as_a_file?.should be_true
       struct = YAML.load_file(file)
       File.unlink file
       struct
@@ -104,7 +106,7 @@ describe 'filter_and_validate.rb on small bioworks file' do
   it 'handles multiple validators of the same kind (except, of course, decoy)' do
-    struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins  -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa C,true,0.001 --bad_aa E,true --bad_aa C,false,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
+    struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins  -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa_est C,0.001 --bad_aa_est E --bad_aa C,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --fasta #{@small_fasta_file} --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
     frozen = YAML.load_file( File.dirname(__FILE__) + "/filter_and_validate__multiple_vals_helper.yaml" )
     ## Pephits precision:
@@ -121,8 +123,25 @@ describe 'filter_and_validate.rb on small bioworks file' do
     frp = frozen['params']
     stp = struct['params']
+    #puts "frozen validators:"
+    #p frp['validators']
+    #puts "seen validators:"
+    #p stp['validators']
     frp['validators'].zip(stp['validators']) do |f,s|
-      f.should == s
+      if f.is_a? Hash
+        f.keys.each do |k|
+          if k == :file or k == :transmem_file
+            File.basename(f[k]).should == File.basename(s[k].gsub('\\','/'))
+          else
+            s[k].should == f[k]
+            #f[k].should == s[k]
+          end
+        end
+      else
+        f.should == s
+      end
     end
     %w(ties prefilter top_hit_by decoy_on_match postfilter include_ties_in_top_hit_postfilter hits_together proteins include_ties_in_top_hit_prefilter).each do |k|
@@ -148,9 +167,9 @@ describe 'filter_and_validate.rb on small bioworks file' do
     text_table = IO.read(@table_output_file)
     # frozen
-    headings_re = Regexp.new( %w(num decoy badAA badAA badAA  bias  bias  bias   tmm   tmm   tmm   tmm   tps).join("\\s+") )
-    data_re = Regexp.new( %w(peps 283 0.993 0.178 -0.025 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
-    prot_re = Regexp.new( %w(106 0.972 0.019   0.0 0.038 0.019   0.0 0.094 0.123   0.0   0.0   0.0 0.028).join("\\s+") )
+    headings_re = Regexp.new( %w(num decoy badAAEst badAAEst badAA  bias  bias  bias   tmm   tmm   tmm   tmm   tps).join("\\s+") )
+    data_re = Regexp.new( %w(peps 283 0.993 0.178006 -0.024765 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
+    prot_re = Regexp.new( %w(106 0.972 0.018868   0.0 0.038 0.019   0.0 0.094 0.123   0.0   0.0   0.0 0.028).join("\\s+") )
     text_table.should =~ headings_re
     text_table.should =~ data_re
     text_table.should =~ prot_re

data/specs/bin/ms_to_lmat_spec.rb CHANGED Viewed

@@ -16,7 +16,7 @@ describe 'ms_to_lmat.rb' do
     cmd = "#{@cmd} #{@mzxml} --ascii"
     `#{cmd}`
     newfile = @mzxml.sub(".mzXML", ".lmata")
-    newfile.should exist
+    newfile.exist_as_a_file?.should be_true
     IO.read(newfile).should == IO.read(@ans_lmata)
     File.unlink(newfile)
   end
@@ -26,7 +26,7 @@ describe 'ms_to_lmat.rb' do
     cmd = "#{@cmd} #{@mzxml}"
     `#{cmd}`
     newfile = @mzxml.sub(".mzXML", ".lmat")
-    newfile.should exist
+    newfile.exist_as_a_file?.should be_true
     IO.read(newfile).should == IO.read(@ans_lmat)
     File.unlink(newfile)
   end

data/specs/bin/prob_validate_spec.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+require 'yaml'
 require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
 require 'spec_id/precision/prob'
@@ -47,19 +49,19 @@ describe 'filter_and_validate.rb on small bioworks file' do
   it 'responds to --prob init' do
     normal = @st_to_yaml.call( @args + " --prob" )
- normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.996655518394649, 0.918918918918919]) do |got,exp|
+ normal[:pephits_precision].first[:values].zip([1.0, 1.0, 0.993333333333333, 0.85]) do |got,exp|
       got.should be_close(exp, 0.000000000001)
     end
     #normal_nsp = @st_to_yaml.call( @args + " --prob nsp" )
     #normal.should == normal_nsp
     init = @st_to_yaml.call( @args + " --prob init" )
     init.should_not == normal
-    init[:pephits_precision].first[:values].zip([1.0, 0.974358974358974, 0.981324278438031, 0.890429958391123]) do |got,exp|
+    init[:pephits_precision].first[:values].zip([1.0, 0.95, 0.963333333333333, 0.8025]) do |got,exp|
       got.should be_close(exp, 0.000000000001)
     end
     with_sort_by = @st_to_yaml.call( @args + " --prob nsp --sort_by_init" )
     # frozen
-    with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.994974874371859, 0.996655518394649, 0.918918918918919]) do |got,exp|
+    with_sort_by[:pephits_precision].first[:values].zip([1.0, 0.99, 0.993333333333333, 0.85]) do |got,exp|
       got.should be_close(exp, 0.000000000001)
     end
   end

data/specs/sample_enzyme_spec.rb CHANGED Viewed

@@ -33,9 +33,94 @@ describe SampleEnzyme, "digesting sequences" do
     peps = SampleEnzyme.new('trypsin').digest(st, 2)
     peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
   end
 end
+describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
+  before(:each) do
+    @full_KRP = SampleEnzyme.new do |se|
+      se.name = 'trypsin'
+      se.cut = 'KR'
+      se.no_cut = 'P'
+      se.sense = 'C'
+    end
+    @just_KR = SampleEnzyme.new do |se|
+      se.name = 'trypsin'
+      se.cut = 'KR'
+      se.no_cut = ''
+      se.sense = 'C'
+    end
+  end
+  it 'calculates the number of tolerant termini' do
+    exp = [{
+      # full KR/P
+      'K.EPTIDR.E' => 2,
+      'K.PEPTIDR.E' => 1,
+      'F.EEPTIDR.E' => 1,
+      'F.PEPTIDW.R' => 0,
+    },
+    {
+      # just KR
+      'K.EPTIDR.E' => 2,
+      'K.PEPTIDR.E' => 2,
+      'F.EEPTIDR.E' => 1,
+      'F.PEPTIDW.R' => 0,
+    }
+    ]
+    scall = Sequest::PepXML::SearchHit
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
+      hash.each do |seq, val|
+        sample_enzyme.num_tol_term(seq).should == val
+      end
+    end
+  end
+  it 'calculates number of missed cleavages' do
+    exp = [{
+    "EPTIDR" => 0,
+    "PEPTIDR" => 0,
+    "EEPTIDR" => 0,
+    "PEPTIDW" => 0,
+    "PERPTIDW" => 0,
+    "PEPKPTIDW" => 0,
+    "PEPKTIDW" => 1,
+    "RTTIDR" => 1,
+    "RTTIKK" => 2,
+    "PKEPRTIDW" => 2,
+    "PKEPRTIDKP" => 2,
+    "PKEPRAALKPEERPTIDKW" => 3,
+    },
+    {
+    "EPTIDR" => 0,
+    "PEPTIDR" => 0,
+    "EEPTIDR" => 0,
+    "PEPTIDW" => 0,
+    "PERPTIDW" => 1,
+    "PEPKPTIDW" => 1,
+    "PEPKTIDW" => 1,
+    "RTTIDR" => 1,
+    "RTTIKK" => 2,
+    "PKEPRTIDW" => 2,
+    "PKEPRTIDKP" => 3,
+    "PKEPRAALKPEERPTIDKW" => 5,
+    }
+    ]
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
+      hash.each do |aaseq, val|
+        #first, middle, last = SpecID::Pep.split_sequence(seq)
+        # note that we are only using the middle section!
+        sample_enzyme.num_missed_cleavages(aaseq).should == val
+      end
+    end
+  end
+end