RubyGems - mspire - Versions diffs - 0.1.5 → 0.1.7 - Mend

mspire 0.1.5 → 0.1.7

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (47)

data/Rakefile +5 -2
data/bin/bioworks_to_pepxml.rb +84 -40
data/bin/fasta_shaker.rb +100 -0
data/bin/filter_spec_id.rb +185 -23
data/bin/gi2annot.rb +2 -110
data/bin/id_class_anal.rb +31 -21
data/bin/id_precision.rb +12 -8
data/bin/{false_positive_rate.rb → precision.rb} +1 -1
data/bin/protein_summary.rb +55 -62
data/changelog.txt +34 -0
data/lib/align.rb +0 -1
data/lib/fasta.rb +88 -24
data/lib/gi.rb +114 -0
data/lib/roc.rb +64 -58
data/lib/spec_id/aa_freqs.rb +166 -0
data/lib/spec_id/bioworks.rb +5 -1
data/lib/spec_id/precision.rb +427 -0
data/lib/spec_id/proph.rb +2 -2
data/lib/spec_id/sequest.rb +810 -113
data/lib/spec_id/srf.rb +486 -0
data/lib/spec_id.rb +107 -23
data/release_notes.txt +11 -0
data/script/estimate_fpr_by_cysteine.rb +226 -0
data/script/filter-peps.rb +3 -3
data/script/find_cysteine_background.rb +137 -0
data/script/gen_database_searching.rb +11 -7
data/script/genuine_tps_and_probs.rb +136 -0
data/script/top_hit_per_scan.rb +5 -2
data/test/tc_aa_freqs.rb +59 -0
data/test/tc_bioworks.rb +6 -1
data/test/tc_bioworks_to_pepxml.rb +25 -18
data/test/tc_fasta.rb +81 -3
data/test/tc_fasta_shaker.rb +147 -0
data/test/tc_gi.rb +20 -0
data/test/tc_id_class_anal.rb +9 -12
data/test/tc_id_precision.rb +12 -11
data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
data/test/tc_protein_summary.rb +31 -22
data/test/tc_roc.rb +95 -50
data/test/tc_sequest.rb +212 -145
data/test/tc_spec.rb +10 -5
data/test/tc_spec_id.rb +0 -2
data/test/tc_spec_id_xml.rb +36 -0
data/test/tc_srf.rb +216 -0
metadata +35 -21
data/lib/spec_id/false_positive_rate.rb +0 -476
data/test/tc_gi2annot.rb +0 -12

data/test/tc_id_precision.rb CHANGED Viewed

@@ -13,20 +13,21 @@ class IDPrecisionTest < Test::Unit::TestCase
   end
   def test_usage
-    puts "RUNNING: #{@cmd}"
+    #puts "RUNNING: #{@cmd}"
     assert_match(/usage:/, `#{@cmd}`)
   end
   ## freeze the output
   def test_basic
     cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml}"
-    puts "RUNNING: #{cmd}"
+    #puts "RUNNING: #{cmd}"
     reply = `#{cmd}`
     string =<<END
+#  NH = number of hits
 #  TP = true positives
 #  FP = false positives
 #  PR = precision = TP/(TP+FP)
-PepProts: TP,PepProts: PR,SeqCharge: TP,SeqCharge: PR,Scan(TopHit): TP,Scan(TopHit): PR,Scan(Top10): TP,Scan(Top10): PR,ScanCharge(TopHit): TP,ScanCharge(TopHit): PR,ScanCharge(Top10): TP,ScanCharge(Top10): PR
+PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
 75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
 95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
 125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
@@ -34,23 +35,23 @@ PepProts: TP,PepProts: PR,SeqCharge: TP,SeqCharge: PR,Scan(TopHit): TP,Scan(TopH
 186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
 193, 1.0, 94, 1.0, 168, 1.0, 193, 1.0, 170, 1.0, 193, 1.0
 204, 1.0, 95, 1.0, 169, 1.0, 204, 1.0, 171, 1.0, 204, 1.0
-212, 1.0, 97, 0.989795918367347, 171, 0.994186046511628, 212, 1.0, 173, 0.994252873563218, 212, 1.0
-214, 0.995348837209302, 99, 0.99, 172, 0.994219653179191, 214, 0.995348837209302, 175, 0.994318181818182, 214, 0.995348837209302
-216, 0.995391705069124, 106, 0.990654205607477, 180, 0.994475138121547, 216, 0.995391705069124, 183, 0.994565217391304, 216, 0.995391705069124
-227, 0.995614035087719, 107, 0.990740740740741, 181, 0.994505494505495, 227, 0.995614035087719, 184, 0.994594594594595, 227, 0.995614035087719
-228, 0.995633187772926, 108, 0.981818181818182, 182, 0.989130434782609, 228, 0.995633187772926, 185, 0.989304812834225, 228, 0.995633187772926
-229, 0.991341991341991, , , , , 229, 0.991341991341991, , , 229, 0.991341991341991
+212, 1.0, 97, 0.989690721649485, 171, 0.994152046783626, 212, 1.0, 173, 0.994219653179191, 212, 1.0
+214, 0.995327102803738, 99, 0.98989898989899, 172, 0.994186046511628, 214, 0.995327102803738, 175, 0.994285714285714, 214, 0.995327102803738
+216, 0.99537037037037, 106, 0.990566037735849, 180, 0.994444444444444, 216, 0.99537037037037, 183, 0.994535519125683, 216, 0.99537037037037
+227, 0.995594713656388, 107, 0.990654205607477, 181, 0.994475138121547, 227, 0.995594713656388, 184, 0.994565217391304, 227, 0.995594713656388
+228, 0.995614035087719, 108, 0.981481481481482, 182, 0.989010989010989, 228, 0.995614035087719, 185, 0.989189189189189, 228, 0.995614035087719
+229, 0.991266375545852, , , , , 229, 0.991266375545852, , , 229, 0.991266375545852
 END
     assert_equal(string, reply)
   end
   def test_basic_with_area
     cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
-    puts "RUNNING: #{cmd}"
+    #puts "RUNNING: #{cmd}"
     reply = `#{cmd}`
     string =<<END
 Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
-#{@tfiles}bioworks_with_INV_small.xml 228.925732151338 107.878858490814 181.929476342518 228.925732151338 184.92488713549 228.925732151338
+./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
 END
     assert_equal(string, reply, "area under the curve")
   end

data/test/{tc_false_positive_rate.rb → tc_precision.rb} RENAMED Viewed

@@ -3,18 +3,17 @@ require 'test/unit'
 require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
-class FalsePositiveRateTest < Test::Unit::TestCase
+class PrecisionTest < Test::Unit::TestCase
   ROOT_DIR = File.join(File.dirname(__FILE__), "..")
   def initialize(arg)
     super(arg)
     @tfiles = File.dirname(__FILE__) + '/tfiles/'
     @tf_bioworks_esmall_xml = @tfiles + "bioworks_with_INV_small.xml"
-    @tf_bioworks_small_xml = @tfiles + "bioworks_small.xml"
     @tf_bioworks_shuff = @tfiles + "bioworks_with_SHUFF_small.xml"
-    @cmd = "ruby -I#{File.join(ROOT_DIR, "lib")} -S false_positive_rate.rb "
-    @tf_html = "fpr.html"
-    @tf_png = "fpr.png"
+    @cmd = "ruby -I#{File.join(ROOT_DIR, "lib")} -S precision.rb "
+    @tf_html = @tfiles + "ppv.html"
+    @tf_png = @tfiles + "ppv.png"
     @nodelete = false
   end
@@ -22,20 +21,12 @@ class FalsePositiveRateTest < Test::Unit::TestCase
     assert_match(/Usage:/, `#{@cmd}`)
   end
-  def test_basic_double_dbs
-    output = `#{@cmd} -pg -f #{@tf_bioworks_shuff} #{@tf_bioworks_small_xml} #{@tf_bioworks_small_xml}`
-    assert_match('0.815', output, "precision")
-    assert_match('0.369', output, "Gygi's fpr")
-    assert_match('0.185', output, "fpr")
-    assert_match('106', output, "num true positives")
-    assert_match('24', output, "num false positives")
-  end
   def test_basic_cat
-    output = `#{@cmd} -o fpr.html -pg -f SHUFF_ #{@tf_bioworks_shuff}`
+    output = `#{@cmd} -o #{@tf_html} -f SHUFF_ #{@tf_bioworks_shuff}`
     puts output
     assert_match(/<table.*<\/table>/m, IO.read(@tf_html), "has html table in it")
+    assert_match(/10.*0.3000/m, IO.read(@tf_html), "has values")
     [@tf_html, @tf_png].each do |file|
       assert(File.exist?(file), "file #{file} exists")
       File.unlink(file) unless @nodelete
@@ -43,7 +34,9 @@ class FalsePositiveRateTest < Test::Unit::TestCase
   end
   def test_multiple_files
-    output = `#{@cmd} -o fpr.html -pg -f SHUFF_,INV_ #{@tf_bioworks_shuff} #{@tf_bioworks_esmall_xml}`
+    output = `#{@cmd} -o #{@tf_html} -f SHUFF_,INV_ #{@tf_bioworks_shuff} #{@tf_bioworks_esmall_xml}`
+    assert_match(/<table.*<\/table>/m, IO.read(@tf_html), "has html table in it")
+    assert_match(/1.*1.0000.*1.*1.0000.*0.*0.*15.*0.8667/m, IO.read(@tf_html), "has values")
     [@tf_html, @tf_png].each do |file|
       assert(File.exist?(file), "file #{file} exists")
       File.unlink(file) unless @nodelete
@@ -51,17 +44,15 @@ class FalsePositiveRateTest < Test::Unit::TestCase
   end
   def test_area_under_curve
-    file = File.join(File.dirname(__FILE__), 'fpr_area.txt')
-    `#{@cmd} -o #{file} -pga -f SHUFF_ #{@tf_bioworks_shuff}`
+    file = @tfiles + 'ppv_area.txt'
+    `#{@cmd} -o #{file} -a -f SHUFF_ #{@tf_bioworks_shuff}`
     assert(File.exist?(file), "file #{file} exists")
     output = IO.read(file)
-    assert_match(/Gygi.*2.25620/, output)
-    assert_match(/Prec.*7.87189/, output)
-    assert_match(/FPR.*1.12810/, output)
+    assert_match(/Prec.*7.39206/, output, "consistency check")
     File.unlink file
     outfile = File.join(File.dirname(__FILE__), 'other.html')
-    `#{@cmd} -o #{outfile} -pg -f SHUFF_ #{@tf_bioworks_shuff}`
+    `#{@cmd} -o #{outfile} -f SHUFF_ #{@tf_bioworks_shuff}`
     File.unlink outfile
     File.unlink File.join(File.dirname(__FILE__),'other.png')
   end

data/test/tc_protein_summary.rb CHANGED Viewed

@@ -11,8 +11,9 @@ class ProphProtSummaryTest < Test::Unit::TestCase
   def initialize(arg)
     super(arg)
     @tfiles = File.dirname(__FILE__) + '/tfiles/'
-    @tf_proph = @tfiles + "opd1/000_020-prot.xml"
-    @tf_summary = @tfiles + "opd1/000_020-prot.summary.html"
+    @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
+    @tf_proph = @tfiles_l + "opd1/000_020-prot.xml"
+    @tf_summary = @tfiles_l + "opd1/000_020-prot.summary.html"
     @tf_bioworks_small = @tfiles + 'bioworks_small.xml'
     @tf_bioworks_small_summary_html = @tfiles + 'bioworks_small.summary.html'
     @tf_proph_cat_inv =  @tfiles + 'opd1/opd1_cat_inv_small-prot.xml'
@@ -26,13 +27,17 @@ class ProphProtSummaryTest < Test::Unit::TestCase
     assert_match(/usage:/, `#{@cmd}`)
   end
-  def test_proph_basic
-    print `#{@cmd} -c 5.0 #{@tf_proph}`
-    assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
-    string = IO.read(@tf_summary)
-    assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
-    assert_match(/16132176/, string)
-    File.unlink(@tf_summary) unless NODELETE
+  def Xtest_proph_basic
+    if File.exist? @tfiles_l
+      print `#{@cmd} -c 5.0 #{@tf_proph}`
+      assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
+      string = IO.read(@tf_summary)
+      assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
+      assert_match(/16132176/, string)
+      File.unlink(@tf_summary) unless NODELETE
+    else
+      assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
+    end
   end
   def test_bioworks_basic
@@ -43,30 +48,34 @@ class ProphProtSummaryTest < Test::Unit::TestCase
     # @TODO: need to freeze the output here
   end
-  def test_bioworks_with_fpr
-    `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --fpr`
+  def test_bioworks_with_precision
+    `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
     assert_match('TP : 106', IO.read(@tf_bioworks_small_summary_html))
     assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
     assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
     File.unlink @tf_bioworks_small_summary_html unless NODELETE
   end
-  def test_proph_with_fpr
+  def Xtest_proph_with_precision
     #puts @cmd
-    print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --fpr`
+    print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --precision`
     File.unlink @tf_proph_cat_inv_summary_html unless NODELETE
     File.unlink @tf_proph_cat_inv_summary_png unless NODELETE
   end
-  def test_peptide_count
-    print `#{@cmd} -c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}`
-    assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
-    file = IO.read(@tf_peptide_count)
-    assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
-    assert_match("gi|16131996|ref|NP_418595.1|\t1", file)
-    assert_match("gi|16131692|ref|NP_418288.1|\t4", file)
-    File.unlink @tf_peptide_count unless NODELETE
+  def Xtest_peptide_count
+    if File.exist? @tfiles_l
+      print `#{@cmd} -c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}`
+      assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
+      file = IO.read(@tf_peptide_count)
+      assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
+      assert_match("gi|16131996|ref|NP_418595.1|\t1", file)
+      assert_match("gi|16131692|ref|NP_418288.1|\t4", file)
+      File.unlink @tf_peptide_count unless NODELETE
+    else
+      assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
+    end
   end
 end

data/test/tc_roc.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-#!/usr/bin/ruby -w
 require 'test/unit'
@@ -8,42 +7,40 @@ class ROCTest < Test::Unit::TestCase
   def initialize(arg)
     super(arg)
-    @tp_methods = [:tps_and_precision, :tps_and_fpr2, :tps_and_fpr2_times2]
   end
   def test_area_under_curve
     x = [1,2,3]
     y = [2,3,4]
-    area = ROC.new.area_under_curve(x,y)
-    assert_equal(6, area)
+    _test_auc(6,x,y)
     x = [1,2,3]
     y = [-2,-3,-4]
-    area = ROC.new.area_under_curve(x,y)
-    assert_equal(-6, area)
+    _test_auc(-6, x, y)
     x = [1,2,3]
     y = [4,3,2]
-    area = ROC.new.area_under_curve(x,y)
-    assert_equal(6, area)
+    _test_auc(6, x, y)
     x = [1,2,3]
     y = [-4,-3,-2]
-    area = ROC.new.area_under_curve(x,y)
-    assert_equal(-6, area)
+    _test_auc(-6, x, y)
     x = [4,5,6]
     y = [2,1,2]
-    area = ROC.new.area_under_curve(x,y)
-    assert_equal(3, area)
+    _test_auc(3, x, y)
     x = [4,5,6]
     y = [-2,-1,-2]
+    _test_auc(-3, x, y)
+  end
+  def _test_auc(expected_area,x,y)
     area = ROC.new.area_under_curve(x,y)
-    assert_equal(-3, area)
+    assert_equal(expected_area, area)
   end
- def test_prep_list
+  def test_prep_list
     t = true
     f = false
     x,y = ROC.new.prep_list([[0,f],[1,f],[2,f],[3,t],[3,f],[0,f],[4,f],[1,t],[2,t]])
@@ -51,71 +48,119 @@ class ROCTest < Test::Unit::TestCase
     assert_equal([0,0,1,2,3,4], y)
   end
-  def test_tps_and_precision1
+  def test_tps_and_ppv
     tp = %w(1 2 3 4 5 6 6 6 7 8 9 10 10 10 10 11 12 ).collect {|c| c.to_f } # 17 total
     fp = %w(3.5 4 5 5 5 6 6 6.5 7 8 9 9.5 10 15).collect {|c| c.to_f } # 14 total
     xe = [1, 2, 3, 4, 5, 8, 9, 10, 11, 15, 16, 17]
     #       1, 2, 3, 4        5,   6,             7,             8,   9
     #       10              11                12
     ye = [1, 1, 1, 4.0/6.0, 0.5, 8.0/(7.0+8.0), 9.0/(9.0+9.0), 0.5, 11.0/(11.0+ 11.0),  15.0/(15.0+13.0), 16.0/(16.0+13.0), 17.0/(17.0+13.0)]
-    x, y = ROC.new.tps_and_precision(tp,fp)
-    assert_equal(x.size, y.size)
-    assert_equal(xe, x)
-    assert_equal(ye, y, "complex real-life-like scenario")
-  end
+    _test_tps_and_ppv_method(tp,fp,xe,ye,"complex real-life-like scenario")
-  def test_tps_and_precision
     ## leading fp's
     tp = [1,2,3]
     fp = [0,0,1,2,3,4]
     xe = [1,2,3]
-    ye_s = [[1.0/(1+3), 2.0/(2+4), 3.0/(3+5)],
-           [3.0/(1+3), 4.0/(2+4), 5.0/(3+5)],
-           [6.0/(1+3), 8.0/(2+4), 10.0/(3+5)],
-    ]
-    _test_tp_method(tp,fp,xe,ye_s,@tp_methods)
+    ye = [1.0/(1+3), 2.0/(2+4), 3.0/(3+5)]
+    _test_tps_and_ppv_method(tp,fp,xe,ye, "leading fps")
     ## leading tp's
     tp = [-1,2,3]
     fp = [0,4]
     xe = [1,2,3]
-    ye_s = [[1.0/(1+0), 2.0/(2+1), 3.0/(3+1)],
-            [0.0/(1+0), 1.0/(2+1), 1.0/(3+1)],
-            [0.0/(1+0), 2.0/(2+1), 2.0/(3+1)],
-    ]
-    _test_tp_method(tp,fp,xe,ye_s,@tp_methods)
+    ye = [1.0/(1+0), 2.0/(2+1), 3.0/(3+1)]
+    _test_tps_and_ppv_method(tp,fp,xe,ye, "leading tps")
     ## equal tp's leading
     tp = [0.0001,0.0001,0.0001,2]
     fp = [0.01,4.0]
     xe = [3,4]
-    ye_s = [[3.0/(3+0), 4.0/(4+1)],
-      [0.0/(3+0), 1.0/(4+1)],
-      [0.0/(3+0), 2.0/(4+1)]
-    ]
-    _test_tp_method(tp,fp,xe,ye_s,@tp_methods)
+    ye = [3.0/(3+0), 4.0/(4+1)]
+    _test_tps_and_ppv_method(tp,fp,xe,ye, "equal tps leading")
     ## equal arrays with some repeated values
     tp = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
     fp = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
     xe = [1,2,4,5,6,7]
-    ye_s = [[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
-      [0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
-      [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
-    ]
-    _test_tp_method(tp,fp,xe,ye_s,@tp_methods)
+    ye = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
+    _test_tps_and_ppv_method(tp,fp,xe,ye, "equal arrays with some repeated values")
+  end
+  def _test_tps_and_ppv_method(tp,fp,xe,ye,message='')
+    (x,y) = ROC.new.tps_and_ppv(tp,fp)
+    assert_equal(x.size, y.size)
+    assert_equal(xe, x)
+    assert_equal(ye, y, "tps_and_ppv: #{message}")
+  end
+end
+class DecoyROCTest < ROCTest
+  def test_pred_tps_ppv__leading_fps
+    ## leading fp's
+    hits = [1,2,3]
+    decoys = [0,0,1,2,3,4]
+    num_hits_e = [1,2,3]
+    num_fps = [3,4,5]
+    tps_e = make_tps_e(num_fps, num_hits_e)
+    ppv_e = make_ppv_e(tps_e, num_hits_e)
+    _test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
+  end
+  def test_pred_tps_ppv__leading_tps
+    ## leading tp's
+    hits = [-1,2,3]
+    decoys = [0,4]
+    num_hits_e = [1,2,3]
+    num_fps = [0,1,1]
+    tps_e = make_tps_e(num_fps, num_hits_e)
+    ppv_e = make_ppv_e(tps_e, num_hits_e)
+    _test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
+  end
+  def test_pred_tps_ppv__equal_tps_leading
+    hits = [0.0001,0.0001,0.0001,2]
+    decoys = [0.01,4.0]
+    num_hits_e = [3,4]
+    num_fps = [0,1]
+    tps_e = make_tps_e(num_fps, num_hits_e)
+    ppv_e = make_ppv_e(tps_e, num_hits_e)
+    _test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
   end
-  def _test_tp_method(tp,fp,xe,ye_s,methods)
-    roc = ROC.new
-    methods.zip(ye_s) do |arr|
-      method, ye = arr[0], arr[1]
-      x,y = roc.send(method, tp, fp)
-      assert_equal(x.size, y.size)
-      assert_equal(xe, x)
-      assert_equal(ye, y, "method: #{arr[0]}")
+  def test_pred_tps_ppv__equal_arrays_with_some_repeated_values
+    hits = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
+    decoys = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
+    num_hits_e = [1,2,4,5,6,7]
+    num_fps = [1,2,4,5,6,7]
+    tps_e = make_tps_e(num_fps, num_hits_e)
+    ppv_e = make_ppv_e(tps_e, num_hits_e)
+    _test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
+  end
+  def _test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
+    answer = DecoyROC.new.pred_and_tps_and_ppv(hits, decoys)
+    expected = [num_hits_e, tps_e, ppv_e]
+    %w(num_hits num_tps ppv).each_with_index do |cat, i|
+      assert_equal(expected[i], answer[i], cat)
+    end
+  end
+  def make_tps_e(num_fps, num_hits_e)
+    tps_e = []
+    num_hits_e.each_with_index do |v,i|
+      tps_e[i] = v - num_fps[i]
     end
+    tps_e
+  end
+  def make_ppv_e(tps_e, num_hits_e)
+    ppv_e = []
+    tps_e.each_with_index {|v,i| ppv_e[i] = v.to_f/num_hits_e[i] }
+    ppv_e
   end
 end