mspire 0.1.5 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/Rakefile +5 -2
  2. data/bin/bioworks_to_pepxml.rb +84 -40
  3. data/bin/fasta_shaker.rb +100 -0
  4. data/bin/filter_spec_id.rb +185 -23
  5. data/bin/gi2annot.rb +2 -110
  6. data/bin/id_class_anal.rb +31 -21
  7. data/bin/id_precision.rb +12 -8
  8. data/bin/{false_positive_rate.rb → precision.rb} +1 -1
  9. data/bin/protein_summary.rb +55 -62
  10. data/changelog.txt +34 -0
  11. data/lib/align.rb +0 -1
  12. data/lib/fasta.rb +88 -24
  13. data/lib/gi.rb +114 -0
  14. data/lib/roc.rb +64 -58
  15. data/lib/spec_id/aa_freqs.rb +166 -0
  16. data/lib/spec_id/bioworks.rb +5 -1
  17. data/lib/spec_id/precision.rb +427 -0
  18. data/lib/spec_id/proph.rb +2 -2
  19. data/lib/spec_id/sequest.rb +810 -113
  20. data/lib/spec_id/srf.rb +486 -0
  21. data/lib/spec_id.rb +107 -23
  22. data/release_notes.txt +11 -0
  23. data/script/estimate_fpr_by_cysteine.rb +226 -0
  24. data/script/filter-peps.rb +3 -3
  25. data/script/find_cysteine_background.rb +137 -0
  26. data/script/gen_database_searching.rb +11 -7
  27. data/script/genuine_tps_and_probs.rb +136 -0
  28. data/script/top_hit_per_scan.rb +5 -2
  29. data/test/tc_aa_freqs.rb +59 -0
  30. data/test/tc_bioworks.rb +6 -1
  31. data/test/tc_bioworks_to_pepxml.rb +25 -18
  32. data/test/tc_fasta.rb +81 -3
  33. data/test/tc_fasta_shaker.rb +147 -0
  34. data/test/tc_gi.rb +20 -0
  35. data/test/tc_id_class_anal.rb +9 -12
  36. data/test/tc_id_precision.rb +12 -11
  37. data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
  38. data/test/tc_protein_summary.rb +31 -22
  39. data/test/tc_roc.rb +95 -50
  40. data/test/tc_sequest.rb +212 -145
  41. data/test/tc_spec.rb +10 -5
  42. data/test/tc_spec_id.rb +0 -2
  43. data/test/tc_spec_id_xml.rb +36 -0
  44. data/test/tc_srf.rb +216 -0
  45. metadata +35 -21
  46. data/lib/spec_id/false_positive_rate.rb +0 -476
  47. data/test/tc_gi2annot.rb +0 -12
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ # Here is what I plotted: take each peptide-protein pair identified on the top-hit scans (the same pair will likely be identified on multiple scans) and plot the top probability of each such pair.
4
+ # There are 43 such identified peptides for Sashimi, whereas SEQUEST identifies about 66. So you'll have 66 (1 - p-value) points to plot where I had 43. Similarly for OMICS.
5
+
6
+ require 'spec_id'
7
+ require 'fasta'
8
+ require 'optparse'
9
+ require 'ostruct'
10
+
# Returns the accession number of a fasta protein when its header matches
# ACC_REGEX (the text captured between the first two '|' characters).
# Otherwise returns the entire header, less the starting '>' and any
# trailing whitespace.
def get_fasta_accession(fasta_prot)
  header = fasta_prot.header
  # a successful match leaves the captured accession in $1
  return $1.dup if header =~ ACC_REGEX
  header.sub(/^>/, '').rstrip
end
21
+
# Returns the accession number found in the reference of the peptide's
# protein (the text ACC_REGEX captures between the first two '|'
# characters), or the complete reference with trailing whitespace removed
# when the reference does not match.
def accession_from_ref(pep)
  reference = pep.prot.reference
  # a successful match leaves the captured accession in $1
  return $1.dup if reference =~ ACC_REGEX
  reference.rstrip
end
31
+
# Returns the accession stored on the peptide's protein. When that value is
# missing (nil/false) or is a zero placeholder ('0' or 0), the accession is
# derived from the protein reference instead.
def get_pep_prot_accession(pep)
  accession = pep.prot.accession
  if accession && accession != '0' && accession != 0
    accession
  else
    accession_from_ref(pep)
  end
end
40
+
#####################################################################
# MAIN
#
# Reads a bioworks search result (ARGV[0]) and a fasta file of true hits
# (ARGV[1]).  Keeps the best peptide per scan, then the best peptide per
# (sequence, protein accession) pair, and prints a tab delimited table to
# stdout with one numbered row for every pair whose accession occurs in
# the fasta file.
#####################################################################

require 'set'

opt = OpenStruct.new
opt.p = 'prob'
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} bioworks.xml true_hits.fasta"
  op.separator " [prints to stdout tab delimited table]"
  op.on('-t', '--ties', 'allow ties on best hit') {|v| opt.t = v }
  op.on('-p', '--param <s>', 'param: (xcorr | prob)') {|v| opt.p = v}
end
opts.parse!

if ARGV.size < 2
  puts opts
  exit
end

# param: the peptide attribute used as the score
# best:  which end of an ascending sort on that score holds the best hit
case opt.p
when 'prob'
  param = :peptide_probability
  best = :first
when 'xcorr'
  param = :xcorr
  best = :last
else
  abort "incorrect param: #{opt.p}"
end

############################
# GLOBALS
DELIM = "\t"
ACC_REGEX = /\|(.*?)\|/
############################

bioworks = ARGV[0]
fasta_file = ARGV[1]

fprots = Fasta.new.read_file(fasta_file).prots
# a Set keeps the membership test in the output loop constant-time
gi_nums = Set.new(fprots.map {|prot| get_fasta_accession(prot) })

peptides = SpecID.new(bioworks).peps


## Get the best peptide(s) per scan
top_peps_per_scan = []

peptides.hash_by(:base_name, :first_scan).each do |_bn_scan, pep_array|
  sorted_list = pep_array.sort_by {|pep| pep.send(param).to_f }

  top_pep = (best == :first) ? sorted_list.shift : sorted_list.pop
  top_score = top_pep.send(param).to_f
  tied = sorted_list.select {|pep| pep.send(param).to_f == top_score }

  if opt.t
    # ties allowed: keep the top hit and everything that ties with it
    top_peps_per_scan.push(top_pep, *tied)
  elsif tied.empty?
    # ties not allowed: a scan whose best score is shared is dropped
    top_peps_per_scan.push(top_pep)
  end
end


## Get the best scoring peptide per peptide/prot from list of best
## peptides/scan
peps_by_seq_and_prot = top_peps_per_scan.hash_by {|pep| [pep.sequence, get_pep_prot_accession(pep)] }
top_pep_seq_prots = peps_by_seq_and_prot.map do |_key, pep_array|
  pep_array.sort_by {|pep| pep.send(param).to_f }.send(best)
end

## sort the peptides by best score
sorted_top_pep_seq_prots = top_pep_seq_prots.sort_by {|pep| pep.send(param).to_f }
sorted_top_pep_seq_prots.reverse! if best == :last

## plot the probability vs. the number of tps
puts ['#TPs', param, 'sequence', 'protein accession', 'xcorr'].join(DELIM)
tps = 0
sorted_top_pep_seq_prots.each do |pep|
  accession = get_pep_prot_accession(pep)
  next unless gi_nums.include?(accession)
  tps += 1
  puts [tps.to_s, pep.send(param), pep.sequence, accession, pep.xcorr].join(DELIM)
end
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
@@ -4,7 +4,6 @@
4
4
  cats = %w(base_name sequence xcorr deltacn first_scan last_scan)
5
5
  ###################################################################
6
6
 
7
- require 'pp'
8
7
  require 'spec_id'
9
8
  require 'hash_by'
10
9
 
@@ -46,7 +45,11 @@ outfile_top = file.sub(/\.xml$/, extension_top)
46
45
  outfile_all = file.sub(/\.xml$/, extension_all)
47
46
 
48
47
  sp = SpecID.new(file)
49
- pep_hash = sp.peps.hash_by(:first_scan, :last_scan)
48
+
49
+ # The old (incorrect) version:
50
+ # pep_hash = sp.peps.hash_by(:first_scan, :last_scan)
51
+ # The correct version:
52
+ pep_hash = sp.peps.hash_by(:base_name, :first_scan, :last_scan)
50
53
  top_per_scan = pep_hash.map {|k,v| v.sort_by {|ob| ob.xcorr.to_f }.last }
51
54
  top_per_scan = top_per_scan.sort_by {|pep| pep.first_scan.to_i }
52
55
 
@@ -0,0 +1,59 @@
1
+
2
+
3
+ require 'test/unit'
4
+ require 'spec_id/aa_freqs'
5
+
6
+
# Tests for SpecID::AAFreqs: amino acid frequencies computed from a small
# fasta file, the probability-of-length table, and the actual/expected
# counts of peptides containing a given amino acid.
#
# NOTE(review): tc_fasta.rb also declares a class named FastaTest; if both
# files are loaded into the same run the two definitions would be merged --
# confirm that this name is intended.
class FastaTest < Test::Unit::TestCase

  def initialize(arg)
    super(arg)
    @tfiles = File.dirname(__FILE__) + '/tfiles/'
    @sf = "#{@tfiles}small.fasta"
  end

  # every expected residue is present with the expected frequency, and all
  # frequencies together add up to one
  def test_basic
    expect = {
      :I=>0.0628918621937819,
      :S=>0.0539719475147049,
      :D=>0.0526145691939758,
      :Z=>0.0,
      :L=>0.102772929998061,
      :T=>0.0491888048607071,
      :E=>0.0609527503070261,
      :O=>0.0,
      :C=>0.0157714433456144,
      :K=>0.0471850559110594,
      :U=>0.0,
      :Q=>0.0382651412319824,
      :W=>0.0137030573330748,
      :A=>0.101997285243359,
      :M=>0.0294745006786892,
      :J=>0.0,
      :G=>0.0811195139292871,
      :Y=>0.0254670027793937,
      :X=>0.0,
      :F=>0.0418201796910348,
      :R=>0.0546829552065154,
      :V=>0.0702604873634542,
      :H=>0.0213302307543145,
      :B=>0.0,
      :N=>0.03471010277293,
      :P=>0.0418201796910348
    }
    aaf = SpecID::AAFreqs.new(@sf).aafreqs
    expect.each_pair do |residue, freq|
      assert(aaf.key?(residue))
      assert_in_delta(freq, aaf[residue], 0.00000001, "freqs match up")
    end
    total = aaf.values.inject(0.0) {|acc, freq| acc + freq }
    assert_in_delta(1.0, total, 0.0000000000001, "all freqs add to 1")
  end

  def test_probability_of_length_table
    # p SpecID::AAFreqs.probability_of_length_table(0.01, 4)
    for_one_percent = [0.0, 0.01, 0.0199, 0.029701, 0.0394039900000001]
    assert_equal_arrs_in_delta(for_one_percent, SpecID::AAFreqs.probability_of_length_table(0.01, 4), 0.000000001)

    for_twenty_percent = [0.0, 0.2, 0.36, 0.488, 0.5904]
    assert_equal_arrs_in_delta(for_twenty_percent, SpecID::AAFreqs.probability_of_length_table(0.2, 4), 0.000000001)
  end

  def test_actual_and_expected_number
    fobj = Fasta.new.read_file(@sf)
    obj = SpecID::AAFreqs.new
    obj.aafreqs = obj.calculate_frequencies(fobj)

    # the first 13 residues of every protein stand in for peptides
    peptide_aaseqs = fobj.prots.map {|prot| prot.aaseq[0..12] }
    assert_equal(50, peptide_aaseqs.size, 'sanity check')
    actual, expected = obj.actual_and_expected_number(peptide_aaseqs, :C, 1)
    assert_equal(9, actual)
    assert_in_delta( 9.33530631238985, expected, 0.0000000001)
  end

  private

  # asserts that actual[i] lies within delta of expect[i] for every index
  # of expect
  def assert_equal_arrs_in_delta(expect, actual, delta)
    expect.size.times do |i|
      assert_in_delta(expect[i], actual[i], delta)
    end
  end

end
data/test/tc_bioworks.rb CHANGED
@@ -8,7 +8,8 @@ class BioworksTest < Test::Unit::TestCase
8
8
  def initialize(arg)
9
9
  super(arg)
10
10
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
11
- @tf_bioworks_xml = @tfiles + "bioworks.xml"
11
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
12
+ @tf_bioworks_xml = @tfiles_l + "bioworks.xml"
12
13
  @tf_bioworks_xml_small = @tfiles + "bioworks_small.xml"
13
14
  @tf_bioworks_xml_really_small = @tfiles + "bioworks_with_INV_small.xml"
14
15
  @tf_params = @tfiles + "bioworks32.params"
@@ -34,9 +35,13 @@ class BioworksTest < Test::Unit::TestCase
34
35
  end
35
36
 
36
37
  def Xtest_xml_parsing_speed
38
+ if File.exist? @tfiles_l
37
39
  #puts Benchmark.bm {|b|
38
40
  obj = SpecID::Bioworks.new(@tf_bioworks_xml)
39
41
  #}
42
+ else
43
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
44
+ end
40
45
  end
41
46
 
42
47
  def test_xml_parsing_bioworks_single
@@ -9,11 +9,10 @@ class BioworksToPepXMLTest < Test::Unit::TestCase
9
9
  def initialize(arg)
10
10
  super(arg)
11
11
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
12
- @tf_mzxml_path = @tfiles + "yeast_gly_mzXML"
12
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
13
+ @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
13
14
  @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
14
15
  @tf_params = @tfiles + "bioworks32.params"
15
- @tf_opd1 = @tfiles + "opd1/bioworks.000.oldparams.xml"
16
- @tf_opd1_mzxml = @tfiles + "opd1/000.mzXML.timeIndex"
17
16
  @no_delete = false
18
17
  @out_path = @tfiles + 'pepxml/'
19
18
  @cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S bioworks_to_pepxml.rb "
@@ -33,23 +32,31 @@ class BioworksToPepXMLTest < Test::Unit::TestCase
33
32
  end
34
33
 
35
34
  def test_basic
36
- cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path"
37
- prc = proc {|file|
38
- assert(File.exist?(file), "#{file} exists")
39
- }
40
- _basic(cmd, prc)
41
- unless @no_delete then FileUtils.rm_rf(@out_path) end
35
+ if File.exist? @tfiles_l
36
+ cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path"
37
+ prc = proc {|file|
38
+ assert(File.exist?(file), "#{file} exists")
39
+ }
40
+ _basic(cmd, prc)
41
+ unless @no_delete then FileUtils.rm_rf(@out_path) end
42
+ else
43
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
44
+ end
42
45
  end
43
46
 
44
47
  def test_database
45
- cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path}"
46
- db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
47
- assert_match(db_re, IO.read(@tf_params))
48
- prc = proc {|file|
49
- assert(File.exist?(file))
50
- assert_no_match(db_re, IO.read(file))
51
- }
52
- _basic(cmd, prc)
53
- unless @no_delete then FileUtils.rm_rf(@out_path) end
48
+ if File.exist? @tfiles_l
49
+ cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path}"
50
+ db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
51
+ assert_match(db_re, IO.read(@tf_params))
52
+ prc = proc {|file|
53
+ assert(File.exist?(file))
54
+ assert_no_match(db_re, IO.read(file))
55
+ }
56
+ _basic(cmd, prc)
57
+ unless @no_delete then FileUtils.rm_rf(@out_path) end
58
+ else
59
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
60
+ end
54
61
  end
55
62
  end
data/test/tc_fasta.rb CHANGED
@@ -4,6 +4,8 @@ require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
4
4
  require 'test/unit'
5
5
  require 'fasta'
6
6
  require 'assert_files'
7
+ require 'sample_enzyme'
8
+ require 'set'
7
9
 
8
10
 
9
11
  module Test::Unit::Assertions
@@ -11,6 +13,7 @@ module Test::Unit::Assertions
11
13
  end
12
14
 
13
15
  class FastaTest < Test::Unit::TestCase
16
+ NODELETE = false
14
17
 
15
18
  def initialize(arg)
16
19
  super(arg)
@@ -73,7 +76,7 @@ class FastaTest < Test::Unit::TestCase
73
76
 
74
77
  def test_mod
75
78
  ## Testing shuffle:
76
- puts `#{@fasta_mod_cmd + 'shuffle ' + @sf}`
79
+ `#{@fasta_mod_cmd + 'shuffle ' + @sf}`
77
80
  assert(File.exist?(@sf_shuffle), "output file #{@sf_shuffle} exists")
78
81
  ob1 = Fasta.new.read_file(@sf)
79
82
  ob2 = Fasta.new.read_file(@sf_shuffle)
@@ -83,7 +86,7 @@ class FastaTest < Test::Unit::TestCase
83
86
  assert(_are_shuffled?(ob1,ob2))
84
87
 
85
88
  ## Testing invert:
86
- puts `#{@fasta_mod_cmd + 'invert ' + @sf}`
89
+ `#{@fasta_mod_cmd + 'invert ' + @sf}`
87
90
  assert(File.exist?(@sf_invert), "output file #{@sf_invert} exists")
88
91
  ob1 = Fasta.new.read_file(@sf)
89
92
  ob2 = Fasta.new.read_file(@sf_invert)
@@ -94,7 +97,7 @@ class FastaTest < Test::Unit::TestCase
94
97
 
95
98
  ## Testing prefix
96
99
  #puts "#{@fasta_mod_cmd + '-p _HELLO_ invert ' + @sf}"
97
- puts `#{@fasta_mod_cmd + 'invert -p _HELLO_ ' + @sf}` # NOT WORKING!
100
+ `#{@fasta_mod_cmd + 'invert -p _HELLO_ ' + @sf}` # NOT WORKING!
98
101
  assert(File.exist?(@sf_invert), "output file #{@sf_invert} exists")
99
102
  ob1 = Fasta.new.read_file(@sf)
100
103
  ob2 = Fasta.new.read_file(@sf_invert)
@@ -176,6 +179,81 @@ class FastaTest < Test::Unit::TestCase
176
179
  end
177
180
  end
178
181
 
# Checks Fasta::Prot#invert_tryptic_peptides! on individual proteins.
def test_invert_tryptic_peptides
  # FOR INDIVIDUAL PROTEINS:
  whole = 'ABCKCDERDEKDGEKWXYRRKDER'
  # tryptic = ABCK, CDER, DEK, DGEK, WXYR, R, K, DER
  # (the digest is invoked here as before; its result is not asserted)
  SampleEnzyme.tryptic(whole)

  # [sequence, expected sequence after inversion, assertion message]
  cases = [
    [whole, %w(CBAK EDCR EDK EGDK YXWR R K EDR).join(''), "reversing tryptic peptides"],
    ['XYRABCD', 'YXRDCBA', 'last peptide treated special'],
    ['XYRPABCD', 'DCBAPRYX', 'with a proline']
  ]
  cases.each do |aaseq, inverted, message|
    prot = Fasta::Prot.new(nil, aaseq)
    prot.invert_tryptic_peptides!
    assert_equal(inverted, prot.aaseq, message)
  end
end
203
+
# Checks Fasta#fraction_of_prots: the number of proteins returned for
# several fractions and the header prefix produced by the prefix proc.
def test_fraction_of_prots
  entries = [['>silly1', "PEPTIDE"], ['>silly2', "ANOTHER"], ['>silly3', "AGAIN"], ['>silly4', "LARMA"]]
  f = Fasta.new(entries.map {|header, seq| Fasta::Prot.new(header, seq) })

  # simple:
  n = f.fraction_of_prots(1.0)
  assert_equal(f.prots.map{|v| v.header }.to_set, n.prots.map{|v| v.header }.to_set, "same headers")
  assert_equal(f.prots.map{|v| v.aaseq }.to_set, n.prots.map{|v| v.aaseq }.to_set, "same aaseqs")

  pre = proc {|cnt| "SHUFF_f#{cnt}_" }
  # test prefix
  n = f.fraction_of_prots(1.0, pre)
  n.prots.each do |prot|
    assert_match(/^>SHUFF_f0_/, prot.header, "contains new prefix")
  end

  # smaller
  n = f.fraction_of_prots(0.75, pre)
  assert_equal(3, n.prots.size, "correct number of proteins")

  # bigger
  n = f.fraction_of_prots(2.5, pre)
  assert_equal(10, n.prots.size, "correct number of proteins")
  # each slice of proteins is expected to carry its own counter in the prefix
  [[0..3, 0], [4..7, 1], [8..9, 2]].each do |range, cnt|
    n.prots[range].each {|prt| assert_match(/^>SHUFF_f#{cnt}_/, prt.header ) }
  end

  # crazy
  n = f.fraction_of_prots(1.33, pre)
  assert_equal(6, n.prots.size, "correct number of proteins")
end
235
+
# Inverts the tryptic peptides of every protein in the small fasta file,
# writes the result to a temporary file and checks the first and the last
# sequence line of that file.  The temporary file is removed even when an
# assertion fails, unless NODELETE is set.
def test_inverted_tryptic_peptides_for_file
  # for a file:
  tmpfile = @tfiles + "fasta.tmp"
  fasta = Fasta.new.read_file(@sf)
  fasta.aaseq_invert_tryptic_peptides!
  fasta.write_file(tmpfile)
  lines = IO.readlines(tmpfile)

  #normal = 'MKRISTTITTTITITTGNGAG'
  # 'MK' and 'R' read the same after inversion (the cleavage residue stays
  # at the end of its peptide); the final peptide is reversed completely,
  # as in the 'XYRABCD' -> 'YXRDCBA' case of test_invert_tryptic_peptides.
  inverted_tryptic = 'MKRGAGNGTTITITTTITTSI'
  assert_equal(inverted_tryptic, lines[1].chomp)

  #normal = 'MATYLIGDVHGCYDELIALLHKVEFTPGKDTLWLTGDLVARGPGSLDVLRYVKSLGDSVRLVLGNHDLHL
  # LAVFAGISRNKPKDRLTPLLEAPDADELLNWLRRQPLLQIDEEKKLVMAHAGITPQWDLQTAKECARDVE
  # AVLSSDSYPFFLDAMYGDMPNNWSPELRGLGRLRFITNAFTRMRFCFPNGQLDMYSKESPEEAPAPLKPW
  # FAIPGPVAEEYSIAFGHWASLEGKGTPEGIYALDTGCCWGGTLTCLRWEDKQYFVQPSNRHKDLGEAAAS'
  inverted_tryptic = 'HLLAILEDYCGHVDGILYTAMKGPTFEVKAVLDGTLWLTDRLVDLSGPGRVYKVSDGLSRSIGAFVALLHLDHNGLVLRPKNKDRLWNLLEDADPAELLPTLRREEDIQLLPQKKATQLDWQPTIGAHAMVLKACERLEPSWNNPMDGYMADLFFPYSDSSLVAEVDRGLGRLRTFANTIFRMRSYMDLQGNPFCFKGELSAWHGFAISYEEAVPGPIAFWPKLPAPAEEPSEKLCTLTGGWCCGTDLAYIGEPTGRDEWKNSPQVFYQRHKSAAAEGLD'
  assert_equal(inverted_tryptic, lines[-1].chomp)
ensure
  # clean up on failure as well, so a failed run leaves no stale file behind
  File.unlink(tmpfile) if !NODELETE && tmpfile && File.exist?(tmpfile)
end
254
+
255
+
256
+
179
257
  ## HELPER ASSERTIONS:
180
258
 
181
259
  def _are_inverted?(obj1, obj2)
@@ -0,0 +1,147 @@
1
+
2
+ require 'test/unit'
3
+ require 'fasta'
4
+
# Fasta text used as the input of the fasta_shaker tests.
Filestring = <<FASTA
>gi|P1
AMKRGAN
>gi|P2
CRGATKKTAGRPMEK
>gi|P3
PEPTIDE
FASTA

# Filestring with every sequence reversed.
Rev = <<FASTA
>gi|P1
NAGRKMA
>gi|P2
KEMPRGATKKTAGRC
>gi|P3
EDITPEP
FASTA

# Expected output of "reverse --tryptic_peptides" for Filestring.
RevTryptic = <<FASTA
>gi|P1
MAKRNAG
>gi|P2
CRTAGKKEMPRGATK
>gi|P3
EDITPEP
FASTA

# Same text as RevTryptic.
# NOTE(review): not referenced by the tests visible in this file -- confirm
# whether it is still needed.
ShuffTryptic = <<FASTA
>gi|P1
MAKRNAG
>gi|P2
CRTAGKKEMPRGATK
>gi|P3
EDITPEP
FASTA
36
+
37
+
38
+
# Runs the fasta_shaker.rb command line program on a small fasta file
# (Filestring) and checks the files it writes.
class TestBasic < Test::Unit::TestCase

  # writes the input fasta file and builds the command prefix
  def setup
    here = File.dirname(__FILE__)
    @cmd = "ruby -I #{here + '/../lib'} #{here + '/../bin'}/fasta_shaker.rb "
    @tfiles = here + '/tfiles/'
    @tmpfile = @tfiles + "littlefasta.trash.fasta"
    File.open(@tmpfile, "w") {|fh| fh.print Filestring }
    @f = @tfiles + "trash.fasta"
  end

  # removes the input file and the output file when they exist
  def teardown
    [@tmpfile, @f].each do |path|
      File.unlink path if File.exist? path
    end
  end

  def test_reverse
    system(@cmd + "reverse #{@tmpfile} -o #{@f}")
    assert_equal(Rev, fastap(@f).to_s)
  end

  def test_reverse_tryptic
    system(@cmd + "reverse #{@tmpfile} -o #{@f} --tryptic_peptides")
    assert_equal(RevTryptic, fastap(@f).to_s)
  end

  def test_shuff_tryptic
    system(@cmd + "shuffle #{@tmpfile} -o #{@f} --tryptic_peptides")
    lns = fastap(@f).to_s.split("\n")
    unshuffled = 'CRGATKKTAGRPMEK'
    assert_equal('KR', lns[1][2..3])
    assert_equal('R', lns[3][1..1])
    assert_equal(unshuffled.size, lns[3].size, "sequence is same size")
    assert_not_equal(unshuffled, lns[3], "sequence is randomised from original [remote chance of failure] rerun to make sure")
  end

  def test_shuffle
    system(@cmd + "shuffle #{@tmpfile} -o #{@f}")
    clines = strlns(Filestring)
    lns = fastalns(@f)
    lns.each_with_index do |line, i|
      assert_equal(clines[i].size, line.size, "same size lines: A: <<#{clines[i]}>> B: <<#{line}>>")
    end
    unshuffled = 'CRGATKKTAGRPMEK'
    assert_equal(unshuffled.size, lns[3].size, "sequence is same size")
    assert_not_equal(unshuffled, lns[3], "sequence is randomised from original [remote chance of failure] rerun to make sure")
  end

  def test_cat
    `#{@cmd + "reverse #{@tmpfile} -c -o #{@f}"}` ## suppress warning
    lns = fastalns(@f)
    assert_equal(strlns(Filestring), lns[0..5], "first part equal")
    assert_equal(strlns(Rev), lns[6..-1], "second part equal")
  end

  def test_fraction
    `#{@cmd + "reverse #{@tmpfile} -f 2.6 -o #{@f}"}`
    assert_equal(8, fastap(@f).size)

    `#{@cmd + "shuffle #{@tmpfile} -f 2.0 -c -p MINE_ -o #{@f}"}`
    assert_equal(9, fastap(@f).size)
    fp = fastap(@f)
    # [slice of the output proteins, pattern each header must match]
    [[0..2, /^>/], [3..5, /^>MINE_f0_/], [6..8, /^>MINE_f1_/]].each do |range, pattern|
      fp[range].each do |prt|
        assert_match(pattern, prt.header, "prefix matches")
      end
    end
    #cmd = @cmd + "reverse #{@tmpfile} -c -f 2.0 -o #{@f}"
  end

  def test_prefix
    `#{@cmd + "reverse #{@tmpfile} -p SILLY_ -o #{@f}"}`
    fastap(@f).each do |prt|
      assert_match(/^>SILLY_.+/, prt.header)
    end
  end


  private

  # splits a string into its lines
  def strlns(str)
    str.split("\n")
  end

  # asserts that the file exists and returns its lines
  def fastalns(fn)
    assert(File.exist?(fn), "FILE: #{fn} exists")
    IO.read(fn).split("\n")
  end

  # returns the fasta object proteins
  def fastap(fn)
    assert(File.exist?(fn), "FILE: #{fn} exists")
    Fasta.new.read_file(fn).prots
  end

end
data/test/tc_gi.rb ADDED
@@ -0,0 +1,20 @@
1
+
2
+ require 'test/unit'
3
+ require 'gi'
4
+
5
+
# Test for GI.gi2annot, which maps gi numbers to their annotation strings.
class Gi2AnnotTest < Test::Unit::TestCase
  ROOT_DIR = File.join(File.dirname(__FILE__), '..')

  # Looks up a single gi number and compares the returned annotation.
  # When the lookup raises, the test is reported as skipped rather than
  # failed (the message assumes the cause is a missing internet connection).
  # NOTE(review): rescuing StandardError also hides genuine errors raised
  # inside GI.gi2annot -- narrow it to the network error classes once it is
  # known which ones the lookup can raise.
  def test_single_query
    begin
      annot = GI.gi2annot([16130548]).first
    rescue StandardError => e
      puts "SKIPPING gi2annot test since no internet connection available:"
      puts "#{e}"
      assert true
    else
      assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]'+"\n", annot)
    end
  end
end
@@ -23,14 +23,12 @@ class IDClassAnalTest < Test::Unit::TestCase
23
23
  output = `#{@cmd} -p INV_ #{@tf_proph_inv}`
24
24
  fps = [1.00, 1.00, 0.97]
25
25
  tps = [1.00, 1.00, 0.98, 0.97, 0.97, 0.97, 0.97]
26
- puts output
27
26
  #File.open("tmp.csv","w") do |fh| fh.print output end
28
27
  assert 1
29
28
  end
30
29
 
31
- def Xtest_basic
30
+ def test_basic
32
31
  output = `#{@cmd} -p INV_ #{@tf_bioworks_esmall_xml}`
33
- # @TODO: that's the output, need to grab for consistency sake
34
32
  exp = [
35
33
  [1, 1.0, 0.0],
36
34
  [2, 1.0, 0.0],
@@ -40,11 +38,11 @@ class IDClassAnalTest < Test::Unit::TestCase
40
38
  [6, 1.0, 0.0],
41
39
  [9, 1.0, 0.0],
42
40
  [10, 1.0, 0.0],
43
- [11, 0.916666666666667, 0.166666666666667],
44
- [12, 0.923076923076923, 0.153846153846154],
45
- [13, 0.928571428571429, 0.142857142857143],
46
- [14, 0.933333333333333, 0.133333333333333],
47
- [15, 0.882352941176471, 0.235294117647059]
41
+ [11, 0.909090909090909],
42
+ [12, 0.916666666666667],
43
+ [13, 0.923076923076923],
44
+ [14, 0.928571428571429],
45
+ [15, 0.866666666666667],
48
46
  ]
49
47
  outarr = output.split($/)
50
48
  exp.each_with_index do |line,i|
@@ -55,18 +53,17 @@ class IDClassAnalTest < Test::Unit::TestCase
55
53
  end
56
54
  end
57
55
 
58
- def Xtest_multiple_output
56
+ def test_multiple_output
59
57
  myplot = 'class_anal.toplot'
60
58
  output = `#{@cmd} -j -p INV_,SHUFF_ #{@tf_bioworks_esmall_xml} #{@tf_bioworks_shuff}`
61
59
  assert(output.size > 10) ## @TODO: BETTER HERE
62
60
  assert(File.exist?(myplot), "file #{myplot} exists")
63
61
  File.unlink myplot
64
-
65
62
  end
66
63
 
67
- def Xtest_jtplot_output
64
+ def test_jtplot_output
68
65
  myplot = 'class_anal.toplot'
69
- `#{@cmd} -p INV_ -j #{@tf_bioworks_esmall_xml}`
66
+ output = `#{@cmd} -p INV_ -j #{@tf_bioworks_esmall_xml}`
70
67
  assert(File.exist?(myplot), "file #{myplot} exists")
71
68
  File.unlink myplot
72
69
  end