mspire 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/Rakefile +5 -2
  2. data/bin/bioworks_to_pepxml.rb +84 -40
  3. data/bin/fasta_shaker.rb +100 -0
  4. data/bin/filter_spec_id.rb +185 -23
  5. data/bin/gi2annot.rb +2 -110
  6. data/bin/id_class_anal.rb +31 -21
  7. data/bin/id_precision.rb +12 -8
  8. data/bin/{false_positive_rate.rb → precision.rb} +1 -1
  9. data/bin/protein_summary.rb +55 -62
  10. data/changelog.txt +34 -0
  11. data/lib/align.rb +0 -1
  12. data/lib/fasta.rb +88 -24
  13. data/lib/gi.rb +114 -0
  14. data/lib/roc.rb +64 -58
  15. data/lib/spec_id/aa_freqs.rb +166 -0
  16. data/lib/spec_id/bioworks.rb +5 -1
  17. data/lib/spec_id/precision.rb +427 -0
  18. data/lib/spec_id/proph.rb +2 -2
  19. data/lib/spec_id/sequest.rb +810 -113
  20. data/lib/spec_id/srf.rb +486 -0
  21. data/lib/spec_id.rb +107 -23
  22. data/release_notes.txt +11 -0
  23. data/script/estimate_fpr_by_cysteine.rb +226 -0
  24. data/script/filter-peps.rb +3 -3
  25. data/script/find_cysteine_background.rb +137 -0
  26. data/script/gen_database_searching.rb +11 -7
  27. data/script/genuine_tps_and_probs.rb +136 -0
  28. data/script/top_hit_per_scan.rb +5 -2
  29. data/test/tc_aa_freqs.rb +59 -0
  30. data/test/tc_bioworks.rb +6 -1
  31. data/test/tc_bioworks_to_pepxml.rb +25 -18
  32. data/test/tc_fasta.rb +81 -3
  33. data/test/tc_fasta_shaker.rb +147 -0
  34. data/test/tc_gi.rb +20 -0
  35. data/test/tc_id_class_anal.rb +9 -12
  36. data/test/tc_id_precision.rb +12 -11
  37. data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
  38. data/test/tc_protein_summary.rb +31 -22
  39. data/test/tc_roc.rb +95 -50
  40. data/test/tc_sequest.rb +212 -145
  41. data/test/tc_spec.rb +10 -5
  42. data/test/tc_spec_id.rb +0 -2
  43. data/test/tc_spec_id_xml.rb +36 -0
  44. data/test/tc_srf.rb +216 -0
  45. metadata +35 -21
  46. data/lib/spec_id/false_positive_rate.rb +0 -476
  47. data/test/tc_gi2annot.rb +0 -12
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ # Here is what I plotted: take each id'd pep-prot id'd on the tophit scans -- you will likely have the same pep-prot id'd on multiple scans -- plot the top probability of each such pep-prot.
4
+ # There are 43 such id'd peptides for Sashimi, whereas SEQUEST id's about 66. So you'll have 66 (1-p-values) to plot, I had 43. Similarly for OMICS.
5
+
6
+ require 'spec_id'
7
+ require 'fasta'
8
+ require 'optparse'
9
+ require 'ostruct'
10
+
11
+ # returns an accession number if available, or the entire reference (less the
12
+ # starting '>')
13
+ def get_fasta_accession(fasta_prot)
14
+ head = fasta_prot.header
15
+ if head =~ ACC_REGEX
16
+ $1.dup
17
+ else
18
+ head.sub(/^>/, '').rstrip
19
+ end
20
+ end
21
+
22
+ # returns the accession number from a reference, or the complete reference
23
+ def accession_from_ref(pep)
24
+ ref = pep.prot.reference
25
+ if ref =~ ACC_REGEX
26
+ $1.dup
27
+ else
28
+ ref.rstrip
29
+ end
30
+ end
31
+
32
+ def get_pep_prot_accession(pep)
33
+ acc = pep.prot.accession
34
+ if !acc || acc == '0' || acc == 0
35
+ accession_from_ref(pep)
36
+ else
37
+ acc
38
+ end
39
+ end
40
+
41
+ #####################################################################
42
+ # MAIN
43
+ #####################################################################
44
+
45
+ opt = OpenStruct.new
46
+ opt.p = 'prob'
47
+ opts = OptionParser.new do |op|
48
+ op.banner = "usage: #{File.basename(__FILE__)} bioworks.xml true_hits.fasta"
49
+ op.separator " [prints to stdout tab delimited table]"
50
+ op.on('-t', '--ties', 'allow ties on best hit') {|v| opt.t = v }
51
+ op.on('-p', '--param <s>', 'param: (xcorr | prob)') {|v| opt.p = v}
52
+ end
53
+ opts.parse!
54
+
55
+ if ARGV.size < 2
56
+ puts opts
57
+ exit
58
+ end
59
+
60
+ case opt.p
61
+ when 'prob'
62
+ param = :peptide_probability
63
+ best = :first
64
+ when 'xcorr'
65
+ param = :xcorr
66
+ best = :last
67
+ else
68
+ abort "incorrect param: #{opt.p}"
69
+ end
70
+
71
+ ############################
72
+ # GLOBALS
73
+ DELIM = "\t"
74
+ ACC_REGEX = /\|(.*?)\|/o
75
+ ############################
76
+
77
+ bioworks = ARGV[0]
78
+ fasta_file = ARGV[1]
79
+
80
+ fprots = Fasta.new.read_file(fasta_file).prots
81
+ gi_nums = fprots.map {|prot| get_fasta_accession(prot) }
82
+
83
+ peptides = SpecID.new(bioworks).peps
84
+
85
+
86
+ ## Get the best peptide(s) per scan
87
+ top_peps_per_scan = []
88
+
89
+ peptides.hash_by(:base_name, :first_scan).each do |bn_scan, pep_array|
90
+ sorted_list = pep_array.sort_by {|pep| pep.send(param).to_f }
91
+
92
+ top_peps = if best == :first ; [sorted_list.shift] ; else [sorted_list.pop] end
93
+ found_another = false
94
+ sorted_list.each do |pep|
95
+ if pep.send(param).to_f == top_peps.send(best).send(param).to_f
96
+ if opt.t
97
+ top_peps << pep
98
+ else
99
+ found_another = true
100
+ end
101
+ end
102
+ end
103
+ unless found_another
104
+ top_peps_per_scan.push( *top_peps )
105
+ end
106
+ end
107
+
108
+
109
+ ## Get the best scoring peptide per peptide/prot from list of best
110
+ ## peptides/scan
111
+ top_pep_seq_prots = top_peps_per_scan.hash_by {|pep| [pep.sequence, get_pep_prot_accession(pep)] }.map do |k,pep_array|
112
+ pep_array.sort_by {|pep| pep.send(param).to_f }.send(best)
113
+ end
114
+
115
+ ## sort the peptides by best score
116
+ sorted_top_pep_seq_prots = top_pep_seq_prots.sort_by {|pep| pep.send(param).to_f }
117
+ if best == :last ; sorted_top_pep_seq_prots.reverse! end
118
+
119
+ ## plot the probability vs. the number of tps
120
+ puts ['#TPs', param, 'sequence', 'protein accession', 'xcorr'].join(DELIM)
121
+ tps = 0
122
+ sorted_top_pep_seq_prots.each do |pep|
123
+ if gi_nums.include?( get_pep_prot_accession(pep) )
124
+ tps += 1
125
+ puts [tps.to_s, pep.send(param), pep.sequence, get_pep_prot_accession(pep), pep.xcorr].join(DELIM)
126
+ end
127
+ end
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
@@ -4,7 +4,6 @@
4
4
  cats = %w(base_name sequence xcorr deltacn first_scan last_scan)
5
5
  ###################################################################
6
6
 
7
- require 'pp'
8
7
  require 'spec_id'
9
8
  require 'hash_by'
10
9
 
@@ -46,7 +45,11 @@ outfile_top = file.sub(/\.xml$/, extension_top)
46
45
  outfile_all = file.sub(/\.xml$/, extension_all)
47
46
 
48
47
  sp = SpecID.new(file)
49
- pep_hash = sp.peps.hash_by(:first_scan, :last_scan)
48
+
49
+ # The old (incorrect) version:
50
+ # pep_hash = sp.peps.hash_by(:first_scan, :last_scan)
51
+ # The correct version:
52
+ pep_hash = sp.peps.hash_by(:base_name, :first_scan, :last_scan)
50
53
  top_per_scan = pep_hash.map {|k,v| v.sort_by {|ob| ob.xcorr.to_f }.last }
51
54
  top_per_scan = top_per_scan.sort_by {|pep| pep.first_scan.to_i }
52
55
 
@@ -0,0 +1,59 @@
1
+
2
+
3
+ require 'test/unit'
4
+ require 'spec_id/aa_freqs'
5
+
6
+
7
+ class FastaTest < Test::Unit::TestCase
8
+
9
+ def initialize(arg)
10
+ super(arg)
11
+ @tfiles = File.dirname(__FILE__) + '/tfiles/'
12
+ @sf = @tfiles + "small.fasta"
13
+ end
14
+
15
+ def test_basic
16
+ obj = SpecID::AAFreqs.new(@sf)
17
+ expect = {:I=>0.0628918621937819, :S=>0.0539719475147049, :D=>0.0526145691939758, :Z=>0.0, :L=>0.102772929998061, :T=>0.0491888048607071, :E=>0.0609527503070261, :O=>0.0, :C=>0.0157714433456144, :K=>0.0471850559110594, :U=>0.0, :Q=>0.0382651412319824, :W=>0.0137030573330748, :A=>0.101997285243359, :M=>0.0294745006786892, :J=>0.0, :G=>0.0811195139292871, :Y=>0.0254670027793937, :X=>0.0, :F=>0.0418201796910348, :R=>0.0546829552065154, :V=>0.0702604873634542, :H=>0.0213302307543145, :B=>0.0, :N=>0.03471010277293, :P=>0.0418201796910348}
18
+ aaf = obj.aafreqs
19
+ expect.each do |k,v|
20
+ assert(aaf.key?(k))
21
+ assert_in_delta(v, aaf[k], 0.00000001, "freqs match up")
22
+ end
23
+ sum = 0.0
24
+ aaf.values.each do |v|
25
+ sum += v
26
+ end
27
+ assert_in_delta(1.0, sum, 0.0000000000001, "all freqs add to 1")
28
+ end
29
+
30
+ def test_probability_of_length_table
31
+ # p SpecID::AAFreqs.probability_of_length_table(0.01, 4)
32
+ assert_equal_arrs_in_delta([0.0, 0.01, 0.0199, 0.029701, 0.0394039900000001], SpecID::AAFreqs.probability_of_length_table(0.01, 4), 0.000000001)
33
+
34
+ assert_equal_arrs_in_delta([0.0, 0.2, 0.36, 0.488, 0.5904], SpecID::AAFreqs.probability_of_length_table(0.2, 4), 0.000000001)
35
+ end
36
+
37
+ def test_actual_and_expected_number
38
+ fobj = Fasta.new.read_file(@sf)
39
+ obj = SpecID::AAFreqs.new
40
+ obj.aafreqs = obj.calculate_frequencies(fobj)
41
+
42
+ peptide_aaseqs = fobj.prots.map do |prot|
43
+ prot.aaseq[0..12]
44
+ end
45
+ assert_equal(50, peptide_aaseqs.size, 'sanity check')
46
+ (ac,ex) = obj.actual_and_expected_number(peptide_aaseqs, :C, 1)
47
+ assert_equal(9, ac)
48
+ assert_in_delta( 9.33530631238985, ex, 0.0000000001)
49
+ end
50
+
51
+ private
52
+ def assert_equal_arrs_in_delta(expect, actual, delta)
53
+ expect.each_with_index do |v,i|
54
+ assert_in_delta(v, actual[i], delta)
55
+ end
56
+ end
57
+
58
+
59
+ end
data/test/tc_bioworks.rb CHANGED
@@ -8,7 +8,8 @@ class BioworksTest < Test::Unit::TestCase
8
8
  def initialize(arg)
9
9
  super(arg)
10
10
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
11
- @tf_bioworks_xml = @tfiles + "bioworks.xml"
11
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
12
+ @tf_bioworks_xml = @tfiles_l + "bioworks.xml"
12
13
  @tf_bioworks_xml_small = @tfiles + "bioworks_small.xml"
13
14
  @tf_bioworks_xml_really_small = @tfiles + "bioworks_with_INV_small.xml"
14
15
  @tf_params = @tfiles + "bioworks32.params"
@@ -34,9 +35,13 @@ class BioworksTest < Test::Unit::TestCase
34
35
  end
35
36
 
36
37
  def Xtest_xml_parsing_speed
38
+ if File.exist? @tfiles_l
37
39
  #puts Benchmark.bm {|b|
38
40
  obj = SpecID::Bioworks.new(@tf_bioworks_xml)
39
41
  #}
42
+ else
43
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
44
+ end
40
45
  end
41
46
 
42
47
  def test_xml_parsing_bioworks_single
@@ -9,11 +9,10 @@ class BioworksToPepXMLTest < Test::Unit::TestCase
9
9
  def initialize(arg)
10
10
  super(arg)
11
11
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
12
- @tf_mzxml_path = @tfiles + "yeast_gly_mzXML"
12
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
13
+ @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
13
14
  @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
14
15
  @tf_params = @tfiles + "bioworks32.params"
15
- @tf_opd1 = @tfiles + "opd1/bioworks.000.oldparams.xml"
16
- @tf_opd1_mzxml = @tfiles + "opd1/000.mzXML.timeIndex"
17
16
  @no_delete = false
18
17
  @out_path = @tfiles + 'pepxml/'
19
18
  @cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S bioworks_to_pepxml.rb "
@@ -33,23 +32,31 @@ class BioworksToPepXMLTest < Test::Unit::TestCase
33
32
  end
34
33
 
35
34
  def test_basic
36
- cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path"
37
- prc = proc {|file|
38
- assert(File.exist?(file), "#{file} exists")
39
- }
40
- _basic(cmd, prc)
41
- unless @no_delete then FileUtils.rm_rf(@out_path) end
35
+ if File.exist? @tfiles_l
36
+ cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path"
37
+ prc = proc {|file|
38
+ assert(File.exist?(file), "#{file} exists")
39
+ }
40
+ _basic(cmd, prc)
41
+ unless @no_delete then FileUtils.rm_rf(@out_path) end
42
+ else
43
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
44
+ end
42
45
  end
43
46
 
44
47
  def test_database
45
- cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path}"
46
- db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
47
- assert_match(db_re, IO.read(@tf_params))
48
- prc = proc {|file|
49
- assert(File.exist?(file))
50
- assert_no_match(db_re, IO.read(file))
51
- }
52
- _basic(cmd, prc)
53
- unless @no_delete then FileUtils.rm_rf(@out_path) end
48
+ if File.exist? @tfiles_l
49
+ cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path}"
50
+ db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
51
+ assert_match(db_re, IO.read(@tf_params))
52
+ prc = proc {|file|
53
+ assert(File.exist?(file))
54
+ assert_no_match(db_re, IO.read(file))
55
+ }
56
+ _basic(cmd, prc)
57
+ unless @no_delete then FileUtils.rm_rf(@out_path) end
58
+ else
59
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
60
+ end
54
61
  end
55
62
  end
data/test/tc_fasta.rb CHANGED
@@ -4,6 +4,8 @@ require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
4
4
  require 'test/unit'
5
5
  require 'fasta'
6
6
  require 'assert_files'
7
+ require 'sample_enzyme'
8
+ require 'set'
7
9
 
8
10
 
9
11
  module Test::Unit::Assertions
@@ -11,6 +13,7 @@ module Test::Unit::Assertions
11
13
  end
12
14
 
13
15
  class FastaTest < Test::Unit::TestCase
16
+ NODELETE = false
14
17
 
15
18
  def initialize(arg)
16
19
  super(arg)
@@ -73,7 +76,7 @@ class FastaTest < Test::Unit::TestCase
73
76
 
74
77
  def test_mod
75
78
  ## Testing shuffle:
76
- puts `#{@fasta_mod_cmd + 'shuffle ' + @sf}`
79
+ `#{@fasta_mod_cmd + 'shuffle ' + @sf}`
77
80
  assert(File.exist?(@sf_shuffle), "output file #{@sf_shuffle} exists")
78
81
  ob1 = Fasta.new.read_file(@sf)
79
82
  ob2 = Fasta.new.read_file(@sf_shuffle)
@@ -83,7 +86,7 @@ class FastaTest < Test::Unit::TestCase
83
86
  assert(_are_shuffled?(ob1,ob2))
84
87
 
85
88
  ## Testing invert:
86
- puts `#{@fasta_mod_cmd + 'invert ' + @sf}`
89
+ `#{@fasta_mod_cmd + 'invert ' + @sf}`
87
90
  assert(File.exist?(@sf_invert), "output file #{@sf_invert} exists")
88
91
  ob1 = Fasta.new.read_file(@sf)
89
92
  ob2 = Fasta.new.read_file(@sf_invert)
@@ -94,7 +97,7 @@ class FastaTest < Test::Unit::TestCase
94
97
 
95
98
  ## Testing prefix
96
99
  #puts "#{@fasta_mod_cmd + '-p _HELLO_ invert ' + @sf}"
97
- puts `#{@fasta_mod_cmd + 'invert -p _HELLO_ ' + @sf}` # NOT WORKING!
100
+ `#{@fasta_mod_cmd + 'invert -p _HELLO_ ' + @sf}` # NOT WORKING!
98
101
  assert(File.exist?(@sf_invert), "output file #{@sf_invert} exists")
99
102
  ob1 = Fasta.new.read_file(@sf)
100
103
  ob2 = Fasta.new.read_file(@sf_invert)
@@ -176,6 +179,81 @@ class FastaTest < Test::Unit::TestCase
176
179
  end
177
180
  end
178
181
 
182
+ def test_invert_tryptic_peptides
183
+ # FOR INDIVIDUAL PROTEINS:
184
+ seq = 'ABCKCDERDEKDGEKWXYRRKDER'
185
+ # tryptic = ABCK, CDER, DEK, DGEK, WXYR, R, K, DER
186
+ tryp = SampleEnzyme.tryptic(seq)
187
+ reverse_tryptic = %w(CBAK EDCR EDK EGDK YXWR R K EDR)
188
+ prot = Fasta::Prot.new(nil, seq)
189
+ prot.invert_tryptic_peptides!
190
+ assert_equal(reverse_tryptic.join(''), prot.aaseq, "reversing tryptic peptides")
191
+
192
+ seq = 'XYRABCD'
193
+ prot = Fasta::Prot.new(nil, seq)
194
+ prot.invert_tryptic_peptides!
195
+ assert_equal('YXRDCBA', prot.aaseq, 'last peptide treated special')
196
+
197
+ seq = 'XYRPABCD'
198
+ prot = Fasta::Prot.new(nil, seq)
199
+ prot.invert_tryptic_peptides!
200
+ assert_equal('DCBAPRYX', prot.aaseq, 'with a proline')
201
+
202
+ end
203
+
204
+ def test_fraction_of_prots
205
+ peps = [['>silly1', "PEPTIDE"], ['>silly2', "ANOTHER"], ['>silly3', "AGAIN"], ['>silly4', "LARMA"]]
206
+ prots = peps.map do |header, seq|
207
+ Fasta::Prot.new(header, seq)
208
+ end
209
+ f = Fasta.new(prots)
210
+ # simple:
211
+ n = f.fraction_of_prots(1.0)
212
+ assert_equal(f.prots.map{|v| v.header }.to_set, n.prots.map{|v| v.header }.to_set, "same headers")
213
+ assert_equal(f.prots.map{|v| v.aaseq }.to_set, n.prots.map{|v| v.aaseq }.to_set, "same aaseqs")
214
+
215
+ pre = proc {|cnt| "SHUFF_f#{cnt}_" }
216
+ # test prefix
217
+ n = f.fraction_of_prots(1.0, pre)
218
+ n.prots.each do |prot|
219
+ assert_match(/^>SHUFF_f0_/, prot.header, "contains new prefix")
220
+ end
221
+
222
+ # smaller
223
+ n = f.fraction_of_prots(0.75, pre)
224
+ assert_equal(3, n.prots.size, "correct number of proteins")
225
+ # bigger
226
+ n = f.fraction_of_prots(2.5, pre)
227
+ assert_equal(10, n.prots.size, "correct number of proteins")
228
+ n.prots[0..3].each {|prt| assert_match(/^>SHUFF_f0_/, prt.header ) }
229
+ n.prots[4..7].each {|prt| assert_match(/^>SHUFF_f1_/, prt.header ) }
230
+ n.prots[8..9].each {|prt| assert_match(/^>SHUFF_f2_/, prt.header ) }
231
+ # crazy
232
+ n = f.fraction_of_prots(1.33, pre)
233
+ assert_equal(6, n.prots.size, "correct number of proteins")
234
+ end
235
+
236
+ def test_inverted_tryptic_peptides_for_file
237
+ # for a file:
238
+ tmpfile = @tfiles + "fasta.tmp"
239
+ fasta = Fasta.new.read_file(@sf)
240
+ fasta.aaseq_invert_tryptic_peptides!
241
+ fasta.write_file(tmpfile)
242
+ lines = IO.readlines(tmpfile)
243
+ #normal = 'MKRISTTITTTITITTGNGAG'
244
+ inverted_tryptic = 'MKRGAGNGTTITITTTITTSI' ## ?????
245
+ assert_equal(inverted_tryptic, lines[1].chomp)
246
+ #normal = 'MATYLIGDVHGCYDELIALLHKVEFTPGKDTLWLTGDLVARGPGSLDVLRYVKSLGDSVRLVLGNHDLHL
247
+ # LAVFAGISRNKPKDRLTPLLEAPDADELLNWLRRQPLLQIDEEKKLVMAHAGITPQWDLQTAKECARDVE
248
+ # AVLSSDSYPFFLDAMYGDMPNNWSPELRGLGRLRFITNAFTRMRFCFPNGQLDMYSKESPEEAPAPLKPW
249
+ # FAIPGPVAEEYSIAFGHWASLEGKGTPEGIYALDTGCCWGGTLTCLRWEDKQYFVQPSNRHKDLGEAAAS'
250
+ inverted_tryptic = 'HLLAILEDYCGHVDGILYTAMKGPTFEVKAVLDGTLWLTDRLVDLSGPGRVYKVSDGLSRSIGAFVALLHLDHNGLVLRPKNKDRLWNLLEDADPAELLPTLRREEDIQLLPQKKATQLDWQPTIGAHAMVLKACERLEPSWNNPMDGYMADLFFPYSDSSLVAEVDRGLGRLRTFANTIFRMRSYMDLQGNPFCFKGELSAWHGFAISYEEAVPGPIAFWPKLPAPAEEPSEKLCTLTGGWCCGTDLAYIGEPTGRDEWKNSPQVFYQRHKSAAAEGLD'
251
+ assert_equal(inverted_tryptic, lines[-1].chomp)
252
+ File.unlink(tmpfile) unless NODELETE
253
+ end
254
+
255
+
256
+
179
257
  ## HELPER ASSERTIONS:
180
258
 
181
259
  def _are_inverted?(obj1, obj2)
@@ -0,0 +1,147 @@
1
+
2
+ require 'test/unit'
3
+ require 'fasta'
4
+
5
+ Filestring = ">gi|P1
6
+ AMKRGAN
7
+ >gi|P2
8
+ CRGATKKTAGRPMEK
9
+ >gi|P3
10
+ PEPTIDE
11
+ "
12
+
13
+ Rev = ">gi|P1
14
+ NAGRKMA
15
+ >gi|P2
16
+ KEMPRGATKKTAGRC
17
+ >gi|P3
18
+ EDITPEP
19
+ "
20
+
21
+ RevTryptic = ">gi|P1
22
+ MAKRNAG
23
+ >gi|P2
24
+ CRTAGKKEMPRGATK
25
+ >gi|P3
26
+ EDITPEP
27
+ "
28
+
29
+ ShuffTryptic = ">gi|P1
30
+ MAKRNAG
31
+ >gi|P2
32
+ CRTAGKKEMPRGATK
33
+ >gi|P3
34
+ EDITPEP
35
+ "
36
+
37
+
38
+
39
+ class TestBasic < Test::Unit::TestCase
40
+
41
+ def setup
42
+ testdir = File.dirname(__FILE__)
43
+ libdir = testdir + '/../lib'
44
+ bindir = testdir + '/../bin'
45
+ progname = "fasta_shaker.rb"
46
+ @cmd = "ruby -I #{libdir} #{bindir}/#{progname} "
47
+ @tfiles = testdir + '/tfiles/'
48
+ @tmpfile = @tfiles + "littlefasta.trash.fasta"
49
+ File.open(@tmpfile, "w") {|fh| fh.print Filestring }
50
+ @f = @tfiles + "trash.fasta"
51
+ end
52
+
53
+ def teardown
54
+ File.unlink @tmpfile if File.exist? @tmpfile
55
+ File.unlink @f if File.exist? @f
56
+ end
57
+
58
+ def test_reverse
59
+ cmd = @cmd + "reverse #{@tmpfile} -o #{@f}"
60
+ system cmd
61
+ assert_equal(Rev, fastap(@f).to_s)
62
+ end
63
+
64
+ def test_reverse_tryptic
65
+ cmd = @cmd + "reverse #{@tmpfile} -o #{@f} --tryptic_peptides"
66
+ system cmd
67
+ assert_equal(RevTryptic, fastap(@f).to_s)
68
+ end
69
+
70
+ def test_shuff_tryptic
71
+ cmd = @cmd + "shuffle #{@tmpfile} -o #{@f} --tryptic_peptides"
72
+ system cmd
73
+ lns = fastap(@f).to_s.split("\n")
74
+ assert_equal('KR', lns[1][2..3])
75
+ assert_equal('R', lns[3][1..1])
76
+ assert_equal('CRGATKKTAGRPMEK'.size, lns[3].size, "sequence is same size")
77
+ assert_not_equal('CRGATKKTAGRPMEK', lns[3], "sequence is randomised from original [remote chance of failure] rerun to make sure")
78
+ end
79
+
80
+ def test_shuffle
81
+ cmd = @cmd + "shuffle #{@tmpfile} -o #{@f}"
82
+ system cmd
83
+ clines = strlns(Filestring)
84
+ lns = fastalns(@f)
85
+ lns.each_with_index do |line,i|
86
+ assert_equal(clines[i].size, line.size, "same size lines: A: <<#{clines[i]}>> B: <<#{line}>>")
87
+ end
88
+ assert_equal('CRGATKKTAGRPMEK'.size, lns[3].size, "sequence is same size")
89
+ assert_not_equal('CRGATKKTAGRPMEK', lns[3], "sequence is randomised from original [remote chance of failure] rerun to make sure")
90
+ end
91
+
92
+ def test_cat
93
+ cmd = @cmd + "reverse #{@tmpfile} -c -o #{@f}"
94
+ `#{cmd}` ## suppress warning
95
+ lns = fastalns(@f)
96
+ assert_equal(strlns(Filestring), lns[0..5], "first part equal")
97
+ assert_equal(strlns(Rev), lns[6..-1], "second part equal")
98
+ end
99
+
100
+ def test_fraction
101
+ cmd = @cmd + "reverse #{@tmpfile} -f 2.6 -o #{@f}"
102
+ `#{cmd}`
103
+ assert_equal(8, fastap(@f).size)
104
+
105
+ cmd = @cmd + "shuffle #{@tmpfile} -f 2.0 -c -p MINE_ -o #{@f}"
106
+ `#{cmd}`
107
+ assert_equal(9, fastap(@f).size)
108
+ fp = fastap(@f)
109
+ fp[0..2].each do |prt|
110
+ assert_match(/^>/, prt.header, "prefix matches")
111
+ end
112
+ fp[3..5].each do |prt|
113
+ assert_match(/^>MINE_f0_/, prt.header, "prefix matches")
114
+ end
115
+ fp[6..8].each do |prt|
116
+ assert_match(/^>MINE_f1_/, prt.header, "prefix matches")
117
+ end
118
+ #cmd = @cmd + "reverse #{@tmpfile} -c -f 2.0 -o #{@f}"
119
+ end
120
+
121
+ def test_prefix
122
+ cmd = @cmd + "reverse #{@tmpfile} -p SILLY_ -o #{@f}"
123
+ `#{cmd}`
124
+ fp = fastap(@f)
125
+ fp.each do |prt|
126
+ assert_match(/^>SILLY_.+/, prt.header)
127
+ end
128
+ end
129
+
130
+
131
+ private
132
+ def strlns(str)
133
+ str.split("\n")
134
+ end
135
+
136
+ def fastalns(fn)
137
+ assert(File.exist?(fn), "FILE: #{fn} exists")
138
+ IO.read(fn).split("\n")
139
+ end
140
+
141
+ # returns the fasta object proteins
142
+ def fastap(fn)
143
+ assert(File.exist?(fn), "FILE: #{fn} exists")
144
+ Fasta.new.read_file(fn).prots
145
+ end
146
+
147
+ end
data/test/tc_gi.rb ADDED
@@ -0,0 +1,20 @@
1
+
2
+ require 'test/unit'
3
+ require 'gi'
4
+
5
+
6
+ class Gi2AnnotTest < Test::Unit::TestCase
7
+ ROOT_DIR = File.join(File.dirname(__FILE__), '..')
8
+
9
+ def test_single_query
10
+ #begin
11
+ annot = GI.gi2annot([16130548]).first
12
+ #rescue
13
+ puts "SKIPPING gi2annot test since no internet connection available:"
14
+ puts "#{$!}"
15
+ assert true
16
+ #else
17
+ assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]'+"\n", annot)
18
+ #end
19
+ end
20
+ end
@@ -23,14 +23,12 @@ class IDClassAnalTest < Test::Unit::TestCase
23
23
  output = `#{@cmd} -p INV_ #{@tf_proph_inv}`
24
24
  fps = [1.00, 1.00, 0.97]
25
25
  tps = [1.00, 1.00, 0.98, 0.97, 0.97, 0.97, 0.97]
26
- puts output
27
26
  #File.open("tmp.csv","w") do |fh| fh.print output end
28
27
  assert 1
29
28
  end
30
29
 
31
- def Xtest_basic
30
+ def test_basic
32
31
  output = `#{@cmd} -p INV_ #{@tf_bioworks_esmall_xml}`
33
- # @TODO: that's the output, need to grab for consistency sake
34
32
  exp = [
35
33
  [1, 1.0, 0.0],
36
34
  [2, 1.0, 0.0],
@@ -40,11 +38,11 @@ class IDClassAnalTest < Test::Unit::TestCase
40
38
  [6, 1.0, 0.0],
41
39
  [9, 1.0, 0.0],
42
40
  [10, 1.0, 0.0],
43
- [11, 0.916666666666667, 0.166666666666667],
44
- [12, 0.923076923076923, 0.153846153846154],
45
- [13, 0.928571428571429, 0.142857142857143],
46
- [14, 0.933333333333333, 0.133333333333333],
47
- [15, 0.882352941176471, 0.235294117647059]
41
+ [11, 0.909090909090909],
42
+ [12, 0.916666666666667],
43
+ [13, 0.923076923076923],
44
+ [14, 0.928571428571429],
45
+ [15, 0.866666666666667],
48
46
  ]
49
47
  outarr = output.split($/)
50
48
  exp.each_with_index do |line,i|
@@ -55,18 +53,17 @@ class IDClassAnalTest < Test::Unit::TestCase
55
53
  end
56
54
  end
57
55
 
58
- def Xtest_multiple_output
56
+ def test_multiple_output
59
57
  myplot = 'class_anal.toplot'
60
58
  output = `#{@cmd} -j -p INV_,SHUFF_ #{@tf_bioworks_esmall_xml} #{@tf_bioworks_shuff}`
61
59
  assert(output.size > 10) ## @TODO: BETTER HERE
62
60
  assert(File.exist?(myplot), "file #{myplot} exists")
63
61
  File.unlink myplot
64
-
65
62
  end
66
63
 
67
- def Xtest_jtplot_output
64
+ def test_jtplot_output
68
65
  myplot = 'class_anal.toplot'
69
- `#{@cmd} -p INV_ -j #{@tf_bioworks_esmall_xml}`
66
+ output = `#{@cmd} -p INV_ -j #{@tf_bioworks_esmall_xml}`
70
67
  assert(File.exist?(myplot), "file #{myplot} exists")
71
68
  File.unlink myplot
72
69
  end