mspire 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
data/test/tc_filter.rb ADDED
@@ -0,0 +1,203 @@
1
+
2
+ require 'test/unit'
3
+ require 'spec_id/filter'
4
+ require 'spec_id/srf'
5
+ require 'set_from_hash'
6
+ require File.dirname(__FILE__) + '/test_helper'
7
+
8
+ $VERBOSE = false
9
+
10
+
11
+ class TestFilter < Test::Unit::TestCase
12
+
13
+ def initialize(arg)
14
+ super(arg)
15
+ @tfiles = File.dirname(__FILE__) + '/tfiles/'
16
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
17
+ @small_inv = @tfiles + 'bioworks_with_INV_small.xml'
18
+ @small = @tfiles + 'bioworks_small.xml'
19
+ ## SRF:
20
+ @zero_srf = @tfiles_l + 'opd1_cat_inv/000.srf'
21
+ @twenty_srf = @tfiles_l + 'opd1_cat_inv/020.srf'
22
+ @zero_srg = @tfiles_l + 'bioworks_000.srg'
23
+ @both_srg = @tfiles_l + 'bioworks_both.srg'
24
+ ## FASTA:
25
+ @opd1_fasta = @tfiles_l + 'opd1_cat_inv/ecoli_K12_ncbi_20060321.fasta'
26
+ @opd1_correct_fasta = @tfiles_l + 'opd1_cat_inv/correct_fictitious_314.fasta'
27
+ if File.exist? @tfiles_l
28
+ File.open(@zero_srg, 'w') {|fh| fh.puts( File.expand_path(@zero_srf) ) }
29
+ File.open(@both_srg, 'w') {|fh| fh.puts( File.expand_path(@zero_srf) ); fh.puts( File.expand_path(@twenty_srf) ) }
30
+ end
31
+ end
32
+
33
+ def test_protein_fppr
34
+ peps_per_prot = [4,4,3,2,2]
35
+ (num, mean_fppr, std_num, std_fppr) = SpecID::Filter.new.protein_fppr(peps_per_prot, 1, 10)
36
+ assert_equal(0, mean_fppr, "no prots completely wrong")
37
+ assert_equal(0, std_fppr, "no prots completely wrong")
38
+ (num, mean_fppr, std_num, std_fppr) = SpecID::Filter.new.protein_fppr(peps_per_prot, 14, 10)
39
+ assert_equal(4.0/5, mean_fppr, "only one prot right")
40
+ assert_equal(0.0, std_fppr, "only one prot right")
41
+ end
42
+
43
+ def test_filter_sequest
44
+ hashes = [
45
+ {:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
46
+ {:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
47
+ {:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
48
+ {:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
49
+ {:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
50
+ {:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
51
+ ]
52
+ peps = hashes.map do |hash|
53
+ pep = SRF::OUT::Pep.new.set_from_hash(hash)
54
+ end
55
+ sp = GenericSpecID.new.set_from_hash({:peps => peps})
56
+ before_size = sp.peps.size
57
+ assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 5, "all passing")
58
+ assert_filter([1.6, 1.6, 1.6, 0.1, 50], sp, 0, "xcorrs too high")
59
+ assert_filter([1.6, 1.0, 1.0, 0.1, 50], sp, 4, "one xcorr too high")
60
+ assert_filter([1.0, 1.6, 1.0, 0.1, 50], sp, 2, "one xcorr too high")
61
+ assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 4, "one xcorr too high")
62
+ assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 0, "high deltacn")
63
+
64
+ ## with deltcnstars:
65
+ assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 6, "all passing", true)
66
+ assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 1, "high deltacn", true)
67
+ assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 5, "one xcorr too high", true)
68
+ end
69
+
70
+ def assert_filter(filter_args, spec_id, expected_passing, message, include_deltcn=false)
71
+ npeps = spec_id.filter_sequest(filter_args, include_deltcn)
72
+ assert_equal(expected_passing, npeps.size, message)
73
+ end
74
+
75
+ def test_passing_proteins
76
+ hash_prots = (0..7).map do |n|
77
+ SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
78
+ end
79
+ arr_prots = (0..7).map do |n|
80
+ SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
81
+ end
82
+ [hash_prots, arr_prots].each do |prots|
83
+
84
+ hashes = [
85
+ {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
86
+ {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
87
+ {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
88
+ {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
89
+ {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
90
+ {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
91
+ ]
92
+
93
+ peps = hashes.map do |hash|
94
+ SRF::OUT::Pep.new.set_from_hash(hash)
95
+ end
96
+
97
+
98
+ prts = SpecID.passing_proteins(peps)
99
+ exp = (0..4).map do |n|
100
+ "prot_" + n.to_s
101
+ end
102
+ refs = prts.map { |v| v.reference }.sort
103
+ assert_equal(exp, refs)
104
+
105
+
106
+ prts = SpecID.passing_proteins(peps, :update)
107
+ prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
108
+ assert_protein_match(prts, 'prot_0', %w(PEP0 PEP4))
109
+ assert_protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
110
+ assert_protein_match(prts, 'prot_2', %w(PEP1 PEP5))
111
+ assert_protein_match(prts, 'prot_3', %w(PEP2))
112
+ assert_protein_match(prts, 'prot_4', %w(PEP3))
113
+ srt_ref = prts.map {|v| v.reference}.sort
114
+ assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), srt_ref, "just the right number of prots")
115
+ prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
116
+ assert_equal(prot_0_before.__id__, prot_0.__id__, "proteins are identical")
117
+
118
+
119
+ prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first.__id__
120
+
121
+ prts = SpecID.passing_proteins(peps, :new)
122
+ assert_protein_match(prts, 'prot_0', %w(PEP0 PEP4))
123
+ assert_protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
124
+ assert_protein_match(prts, 'prot_2', %w(PEP1 PEP5))
125
+ assert_protein_match(prts, 'prot_3', %w(PEP2))
126
+ assert_protein_match(prts, 'prot_4', %w(PEP3))
127
+ srt_ref = prts.map {|v| v.reference}.sort
128
+ assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), srt_ref, "just the right number of prots")
129
+ prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
130
+ assert_not_equal(prot_0_before, prot_0.__id__, "proteins are not identical")
131
+
132
+ end
133
+ end
134
+
135
+ def assert_protein_match(prts, ref, pepseqs, message='')
136
+ prt = prts.select{|v| v.reference == ref }.first
137
+ sorted_prt_peps_aaseqs = prt.peps.map {|v| v.aaseq }.sort
138
+ sorted_pepseqs = pepseqs.sort
139
+ assert_equal(pepseqs, sorted_prt_peps_aaseqs, message)
140
+ end
141
+
142
+ def test_usage
143
+ output = capture_stdout {
144
+ SpecID::Filter.run_from_argv([])
145
+ }
146
+ assert_match('usage:', output)
147
+ end
148
+
149
+ def test_basic_bioworks_xml
150
+
151
+ output = capture_stdout {
152
+ SpecID::Filter.run_from_argv([@small].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000))) )
153
+ }
154
+ ## FROZEN:
155
+ assert_match(/pep_hits\s+4/, output)
156
+ assert_match(/uniq_aa_hits\s+4/, output)
157
+ assert_match(/prot_hits\s+4/, output)
158
+
159
+
160
+ output = capture_stdout {
161
+ SpecID::Filter.run_from_argv([@small_inv].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
162
+ }
163
+ #puts ""
164
+ #puts output
165
+ ## FROZEN:
166
+ assert_match(/pep_hits\s+151/, output)
167
+ assert_match(/uniq_aa_hits\s+75/, output)
168
+ assert_match(/prot_hits\s+13/, output)
169
+ end
170
+
171
+ def test_srf
172
+ if File.exist? @tfiles_l
173
+ ## dcy
174
+ output = capture_stdout {
175
+ SpecID::Filter.run_from_argv([@zero_srg].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
176
+ }
177
+ ## FROZEN:
178
+ #puts ""
179
+ #puts output
180
+ assert_match(/pep_hits\s+2111\s+107\.2/, output)
181
+ assert_match(/uniq_aa_hits\s+2034\s+106\.6/, output)
182
+ assert_match(/prot_hits\s+1454\s+100\.0/, output)
183
+
184
+ ## cys tps fps COMBINED
185
+ # tps are fictitious!
186
+ output = capture_stdout {
187
+ # that's the background freq for ecoli that this file's from
188
+ SpecID::Filter.run_from_argv([@zero_srg].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 --occams_razor --cys 0.0115866200193321 --t).push(@opd1_correct_fasta))))
189
+ }
190
+ #puts ""
191
+ #puts output
192
+ ## FROZEN:
193
+ assert_match(/num\s+tps%\s+cys%/, output, "header")
194
+ assert_match(/pep_hits\s+4374\s+9\d\.\d.*\s+83\.7/, output)
195
+ assert_match(/uniq_aa_hits\s+4203\s+9\d\.\d.*\s+82\.8/, output)
196
+ assert_match(/prot_hits\s+2986\s+9\d\..*\s+7\d\./, output)
197
+ assert_match(/occams.*\s+2986\s+8\d\..*\s+7\d\./, output)
198
+ else
199
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})" ))
200
+ end
201
+ end
202
+
203
+ end
data/test/tc_gi.rb CHANGED
@@ -7,14 +7,11 @@ class Gi2AnnotTest < Test::Unit::TestCase
7
7
  ROOT_DIR = File.join(File.dirname(__FILE__), '..')
8
8
 
9
9
  def test_single_query
10
- #begin
11
- annot = GI.gi2annot([16130548]).first
12
- #rescue
13
- puts "SKIPPING gi2annot test since no internet connection available:"
14
- puts "#{$!}"
15
- assert true
16
- #else
17
- assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]'+"\n", annot)
18
- #end
10
+ annot = GI.gi2annot([16130548])
11
+ if annot
12
+ assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]', annot.first)
13
+ else
14
+ assert_nil( puts("SKIPPING gi test (no internet connection available)") )
15
+ end
19
16
  end
20
17
  end
@@ -30,6 +30,30 @@ class IDPrecisionTest < Test::Unit::TestCase
30
30
  PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
31
31
  75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
32
32
  95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
33
+ 155, 1.0, 67, 1.0, 123, 1.0, 155, 1.0, 125, 1.0, 155, 1.0
34
+ 186, 1.0, 85, 1.0, 154, 1.0, 186, 1.0, 156, 1.0, 186, 1.0
35
+ 196, 1.0, 90, 1.0, 161, 1.0, 196, 1.0, 163, 1.0, 196, 1.0
36
+ 214, 1.0, 94, 1.0, 168, 1.0, 214, 1.0, 170, 1.0, 214, 1.0
37
+ 215, 1.0, 95, 1.0, 169, 1.0, 215, 1.0, 171, 1.0, 215, 1.0
38
+ 217, 0.995391705069124, 97, 0.989690721649485, 171, 0.994152046783626, 217, 0.995391705069124, 173, 0.994219653179191, 217, 0.995391705069124
39
+ 219, 0.995433789954338, 99, 0.98989898989899, 172, 0.994186046511628, 219, 0.995433789954338, 175, 0.994285714285714, 219, 0.995433789954338
40
+ 227, 0.995594713656388, 106, 0.990566037735849, 180, 0.994444444444444, 227, 0.995594713656388, 183, 0.994535519125683, 227, 0.995594713656388
41
+ 228, 0.995614035087719, 107, 0.990654205607477, 181, 0.994475138121547, 228, 0.995614035087719, 184, 0.994565217391304, 228, 0.995614035087719
42
+ 229, 0.991266375545852, 108, 0.981481481481482, 182, 0.989010989010989, 229, 0.991266375545852, 185, 0.989189189189189, 229, 0.991266375545852
43
+ END
44
+
45
+ # This was the result we were getting before first hashing on protein
46
+ # sequences and doing unique peptide hits. It is very similar (but not
47
+ # exactly the same) to what we are doing now. Must have something to do
48
+ # with the way things are hashed out.
49
+ before_doing_uniq_peptides=<<END
50
+ # NH = number of hits
51
+ # TP = true positives
52
+ # FP = false positives
53
+ # PR = precision = TP/(TP+FP)
54
+ PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
55
+ 75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
56
+ 95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
33
57
  125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
34
58
  155, 1.0, 85, 1.0, 154, 1.0, 155, 1.0, 156, 1.0, 155, 1.0
35
59
  186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
@@ -49,10 +73,17 @@ END
49
73
  cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
50
74
  #puts "RUNNING: #{cmd}"
51
75
  reply = `#{cmd}`
76
+ # This is what we were getting before hashing for unique peptides
77
+ # It is very similar (but not identical to previous output)
52
78
  string =<<END
53
79
  Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
54
80
  ./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
55
81
  END
82
+
83
+ string =<<NEWEND
84
+ Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
85
+ ./test/tfiles/bioworks_with_INV_small.xml 228.939375794224 107.877585995136 181.929045912105 228.939375794224 184.924437525838 228.939375794224
86
+ NEWEND
56
87
  assert_equal(string, reply, "area under the curve")
57
88
  end
58
89
  end
data/test/tc_mzxml.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'test/unit'
2
2
  require 'spec/mzxml/parser'
3
3
 
4
-
5
4
  class SpecMzXML < Test::Unit::TestCase
6
5
  def initialize(arg)
7
6
  super(arg)
@@ -49,23 +48,24 @@ class SpecMzXML < Test::Unit::TestCase
49
48
  sr_raw = @tfiles + 'smallraw.RAW'
50
49
  sr_noext = @tfiles + 'smallraw'
51
50
  sr_mzxml = @tfiles + 'smallraw.mzXML'
52
- ob = Spec::MzXML::Parser.new
51
+ klass = Spec::MzXML
53
52
  # given raw
54
- file = ob.file_to_mzxml(sr_raw)
53
+ file = klass.file_to_mzxml(sr_raw)
55
54
  file_to_mzxml_assert(file)
56
55
  # given mzXML
57
- file = ob.file_to_mzxml(sr_mzxml)
56
+ file = klass.file_to_mzxml(sr_mzxml)
58
57
  file_to_mzxml_assert(file)
59
58
  File.unlink(sr_mzxml)
60
59
  # given basename (and no mzXML)
61
- file = ob.file_to_mzxml(sr_noext)
60
+ file = klass.file_to_mzxml(sr_noext)
62
61
  file_to_mzxml_assert(file)
63
62
  # given basename (and mzXML)
64
- file = ob.file_to_mzxml(sr_noext)
63
+ file = klass.file_to_mzxml(sr_noext)
65
64
  file_to_mzxml_assert(file)
66
65
  File.unlink(sr_mzxml)
67
66
  else
68
67
  puts "SKIPPING tests requiring 't2x' to convert RAW to mzXML"
68
+ puts "(look in the archive folder of the gem for t2x binary for linux)"
69
69
  end
70
70
  end
71
71
 
@@ -84,3 +84,5 @@ class SpecMzXML < Test::Unit::TestCase
84
84
  end
85
85
 
86
86
  end
87
+
88
+
@@ -9,7 +9,8 @@ class PeptideParentTimesTest < Test::Unit::TestCase
9
9
  end
10
10
 
11
11
  def test_blank
12
- puts "NOT RUNNING ANY TESTS FOR PEPTIDE_PARENT_TIMES RIGHT NOW"
12
+ ## need to finish this guy up:
13
+ puts "\nSKIPPING: tests for peptide_parent_times"
13
14
  end
14
15
 
15
16
  def Xtest_run
data/test/tc_precision.rb CHANGED
@@ -3,7 +3,7 @@ require 'test/unit'
3
3
  require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
4
4
 
5
5
 
6
- class PrecisionTest < Test::Unit::TestCase
6
+ class PrecTest < Test::Unit::TestCase
7
7
  ROOT_DIR = File.join(File.dirname(__FILE__), "..")
8
8
 
9
9
  def initialize(arg)
data/test/tc_proph.rb CHANGED
@@ -16,8 +16,8 @@ class ProphTest < Test::Unit::TestCase
16
16
 
17
17
  def test_parse_protxml_file
18
18
  file = @tfiles + 'opd1/000_020_3prots-prot.xml'
19
- #obj = SpecID::Proph::ProtSummary.new
20
- obj = SpecID::Proph::ProtSummary.new(file)
19
+ #obj = Proph::ProtSummary.new
20
+ obj = Proph::ProtSummary.new(file)
21
21
  assert_equal(3, obj.prot_groups.size)
22
22
  assert_equal("1.00", obj.prot_groups.first.probability)
23
23
  assert_equal("0.98", obj.prot_groups[2].probability)
@@ -38,7 +38,7 @@ class ProphTest < Test::Unit::TestCase
38
38
 
39
39
 
40
40
  def Xtest_filter_by_min_pep_prob
41
- obj = SpecID::Proph::Pep::Parser.new
41
+ obj = Proph::Pep::Parser.new
42
42
  new_file = "tfiles/tmp.xml"
43
43
  assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
44
44
  obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
@@ -48,7 +48,7 @@ class ProphTest < Test::Unit::TestCase
48
48
  end
49
49
 
50
50
  def Xtest_uniq_by_seqcharge
51
- cls = SpecID::Proph::Pep
51
+ cls = Proph::Pep
52
52
  p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
53
53
  p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
54
54
  p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
@@ -91,7 +91,7 @@ class ProphTest < Test::Unit::TestCase
91
91
  s1 = Spec::Scan.new(1,2,0.10, 300.2, i1, p1)
92
92
  s2 = Spec::Scan.new(2,2,0.20, 301.1, i2, p2)
93
93
  s3 = Spec::Scan.new(3,2,0.30, 302.0, i3, p3)
94
- scan = SpecID::Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
94
+ scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
95
95
  tot_inten = i1 + i2 + i3
96
96
  tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
97
97
  {:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
@@ -1,6 +1,7 @@
1
1
 
2
2
  require 'test/unit'
3
- require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
3
+ require 'spec_id/protein_summary'
4
+ require File.dirname(__FILE__) + '/test_helper'
4
5
 
5
6
 
6
7
 
@@ -20,16 +21,29 @@ class ProphProtSummaryTest < Test::Unit::TestCase
20
21
  @tf_proph_cat_inv_summary_html = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.html'
21
22
  @tf_proph_cat_inv_summary_png = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.png'
22
23
  @tf_peptide_count = @tfiles + "peptide_counts.tmp.txt"
23
- @cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S protein_summary.rb "
24
24
  end
25
25
 
26
+ def runit(string_or_args)
27
+ args = if string_or_args.is_a? String
28
+ string_or_args.split(/\s+/)
29
+ else
30
+ string_or_args
31
+ end
32
+ ProteinSummary.new.create_from_command_line_args(args)
33
+ end
34
+
35
+
26
36
  def test_usage
27
- assert_match(/usage:/, `#{@cmd}`)
37
+ output = capture_stdout {
38
+ runit('')
39
+ }
40
+ assert_match(/usage:/, output)
28
41
  end
29
42
 
30
- def Xtest_proph_basic
43
+ def test_proph_basic
31
44
  if File.exist? @tfiles_l
32
- print `#{@cmd} -c 5.0 #{@tf_proph}`
45
+ runit "-c 5.0 #{@tf_proph}"
46
+ ProteinSummary.new.create_from_command_line_args([@tf_proph, '-c', '5.0'])
33
47
  assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
34
48
  string = IO.read(@tf_summary)
35
49
  assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
@@ -41,7 +55,7 @@ class ProphProtSummaryTest < Test::Unit::TestCase
41
55
  end
42
56
 
43
57
  def test_bioworks_basic
44
- print `#{@cmd} #{@tf_bioworks_small}`
58
+ runit "#{@tf_bioworks_small}"
45
59
  assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
46
60
  File.unlink @tf_bioworks_small_summary_html unless NODELETE
47
61
 
@@ -49,23 +63,32 @@ class ProphProtSummaryTest < Test::Unit::TestCase
49
63
  end
50
64
 
51
65
  def test_bioworks_with_precision
52
- `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
53
- assert_match('TP : 106', IO.read(@tf_bioworks_small_summary_html))
54
- assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
66
+ ## Could reimplement a separate file approach?
67
+ #reply = `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
68
+ runit "#{@tf_bioworks_small} --precision"
69
+ assert_match(/# hits.*106/m, IO.read(@tf_bioworks_small_summary_html))
70
+ #assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
71
+ #assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
55
72
  assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
56
73
  File.unlink @tf_bioworks_small_summary_html unless NODELETE
57
74
  end
58
75
 
59
- def Xtest_proph_with_precision
76
+ def test_proph_with_precision
60
77
  #puts @cmd
61
- print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --precision`
78
+ runit "#{@tf_proph_cat_inv} -f INV_ --precision"
79
+ html = IO.read(@tf_proph_cat_inv_summary_html)
80
+ assert_match(/# hits/, html, "in #{@tf_proph_cat_inv_summary_html}")
81
+ assert_match(/2.*0\.0000/m, html, "in #{@tf_proph_cat_inv_summary_html}")
82
+ assert_match(/3.*0\.3333/m, html, "in #{@tf_proph_cat_inv_summary_html}")
83
+ assert_match(/7.*0\.5714/m, html, "in #{@tf_proph_cat_inv_summary_html}")
84
+
62
85
  File.unlink @tf_proph_cat_inv_summary_html unless NODELETE
63
86
  File.unlink @tf_proph_cat_inv_summary_png unless NODELETE
64
87
  end
65
88
 
66
- def Xtest_peptide_count
89
+ def test_peptide_count
67
90
  if File.exist? @tfiles_l
68
- print `#{@cmd} -c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}`
91
+ runit "-c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}"
69
92
  assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
70
93
  file = IO.read(@tf_peptide_count)
71
94
  assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
data/test/tc_sequest.rb CHANGED
@@ -4,8 +4,10 @@
4
4
  require 'spec_id'
5
5
  require 'spec_id/sequest'
6
6
  require 'test/unit'
7
+ require 'spec/mzxml'
7
8
 
8
9
 
10
+ NODELETE = false
9
11
 
10
12
  class SequestTest < Test::Unit::TestCase
11
13
 
@@ -18,10 +20,10 @@ class SequestTest < Test::Unit::TestCase
18
20
  @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
19
21
  end
20
22
 
21
- def test_set_from_bioworks
23
+ def Xtest_set_from_bioworks
22
24
  if File.exist? @tfiles_l
23
25
  out_path = '.'
24
- pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(@tf_params, @tf_bioworks_xml, @tf_mzxml_path, out_path)
26
+ pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(@tf_bioworks_xml, @tf_params, {:ms_path => @tf_mzxml_path, :out_path => out_path})
25
27
  pepxml_objs.each do |obj|
26
28
  assert(obj.spectrum_queries.size > 2)
27
29
  assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
@@ -55,10 +57,10 @@ class SequestTest < Test::Unit::TestCase
55
57
  mzxml_path = @tfiles + "opd1"
56
58
  out_path = @tfiles
57
59
  pepxml_version = 18
58
- pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params, bioworks_xml, mzxml_path, out_path, pepxml_version, "trypsin")
60
+ pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => pepxml_version, :sample_enzyme => "trypsin"})
59
61
  puts "TOOK #{Time.new - st}secs"
60
62
  po = pepxml_objs.first
61
- assert_equal(pepxml_version, SpecID::Sequest::PepXML.pepxml_version)
63
+ assert_equal(pepxml_version, Sequest::PepXML.pepxml_version)
62
64
 
63
65
  # MSMSPipelineAnalysis
64
66
  pipe = po.msms_pipeline_analysis
@@ -197,9 +199,9 @@ class SequestTest < Test::Unit::TestCase
197
199
 
198
200
 
199
201
 
200
- def test_calc_num_tol_term
201
- params = SpecID::Sequest::Params.new(@tf_params)
202
- scall = SpecID::Sequest::PepXML::SearchHit
202
+ def Xtest_calc_num_tol_term
203
+ params = Sequest::Params.new(@tf_params)
204
+ scall = Sequest::PepXML::SearchHit
203
205
  sym = :calc_num_tol_term
204
206
  assert_equal(2, scall.send(sym, params, "K.EPTIDR.E"))
205
207
  assert_equal(1, scall.send(sym, params, "K.PEPTIDR.E"))
@@ -207,9 +209,9 @@ class SequestTest < Test::Unit::TestCase
207
209
  assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
208
210
  end
209
211
 
210
- def test_calc_num_missed_cleavages
211
- params = SpecID::Sequest::Params.new(@tf_params)
212
- scall = SpecID::Sequest::PepXML::SearchHit
212
+ def Xtest_calc_num_missed_cleavages
213
+ params = Sequest::Params.new(@tf_params)
214
+ scall = Sequest::PepXML::SearchHit
213
215
  sym = :calc_num_missed_cleavages
214
216
  assert_equal(0, scall.send(sym, params, "K.EPTIDR.E"))
215
217
  assert_equal(0, scall.send(sym, params, "K.PEPTIDR.E"))
@@ -225,35 +227,27 @@ class SequestTest < Test::Unit::TestCase
225
227
  end
226
228
 
227
229
 
228
- def test_sys_ind_basename
229
- assert_equal("hello.fasta", SpecID::Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
230
- assert_equal("hello.fasta", SpecID::Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
230
+ def Xtest_sys_ind_basename
231
+ assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
232
+ assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
231
233
  end
232
234
 
233
- def test_modifications
234
- obj = SpecID::Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
235
+ def Xtest_modifications
236
+ obj = Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
235
237
  answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
236
238
  assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
237
239
 
238
240
  ## need more here
239
241
  end
240
242
 
241
- def test_non_standard_aa_removal
242
- hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
243
- cl = proc {|v| SpecID::Sequest::PepXML::SearchHit.remove_non_amino_acids(v) }
244
- hash.each do |k,v|
245
- assert_equal(v, cl.call(k))
246
- end
247
- end
248
-
249
- def test_modification_info
243
+ def Xtest_modification_info
250
244
  hash = {
251
245
  :mod_nterm_mass => 520.2,
252
246
  :modified_peptide => "MOD*IFI^E&D",
253
247
  :mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
254
248
  }
255
249
  answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
256
- string = SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
250
+ string = Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
257
251
  assert_match(_re('<modification_info'), answ)
258
252
  assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
259
253
  assert_match(_re(" modified_peptide=\"MOD*IFI^E&amp;D\""), answ)
@@ -270,22 +264,73 @@ class SequestTest < Test::Unit::TestCase
270
264
  end
271
265
 
272
266
  def test_modifications
273
- params = SpecID::Sequest::Params.new(@tf_params)
267
+ params = Sequest::Params.new(@tf_params)
274
268
  mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
275
269
  params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
276
270
  params.term_diff_search_options = "14.20000 12.33000"
277
- assert 1
278
- =begin
279
- mod = SpecID::Sequest::PepXML::Modifications(params, mod_string)
280
- SpecID::Sequest::PepXML::Modifications
281
- peptide = "PEPTIDE"
271
+ mod = Sequest::PepXML::Modifications.new(params, mod_string)
282
272
  ## no mods
273
+ peptide = "PEPTIDE"
283
274
  assert_equal(nil, mod.modification_info(peptide))
284
275
  peptide = "]M*EC^S@IDM#M*EMSCM["
285
- p mod.modification_info(peptide)
286
- =end
276
+ modinfo = mod.modification_info(peptide)
277
+ assert_equal(peptide, modinfo.modified_peptide)
278
+ assert_in_delta(146.40054, modinfo.mod_nterm_mass, 0.000001)
279
+ assert_in_delta(160.52994, modinfo.mod_cterm_mass, 0.000001)
280
+ end
287
281
 
282
+ # splits string on ' ' and matches the line found by find_line_regexp in
283
+ # lines
284
+ def match_modline_pieces(lines, find_line_regexp, string)
285
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
286
+ lines.each do |line|
287
+ if line =~ find_line_regexp
288
+ pieces.each do |piece|
289
+ assert_match(piece, line)
290
+ end
291
+ end
292
+ end
288
293
  end
289
294
 
295
+ def test_modifications_in_run
296
+ if File.exist? @tfiles_l
297
+ modfiles_sequest_dir = @tfiles_l + 'opd1_2runs_2mods/sequest/'
298
+ modfiles_data_dir = @tfiles_l + 'opd1_2runs_2mods/data/'
299
+ srgfile = modfiles_sequest_dir + 'tmp.srg'
300
+ out_path = modfiles_sequest_dir + 'pepxml'
301
+ modfiles = %w(020 040).map do |file|
302
+ modfiles_sequest_dir + file + ".srf"
303
+ end
304
+ objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(srgfile), {:ms_data => modfiles_data_dir, :out_path => out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
305
+ %w(020 040).each do |file|
306
+ fn = out_path + '/' + file + '.xml'
307
+ assert(File.exist?(fn), "file #{fn} exists")
308
+ beginning = IO.read(fn)
309
+ lines = beginning.split("\n")
310
+ [
311
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
312
+
313
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
314
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
315
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
316
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
317
+ ].each do |a,b|
318
+ match_modline_pieces(lines, a, b)
319
+ end
320
+ [
321
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
322
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
323
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
324
+ '</modification_info>',
325
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
326
+ ].each do |line|
327
+ assert_match(/#{Regexp.escape(line)}/, beginning, "a modification info for a peptide")
328
+ end
329
+ File.unlink(fn) unless NODELETE
330
+ end
331
+ else
332
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
333
+ end
334
+ end
290
335
  end
291
336