mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
data/test/tc_filter.rb ADDED
@@ -0,0 +1,203 @@
1
+
2
+ require 'test/unit'
3
+ require 'spec_id/filter'
4
+ require 'spec_id/srf'
5
+ require 'set_from_hash'
6
+ require File.dirname(__FILE__) + '/test_helper'
7
+
8
+ $VERBOSE = false
9
+
10
+
11
+ class TestFilter < Test::Unit::TestCase
12
+
13
# Builds the fixture paths used by the tests in this case.
#
# All paths are relative to this test file's directory. When the
# large-fixture directory exists, two .srg files are (re)written there, each
# holding absolute .srf paths, one per line: @zero_srg lists 000.srf and
# @both_srg lists 000.srf followed by 020.srf.
def initialize(arg)
  super(arg)
  here = File.dirname(__FILE__)
  @tfiles = here + '/tfiles/'
  @tfiles_l = here + '/tfiles_large/'
  @small_inv = @tfiles + 'bioworks_with_INV_small.xml'
  @small = @tfiles + 'bioworks_small.xml'

  ## SRF:
  @zero_srf = @tfiles_l + 'opd1_cat_inv/000.srf'
  @twenty_srf = @tfiles_l + 'opd1_cat_inv/020.srf'
  @zero_srg = @tfiles_l + 'bioworks_000.srg'
  @both_srg = @tfiles_l + 'bioworks_both.srg'

  ## FASTA:
  @opd1_fasta = @tfiles_l + 'opd1_cat_inv/ecoli_K12_ncbi_20060321.fasta'
  @opd1_correct_fasta = @tfiles_l + 'opd1_cat_inv/correct_fictitious_314.fasta'

  # The .srg files can only be generated when the large fixtures are present.
  return unless File.exist?(@tfiles_l)

  zero_path = File.expand_path(@zero_srf)
  twenty_path = File.expand_path(@twenty_srf)
  File.open(@zero_srg, 'w') { |fh| fh.puts(zero_path) }
  File.open(@both_srg, 'w') do |fh|
    fh.puts(zero_path)
    fh.puts(twenty_path)
  end
end
32
+
33
# Checks SpecID::Filter#protein_fppr for five proteins carrying 4, 4, 3, 2
# and 2 peptides. The call returns four values; only the second (mean) and
# fourth (standard deviation) are asserted here.
# NOTE(review): the meaning of the 2nd and 3rd arguments (1 / 14 and 10) is
# not visible from this test -- confirm against SpecID::Filter#protein_fppr.
def test_protein_fppr
  peptide_counts = [4, 4, 3, 2, 2]

  _num, mean, _std_num, std = SpecID::Filter.new.protein_fppr(peptide_counts, 1, 10)
  assert_equal(0, mean, "no prots completely wrong")
  assert_equal(0, std, "no prots completely wrong")

  _num, mean, _std_num, std = SpecID::Filter.new.protein_fppr(peptide_counts, 14, 10)
  assert_equal(4.0/5, mean, "only one prot right")
  assert_equal(0.0, std, "only one prot right")
end
42
+
43
# Exercises GenericSpecID#filter_sequest (via assert_filter) on six
# hand-built SRF::OUT::Pep hits.
#
# Judging by the expectations below, the filter array appears to be
# [min xcorr for +1, min xcorr for +2, min xcorr for +3, min deltacn, max ppm]
# -- TODO confirm against SpecID#filter_sequest.
def test_filter_sequest
  hit_attributes = [
    {:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
    {:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
    {:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
  ]
  peps = hit_attributes.map { |attrs| SRF::OUT::Pep.new.set_from_hash(attrs) }
  sp = GenericSpecID.new.set_from_hash({:peps => peps})

  ## default: the hit with :deltacn => 1.1 is never counted
  assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 5, "all passing")
  assert_filter([1.6, 1.6, 1.6, 0.1, 50], sp, 0, "xcorrs too high")
  assert_filter([1.6, 1.0, 1.0, 0.1, 50], sp, 4, "one xcorr too high")
  assert_filter([1.0, 1.6, 1.0, 0.1, 50], sp, 2, "one xcorr too high")
  assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 4, "one xcorr too high")
  assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 0, "high deltacn")

  ## with include_deltcn = true the :deltacn => 1.1 hit is counted as well
  ## (presumably the "deltacn star" hits -- confirm in filter_sequest)
  assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 6, "all passing", true)
  assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 1, "high deltacn", true)
  assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 5, "one xcorr too high", true)
end
69
+
70
# Asserts that spec_id.filter_sequest(filter_args, include_deltcn) returns
# exactly +expected_passing+ elements; +message+ is the failure message.
def assert_filter(filter_args, spec_id, expected_passing, message, include_deltcn=false)
  passing = spec_id.filter_sequest(filter_args, include_deltcn)
  assert_equal(expected_passing, passing.size, message)
end
74
+
75
# Verifies SpecID.passing_proteins for two protein classes
# (SpecID::GenericProt and SRF::OUT::Prot). Six peptides reference proteins
# prot_0..prot_4 out of eight available, so only those five may be returned.
# The method is called three ways: with no mode, with :update and with :new.
def test_passing_proteins
  hash_prots = (0..7).map do |n|
    SpecID::GenericProt.new.set_from_hash({:reference => "prot_#{n}", :peps => []})
  end
  arr_prots = (0..7).map do |n|
    SRF::OUT::Prot.new.set_from_hash({:reference => "prot_#{n}", :peps => []})
  end

  expected_refs = %w(prot_0 prot_1 prot_2 prot_3 prot_4)
  expected_pepseqs = [
    ['prot_0', %w(PEP0 PEP4)],
    ['prot_1', %w(PEP0 PEP1 PEP5)],
    ['prot_2', %w(PEP1 PEP5)],
    ['prot_3', %w(PEP2)],
    ['prot_4', %w(PEP3)],
  ]
  find_prot_0 = lambda { |list| list.find { |prot| prot.reference == 'prot_0' } }

  [hash_prots, arr_prots].each do |prots|
    pep_attributes = [
      {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
      {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
      {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
      {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
      {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
      {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
    ]
    peps = pep_attributes.map { |attrs| SRF::OUT::Pep.new.set_from_hash(attrs) }

    ## no mode given: only the set of returned references is checked
    prts = SpecID.passing_proteins(peps)
    assert_equal(expected_refs, prts.map { |prot| prot.reference }.sort)

    ## :update
    prts = SpecID.passing_proteins(peps, :update)
    prot_0_before = find_prot_0.call(prts)
    expected_pepseqs.each { |ref, seqs| assert_protein_match(prts, ref, seqs) }
    assert_equal(expected_refs, prts.map { |prot| prot.reference }.sort, "just the right number of prots")
    # NOTE(review): both ids below come from the same +prts+ array, so this
    # assertion cannot fail; comparing against prots[0] may have been the
    # intent -- confirm what :update is supposed to return.
    assert_equal(prot_0_before.__id__, find_prot_0.call(prts).__id__, "proteins are identical")

    prot_0_id_from_update = find_prot_0.call(prts).__id__

    ## :new -- must hand back a different prot_0 object than :update did
    prts = SpecID.passing_proteins(peps, :new)
    expected_pepseqs.each { |ref, seqs| assert_protein_match(prts, ref, seqs) }
    assert_equal(expected_refs, prts.map { |prot| prot.reference }.sort, "just the right number of prots")
    assert_not_equal(prot_0_id_from_update, find_prot_0.call(prts).__id__, "proteins are not identical")
  end
end
134
+
135
# Asserts that the protein in +prts+ whose reference equals +ref+ carries
# exactly the peptides with amino-acid sequences +pepseqs+, ignoring order.
#
# prts::    objects responding to #reference and #peps
# ref::     reference of the protein to look up
# pepseqs:: expected peptide sequences, in any order
# message:: optional failure message for the sequence comparison
def assert_protein_match(prts, ref, pepseqs, message='')
  prt = prts.find { |candidate| candidate.reference == ref }
  # Fail with a readable assertion instead of a NoMethodError on nil.
  assert_not_nil(prt, "no protein with reference #{ref.inspect}")
  actual_pepseqs = prt.peps.map { |pep| pep.aaseq }.sort
  # Both sides are sorted so the caller's ordering does not matter.
  assert_equal(pepseqs.sort, actual_pepseqs, message)
end
141
+
142
# Running the filter with an empty argument list must print a usage message
# to stdout.
def test_usage
  output = capture_stdout do
    SpecID::Filter.run_from_argv([])
  end
  assert_match('usage:', output)
end
148
+
149
# Runs the filter on two small bioworks XML fixtures and compares the
# reported hit counts with frozen (previously recorded) values. The second
# run additionally passes "-f INV_".
def test_basic_bioworks_xml
  shared_flags = %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000)

  output = capture_stdout do
    SpecID::Filter.run_from_argv([@small] + shared_flags)
  end
  ## FROZEN:
  assert_match(/pep_hits\s+4/, output)
  assert_match(/uniq_aa_hits\s+4/, output)
  assert_match(/prot_hits\s+4/, output)

  output = capture_stdout do
    SpecID::Filter.run_from_argv([@small_inv] + shared_flags + %w(-f INV_))
  end
  #puts ""
  #puts output
  ## FROZEN:
  assert_match(/pep_hits\s+151/, output)
  assert_match(/uniq_aa_hits\s+75/, output)
  assert_match(/prot_hits\s+13/, output)
end
170
+
171
# Runs the filter on the single-file .srg fixture twice and compares the
# output with frozen values: once with "-f INV_", once with
# --occams_razor / --cys / --t. Prints a notice and performs one trivial
# assertion when the large-fixture directory is missing.
def test_srf
  unless File.exist? @tfiles_l
    assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})" ))
    return
  end

  shared_flags = %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000)

  ## dcy
  output = capture_stdout do
    SpecID::Filter.run_from_argv([@zero_srg] + shared_flags + %w(-f INV_))
  end
  ## FROZEN:
  #puts ""
  #puts output
  assert_match(/pep_hits\s+2111\s+107\.2/, output)
  assert_match(/uniq_aa_hits\s+2034\s+106\.6/, output)
  assert_match(/prot_hits\s+1454\s+100\.0/, output)

  ## cys tps fps COMBINED
  # tps are fictitious!
  # the --cys value is the background freq for ecoli that this file's from
  combined_flags = %w(--occams_razor --cys 0.0115866200193321 --t)
  output = capture_stdout do
    SpecID::Filter.run_from_argv([@zero_srg] + shared_flags + combined_flags + [@opd1_correct_fasta])
  end
  #puts ""
  #puts output
  ## FROZEN:
  assert_match(/num\s+tps%\s+cys%/, output, "header")
  assert_match(/pep_hits\s+4374\s+9\d\.\d.*\s+83\.7/, output)
  assert_match(/uniq_aa_hits\s+4203\s+9\d\.\d.*\s+82\.8/, output)
  assert_match(/prot_hits\s+2986\s+9\d\..*\s+7\d\./, output)
  assert_match(/occams.*\s+2986\s+8\d\..*\s+7\d\./, output)
end
202
+
203
+ end
data/test/tc_gi.rb CHANGED
@@ -7,14 +7,11 @@ class Gi2AnnotTest < Test::Unit::TestCase
7
7
  ROOT_DIR = File.join(File.dirname(__FILE__), '..')
8
8
 
9
9
  def test_single_query
10
- #begin
11
- annot = GI.gi2annot([16130548]).first
12
- #rescue
13
- puts "SKIPPING gi2annot test since no internet connection available:"
14
- puts "#{$!}"
15
- assert true
16
- #else
17
- assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]'+"\n", annot)
18
- #end
10
+ annot = GI.gi2annot([16130548])
11
+ if annot
12
+ assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]', annot.first)
13
+ else
14
+ assert_nil( puts("SKIPPING gi test (no internet connection available)") )
15
+ end
19
16
  end
20
17
  end
@@ -30,6 +30,30 @@ class IDPrecisionTest < Test::Unit::TestCase
30
30
  PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
31
31
  75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
32
32
  95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
33
+ 155, 1.0, 67, 1.0, 123, 1.0, 155, 1.0, 125, 1.0, 155, 1.0
34
+ 186, 1.0, 85, 1.0, 154, 1.0, 186, 1.0, 156, 1.0, 186, 1.0
35
+ 196, 1.0, 90, 1.0, 161, 1.0, 196, 1.0, 163, 1.0, 196, 1.0
36
+ 214, 1.0, 94, 1.0, 168, 1.0, 214, 1.0, 170, 1.0, 214, 1.0
37
+ 215, 1.0, 95, 1.0, 169, 1.0, 215, 1.0, 171, 1.0, 215, 1.0
38
+ 217, 0.995391705069124, 97, 0.989690721649485, 171, 0.994152046783626, 217, 0.995391705069124, 173, 0.994219653179191, 217, 0.995391705069124
39
+ 219, 0.995433789954338, 99, 0.98989898989899, 172, 0.994186046511628, 219, 0.995433789954338, 175, 0.994285714285714, 219, 0.995433789954338
40
+ 227, 0.995594713656388, 106, 0.990566037735849, 180, 0.994444444444444, 227, 0.995594713656388, 183, 0.994535519125683, 227, 0.995594713656388
41
+ 228, 0.995614035087719, 107, 0.990654205607477, 181, 0.994475138121547, 228, 0.995614035087719, 184, 0.994565217391304, 228, 0.995614035087719
42
+ 229, 0.991266375545852, 108, 0.981481481481482, 182, 0.989010989010989, 229, 0.991266375545852, 185, 0.989189189189189, 229, 0.991266375545852
43
+ END
44
+
45
+ # This was the result we were getting before first hashing on protein
46
+ # sequences and doing unique peptide hits. It is very similar (but not
47
+ # exactly the same) to what we are doing now. Must have something to do
48
+ # with the way things are hashed out.
49
+ before_doing_uniq_peptides=<<END
50
+ # NH = number of hits
51
+ # TP = true positives
52
+ # FP = false positives
53
+ # PR = precision = TP/(TP+FP)
54
+ PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
55
+ 75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
56
+ 95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
33
57
  125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
34
58
  155, 1.0, 85, 1.0, 154, 1.0, 155, 1.0, 156, 1.0, 155, 1.0
35
59
  186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
@@ -49,10 +73,17 @@ END
49
73
  cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
50
74
  #puts "RUNNING: #{cmd}"
51
75
  reply = `#{cmd}`
76
+ # This is what we were getting before hashing for unique peptides
77
+ # It is very similar (but not identical to previous output)
52
78
  string =<<END
53
79
  Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
54
80
  ./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
55
81
  END
82
+
83
+ string =<<NEWEND
84
+ Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
85
+ ./test/tfiles/bioworks_with_INV_small.xml 228.939375794224 107.877585995136 181.929045912105 228.939375794224 184.924437525838 228.939375794224
86
+ NEWEND
56
87
  assert_equal(string, reply, "area under the curve")
57
88
  end
58
89
  end
data/test/tc_mzxml.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'test/unit'
2
2
  require 'spec/mzxml/parser'
3
3
 
4
-
5
4
  class SpecMzXML < Test::Unit::TestCase
6
5
  def initialize(arg)
7
6
  super(arg)
@@ -49,23 +48,24 @@ class SpecMzXML < Test::Unit::TestCase
49
48
  sr_raw = @tfiles + 'smallraw.RAW'
50
49
  sr_noext = @tfiles + 'smallraw'
51
50
  sr_mzxml = @tfiles + 'smallraw.mzXML'
52
- ob = Spec::MzXML::Parser.new
51
+ klass = Spec::MzXML
53
52
  # given raw
54
- file = ob.file_to_mzxml(sr_raw)
53
+ file = klass.file_to_mzxml(sr_raw)
55
54
  file_to_mzxml_assert(file)
56
55
  # given mzXML
57
- file = ob.file_to_mzxml(sr_mzxml)
56
+ file = klass.file_to_mzxml(sr_mzxml)
58
57
  file_to_mzxml_assert(file)
59
58
  File.unlink(sr_mzxml)
60
59
  # given basename (and no mzXML)
61
- file = ob.file_to_mzxml(sr_noext)
60
+ file = klass.file_to_mzxml(sr_noext)
62
61
  file_to_mzxml_assert(file)
63
62
  # given basename (and mzXML)
64
- file = ob.file_to_mzxml(sr_noext)
63
+ file = klass.file_to_mzxml(sr_noext)
65
64
  file_to_mzxml_assert(file)
66
65
  File.unlink(sr_mzxml)
67
66
  else
68
67
  puts "SKIPPING tests requiring 't2x' to convert RAW to mzXML"
68
+ puts "(look in the archive folder of the gem for t2x binary for linux)"
69
69
  end
70
70
  end
71
71
 
@@ -84,3 +84,5 @@ class SpecMzXML < Test::Unit::TestCase
84
84
  end
85
85
 
86
86
  end
87
+
88
+
@@ -9,7 +9,8 @@ class PeptideParentTimesTest < Test::Unit::TestCase
9
9
  end
10
10
 
11
11
  def test_blank
12
- puts "NOT RUNNING ANY TESTS FOR PEPTIDE_PARENT_TIMES RIGHT NOW"
12
+ ## need to finish this guy up:
13
+ puts "\nSKIPPING: tests for peptide_parent_times"
13
14
  end
14
15
 
15
16
  def Xtest_run
data/test/tc_precision.rb CHANGED
@@ -3,7 +3,7 @@ require 'test/unit'
3
3
  require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
4
4
 
5
5
 
6
- class PrecisionTest < Test::Unit::TestCase
6
+ class PrecTest < Test::Unit::TestCase
7
7
  ROOT_DIR = File.join(File.dirname(__FILE__), "..")
8
8
 
9
9
  def initialize(arg)
data/test/tc_proph.rb CHANGED
@@ -16,8 +16,8 @@ class ProphTest < Test::Unit::TestCase
16
16
 
17
17
  def test_parse_protxml_file
18
18
  file = @tfiles + 'opd1/000_020_3prots-prot.xml'
19
- #obj = SpecID::Proph::ProtSummary.new
20
- obj = SpecID::Proph::ProtSummary.new(file)
19
+ #obj = Proph::ProtSummary.new
20
+ obj = Proph::ProtSummary.new(file)
21
21
  assert_equal(3, obj.prot_groups.size)
22
22
  assert_equal("1.00", obj.prot_groups.first.probability)
23
23
  assert_equal("0.98", obj.prot_groups[2].probability)
@@ -38,7 +38,7 @@ class ProphTest < Test::Unit::TestCase
38
38
 
39
39
 
40
40
  def Xtest_filter_by_min_pep_prob
41
- obj = SpecID::Proph::Pep::Parser.new
41
+ obj = Proph::Pep::Parser.new
42
42
  new_file = "tfiles/tmp.xml"
43
43
  assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
44
44
  obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
@@ -48,7 +48,7 @@ class ProphTest < Test::Unit::TestCase
48
48
  end
49
49
 
50
50
  def Xtest_uniq_by_seqcharge
51
- cls = SpecID::Proph::Pep
51
+ cls = Proph::Pep
52
52
  p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
53
53
  p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
54
54
  p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
@@ -91,7 +91,7 @@ class ProphTest < Test::Unit::TestCase
91
91
  s1 = Spec::Scan.new(1,2,0.10, 300.2, i1, p1)
92
92
  s2 = Spec::Scan.new(2,2,0.20, 301.1, i2, p2)
93
93
  s3 = Spec::Scan.new(3,2,0.30, 302.0, i3, p3)
94
- scan = SpecID::Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
94
+ scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
95
95
  tot_inten = i1 + i2 + i3
96
96
  tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
97
97
  {:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
@@ -1,6 +1,7 @@
1
1
 
2
2
  require 'test/unit'
3
- require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
3
+ require 'spec_id/protein_summary'
4
+ require File.dirname(__FILE__) + '/test_helper'
4
5
 
5
6
 
6
7
 
@@ -20,16 +21,29 @@ class ProphProtSummaryTest < Test::Unit::TestCase
20
21
  @tf_proph_cat_inv_summary_html = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.html'
21
22
  @tf_proph_cat_inv_summary_png = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.png'
22
23
  @tf_peptide_count = @tfiles + "peptide_counts.tmp.txt"
23
- @cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S protein_summary.rb "
24
24
  end
25
25
 
26
# Runs ProteinSummary in-process. Accepts either an argument array or a
# single command-line string, which is split on runs of whitespace.
def runit(string_or_args)
  args = string_or_args.is_a?(String) ? string_or_args.split(/\s+/) : string_or_args
  ProteinSummary.new.create_from_command_line_args(args)
end
34
+
35
+
26
36
  def test_usage
27
- assert_match(/usage:/, `#{@cmd}`)
37
+ output = capture_stdout {
38
+ runit('')
39
+ }
40
+ assert_match(/usage:/, output)
28
41
  end
29
42
 
30
- def Xtest_proph_basic
43
+ def test_proph_basic
31
44
  if File.exist? @tfiles_l
32
- print `#{@cmd} -c 5.0 #{@tf_proph}`
45
+ runit "-c 5.0 #{@tf_proph}"
46
+ ProteinSummary.new.create_from_command_line_args([@tf_proph, '-c', '5.0'])
33
47
  assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
34
48
  string = IO.read(@tf_summary)
35
49
  assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
@@ -41,7 +55,7 @@ class ProphProtSummaryTest < Test::Unit::TestCase
41
55
  end
42
56
 
43
57
  def test_bioworks_basic
44
- print `#{@cmd} #{@tf_bioworks_small}`
58
+ runit "#{@tf_bioworks_small}"
45
59
  assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
46
60
  File.unlink @tf_bioworks_small_summary_html unless NODELETE
47
61
 
@@ -49,23 +63,32 @@ class ProphProtSummaryTest < Test::Unit::TestCase
49
63
  end
50
64
 
51
65
  def test_bioworks_with_precision
52
- `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
53
- assert_match('TP : 106', IO.read(@tf_bioworks_small_summary_html))
54
- assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
66
+ ## Could reimplement a separate file approach?
67
+ #reply = `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
68
+ runit "#{@tf_bioworks_small} --precision"
69
+ assert_match(/# hits.*106/m, IO.read(@tf_bioworks_small_summary_html))
70
+ #assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
71
+ #assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
55
72
  assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
56
73
  File.unlink @tf_bioworks_small_summary_html unless NODELETE
57
74
  end
58
75
 
59
- def Xtest_proph_with_precision
76
+ def test_proph_with_precision
60
77
  #puts @cmd
61
- print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --precision`
78
+ runit "#{@tf_proph_cat_inv} -f INV_ --precision"
79
+ html = IO.read(@tf_proph_cat_inv_summary_html)
80
+ assert_match(/# hits/, html, "in #{@tf_proph_cat_inv_summary_html}")
81
+ assert_match(/2.*0\.0000/m, html, "in #{@tf_proph_cat_inv_summary_html}")
82
+ assert_match(/3.*0\.3333/m, html, "in #{@tf_proph_cat_inv_summary_html}")
83
+ assert_match(/7.*0\.5714/m, html, "in #{@tf_proph_cat_inv_summary_html}")
84
+
62
85
  File.unlink @tf_proph_cat_inv_summary_html unless NODELETE
63
86
  File.unlink @tf_proph_cat_inv_summary_png unless NODELETE
64
87
  end
65
88
 
66
- def Xtest_peptide_count
89
+ def test_peptide_count
67
90
  if File.exist? @tfiles_l
68
- print `#{@cmd} -c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}`
91
+ runit "-c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}"
69
92
  assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
70
93
  file = IO.read(@tf_peptide_count)
71
94
  assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
data/test/tc_sequest.rb CHANGED
@@ -4,8 +4,10 @@
4
4
  require 'spec_id'
5
5
  require 'spec_id/sequest'
6
6
  require 'test/unit'
7
+ require 'spec/mzxml'
7
8
 
8
9
 
10
+ NODELETE = false
9
11
 
10
12
  class SequestTest < Test::Unit::TestCase
11
13
 
@@ -18,10 +20,10 @@ class SequestTest < Test::Unit::TestCase
18
20
  @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
19
21
  end
20
22
 
21
- def test_set_from_bioworks
23
+ def Xtest_set_from_bioworks
22
24
  if File.exist? @tfiles_l
23
25
  out_path = '.'
24
- pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(@tf_params, @tf_bioworks_xml, @tf_mzxml_path, out_path)
26
+ pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(@tf_bioworks_xml, @tf_params, {:ms_path => @tf_mzxml_path, :out_path => out_path})
25
27
  pepxml_objs.each do |obj|
26
28
  assert(obj.spectrum_queries.size > 2)
27
29
  assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
@@ -55,10 +57,10 @@ class SequestTest < Test::Unit::TestCase
55
57
  mzxml_path = @tfiles + "opd1"
56
58
  out_path = @tfiles
57
59
  pepxml_version = 18
58
- pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params, bioworks_xml, mzxml_path, out_path, pepxml_version, "trypsin")
60
+ pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => pepxml_version, :sample_enzyme => "trypsin"})
59
61
  puts "TOOK #{Time.new - st}secs"
60
62
  po = pepxml_objs.first
61
- assert_equal(pepxml_version, SpecID::Sequest::PepXML.pepxml_version)
63
+ assert_equal(pepxml_version, Sequest::PepXML.pepxml_version)
62
64
 
63
65
  # MSMSPipelineAnalysis
64
66
  pipe = po.msms_pipeline_analysis
@@ -197,9 +199,9 @@ class SequestTest < Test::Unit::TestCase
197
199
 
198
200
 
199
201
 
200
- def test_calc_num_tol_term
201
- params = SpecID::Sequest::Params.new(@tf_params)
202
- scall = SpecID::Sequest::PepXML::SearchHit
202
+ def Xtest_calc_num_tol_term
203
+ params = Sequest::Params.new(@tf_params)
204
+ scall = Sequest::PepXML::SearchHit
203
205
  sym = :calc_num_tol_term
204
206
  assert_equal(2, scall.send(sym, params, "K.EPTIDR.E"))
205
207
  assert_equal(1, scall.send(sym, params, "K.PEPTIDR.E"))
@@ -207,9 +209,9 @@ class SequestTest < Test::Unit::TestCase
207
209
  assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
208
210
  end
209
211
 
210
- def test_calc_num_missed_cleavages
211
- params = SpecID::Sequest::Params.new(@tf_params)
212
- scall = SpecID::Sequest::PepXML::SearchHit
212
+ def Xtest_calc_num_missed_cleavages
213
+ params = Sequest::Params.new(@tf_params)
214
+ scall = Sequest::PepXML::SearchHit
213
215
  sym = :calc_num_missed_cleavages
214
216
  assert_equal(0, scall.send(sym, params, "K.EPTIDR.E"))
215
217
  assert_equal(0, scall.send(sym, params, "K.PEPTIDR.E"))
@@ -225,35 +227,27 @@ class SequestTest < Test::Unit::TestCase
225
227
  end
226
228
 
227
229
 
228
- def test_sys_ind_basename
229
- assert_equal("hello.fasta", SpecID::Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
230
- assert_equal("hello.fasta", SpecID::Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
230
+ def Xtest_sys_ind_basename
231
+ assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
232
+ assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
231
233
  end
232
234
 
233
- def test_modifications
234
- obj = SpecID::Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
235
+ def Xtest_modifications
236
+ obj = Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
235
237
  answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
236
238
  assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
237
239
 
238
240
  ## need more here
239
241
  end
240
242
 
241
- def test_non_standard_aa_removal
242
- hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
243
- cl = proc {|v| SpecID::Sequest::PepXML::SearchHit.remove_non_amino_acids(v) }
244
- hash.each do |k,v|
245
- assert_equal(v, cl.call(k))
246
- end
247
- end
248
-
249
- def test_modification_info
243
+ def Xtest_modification_info
250
244
  hash = {
251
245
  :mod_nterm_mass => 520.2,
252
246
  :modified_peptide => "MOD*IFI^E&D",
253
247
  :mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
254
248
  }
255
249
  answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
256
- string = SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
250
+ string = Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
257
251
  assert_match(_re('<modification_info'), answ)
258
252
  assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
259
253
  assert_match(_re(" modified_peptide=\"MOD*IFI^E&amp;D\""), answ)
@@ -270,22 +264,73 @@ class SequestTest < Test::Unit::TestCase
270
264
  end
271
265
 
272
266
  def test_modifications
273
- params = SpecID::Sequest::Params.new(@tf_params)
267
+ params = Sequest::Params.new(@tf_params)
274
268
  mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
275
269
  params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
276
270
  params.term_diff_search_options = "14.20000 12.33000"
277
- assert 1
278
- =begin
279
- mod = SpecID::Sequest::PepXML::Modifications(params, mod_string)
280
- SpecID::Sequest::PepXML::Modifications
281
- peptide = "PEPTIDE"
271
+ mod = Sequest::PepXML::Modifications.new(params, mod_string)
282
272
  ## no mods
273
+ peptide = "PEPTIDE"
283
274
  assert_equal(nil, mod.modification_info(peptide))
284
275
  peptide = "]M*EC^S@IDM#M*EMSCM["
285
- p mod.modification_info(peptide)
286
- =end
276
+ modinfo = mod.modification_info(peptide)
277
+ assert_equal(peptide, modinfo.modified_peptide)
278
+ assert_in_delta(146.40054, modinfo.mod_nterm_mass, 0.000001)
279
+ assert_in_delta(160.52994, modinfo.mod_cterm_mass, 0.000001)
280
+ end
287
281
 
282
# Splits +string+ on whitespace and asserts that every resulting piece occurs
# literally in each element of +lines+ that matches +find_line_regexp+.
# Attribute order within a line therefore does not matter.
#
# Fails when no line matches +find_line_regexp+; otherwise a missing line
# would let the check pass without making a single assertion.
#
# lines::            array of strings to search
# find_line_regexp:: selects the line(s) to inspect
# string::           whitespace-separated pieces each selected line must contain
def match_modline_pieces(lines, find_line_regexp, string)
  pieces = string.split(' ').map { |piece| /#{Regexp.escape(piece)}/ }
  matching = lines.select { |line| line =~ find_line_regexp }
  assert(!matching.empty?, "no line matches #{find_line_regexp.inspect}")
  matching.each do |line|
    pieces.each { |piece| assert_match(piece, line) }
  end
  # Same return value as the original lines.each loop.
  lines
end
289
294
 
295
# Converts the two-run, two-modification SRF fixtures (020 and 040) to
# pepXML and checks, for each output file, the aminoacid_modification lines,
# the diff_search_options parameter and one peptide's modification_info.
# Output files are removed afterwards unless NODELETE is set. Prints a notice
# and performs one trivial assertion when the large fixtures are missing.
def test_modifications_in_run
  unless File.exist? @tfiles_l
    assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
    return
  end

  run_names = %w(020 040)
  sequest_dir = @tfiles_l + 'opd1_2runs_2mods/sequest/'
  data_dir = @tfiles_l + 'opd1_2runs_2mods/data/'
  out_path = sequest_dir + 'pepxml'
  srf_files = run_names.map { |run| sequest_dir + run + ".srf" }

  srg = SRFGroup.new(srf_files).to_srg(sequest_dir + 'tmp.srg')
  Sequest::PepXML.set_from_bioworks(srg, {:ms_data => data_dir, :out_path => out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'})

  # [regexp selecting a line, pieces that line must contain]
  expected_line_pieces = [
    [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
    [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
    [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
    [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
    [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
  ]
  # literal fragments that must occur somewhere in each file
  expected_fragments = [
    '<modification_info modified_peptide="Y#RLGGS#T#K">',
    '<mod_aminoacid_mass position="1" mass="243.1559"/>',
    '<mod_aminoacid_mass position="7" mass="167.0581"/>',
    '</modification_info>',
    '<mod_aminoacid_mass position="9" mass="181.085"/>'
  ]

  run_names.each do |run|
    fn = out_path + '/' + run + '.xml'
    assert(File.exist?(fn), "file #{fn} exists")
    contents = IO.read(fn)
    lines = contents.split("\n")
    expected_line_pieces.each do |line_regexp, pieces|
      match_modline_pieces(lines, line_regexp, pieces)
    end
    expected_fragments.each do |fragment|
      assert_match(/#{Regexp.escape(fragment)}/, contents, "a modification info for a peptide")
    end
    File.unlink(fn) unless NODELETE
  end
end
290
335
  end
291
336