mspire 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/test/tc_filter.rb
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
|
2
|
+
require 'test/unit'
|
3
|
+
require 'spec_id/filter'
|
4
|
+
require 'spec_id/srf'
|
5
|
+
require 'set_from_hash'
|
6
|
+
require File.dirname(__FILE__) + '/test_helper'
|
7
|
+
|
8
|
+
$VERBOSE = false
|
9
|
+
|
10
|
+
|
11
|
+
class TestFilter < Test::Unit::TestCase
|
12
|
+
|
13
|
+
# Builds the fixture paths used by the tests in this class.
#
# All paths are derived from this file's directory: small fixtures live under
# 'tfiles/', large ones under 'tfiles_large/'.  When the large-fixture
# directory exists, the two .srg list files are (re)written so that they hold
# the expanded paths of the .srf fixtures.
def initialize(arg)
  super(arg)
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
  @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
  @small_inv = @tfiles + 'bioworks_with_INV_small.xml'
  @small = @tfiles + 'bioworks_small.xml'

  ## SRF:
  @zero_srf = @tfiles_l + 'opd1_cat_inv/000.srf'
  @twenty_srf = @tfiles_l + 'opd1_cat_inv/020.srf'
  @zero_srg = @tfiles_l + 'bioworks_000.srg'
  @both_srg = @tfiles_l + 'bioworks_both.srg'

  ## FASTA:
  @opd1_fasta = @tfiles_l + 'opd1_cat_inv/ecoli_K12_ncbi_20060321.fasta'
  @opd1_correct_fasta = @tfiles_l + 'opd1_cat_inv/correct_fictitious_314.fasta'

  # Nothing to write when the large fixtures are not installed.
  return unless File.exist?(@tfiles_l)

  # One expanded .srf path per line.
  File.open(@zero_srg, 'w') do |out|
    out.puts(File.expand_path(@zero_srf))
  end
  File.open(@both_srg, 'w') do |out|
    [@zero_srf, @twenty_srf].each { |srf| out.puts(File.expand_path(srf)) }
  end
end
|
32
|
+
|
33
|
+
# Checks the mean and standard deviation reported by
# SpecID::Filter#protein_fppr for a fixed peptides-per-protein list.
# Only the 2nd and 4th elements of the returned 4-tuple are asserted.
# NOTE(review): the meaning of the 2nd and 3rd call arguments (1 / 14 and 10)
# is not visible here -- confirm against SpecID::Filter#protein_fppr.
def test_protein_fppr
  counts = [4,4,3,2,2]
  filter_class = SpecID::Filter

  _, mean, _, stdev = filter_class.new.protein_fppr(counts, 1, 10)
  assert_equal(0, mean, "no prots completely wrong")
  assert_equal(0, stdev, "no prots completely wrong")

  _, mean, _, stdev = filter_class.new.protein_fppr(counts, 14, 10)
  assert_equal(4.0/5, mean, "only one prot right")
  assert_equal(0.0, stdev, "only one prot right")
end
|
42
|
+
|
43
|
+
# Builds six peptides from attribute hashes, wraps them in a GenericSpecID and
# checks how many survive filter_sequest for a series of threshold sets.
# Each row below is [filter_args, expected number passing, message]; rows are
# evaluated in order through assert_filter.
def test_filter_sequest
  attr_sets = [
    {:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
    {:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
    {:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
  ]
  peptides = attr_sets.map { |attrs| SRF::OUT::Pep.new.set_from_hash(attrs) }
  sp = GenericSpecID.new.set_from_hash({:peps => peptides})
  # Not asserted on; kept so the same calls are made as before.
  _before_size = sp.peps.size

  [
    [[1.2, 1.2, 1.2, 0.1, 50], 5, "all passing"],
    [[1.6, 1.6, 1.6, 0.1, 50], 0, "xcorrs too high"],
    [[1.6, 1.0, 1.0, 0.1, 50], 4, "one xcorr too high"],
    [[1.0, 1.6, 1.0, 0.1, 50], 2, "one xcorr too high"],
    [[1.0, 1.0, 1.6, 0.1, 50], 4, "one xcorr too high"],
    [[1.2, 1.2, 1.2, 0.2, 50], 0, "high deltacn"],
  ].each do |thresholds, expected, label|
    assert_filter(thresholds, sp, expected, label)
  end

  ## same kind of checks with the include_deltcn flag switched on:
  [
    [[1.2, 1.2, 1.2, 0.1, 50], 6, "all passing"],
    [[1.2, 1.2, 1.2, 0.2, 50], 1, "high deltacn"],
    [[1.0, 1.0, 1.6, 0.1, 50], 5, "one xcorr too high"],
  ].each do |thresholds, expected, label|
    assert_filter(thresholds, sp, expected, label, true)
  end
end
|
69
|
+
|
70
|
+
# Asserts that spec_id.filter_sequest(filter_args, include_deltcn) returns
# exactly expected_passing items.
#
# filter_args      - threshold list handed straight to filter_sequest
# spec_id          - object responding to filter_sequest
# expected_passing - expected size of the returned collection
# message          - failure message passed to assert_equal
# include_deltcn   - second argument forwarded to filter_sequest (default false)
def assert_filter(filter_args, spec_id, expected_passing, message, include_deltcn=false)
  passing_count = spec_id.filter_sequest(filter_args, include_deltcn).size
  assert_equal(expected_passing, passing_count, message)
end
|
74
|
+
|
75
|
+
# Exercises SpecID.passing_proteins in its default, :update and :new modes,
# once with SpecID::GenericProt proteins and once with SRF::OUT::Prot proteins.
# Six peptides reference proteins prot_0..prot_4 (prot_5..prot_7 are never
# referenced), so exactly those five proteins are expected back, each carrying
# the peptides listed in expected_membership.
def test_passing_proteins
  ref_for = lambda { |n| "prot_" + n.to_s }
  hash_prots = (0..7).map do |n|
    SpecID::GenericProt.new.set_from_hash({:reference => ref_for.call(n), :peps => []})
  end
  arr_prots = (0..7).map do |n|
    SRF::OUT::Prot.new.set_from_hash({:reference => ref_for.call(n), :peps => []})
  end

  # protein reference => sorted peptide sequences expected on that protein
  expected_membership = [
    ['prot_0', %w(PEP0 PEP4)],
    ['prot_1', %w(PEP0 PEP1 PEP5)],
    ['prot_2', %w(PEP1 PEP5)],
    ['prot_3', %w(PEP2)],
    ['prot_4', %w(PEP3)],
  ]

  [hash_prots, arr_prots].each do |prots|
    pep_attrs = [
      {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
      {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
      {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
      {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
      {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
      {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
    ]
    peps = pep_attrs.map { |attrs| SRF::OUT::Pep.new.set_from_hash(attrs) }

    ## default mode: only the set of references is checked
    passing = SpecID.passing_proteins(peps)
    assert_equal((0..4).map { |n| ref_for.call(n) }, passing.map { |prot| prot.reference }.sort)

    ## :update mode
    passing = SpecID.passing_proteins(peps, :update)
    first_before = passing.find { |prot| prot.reference == 'prot_0' }
    expected_membership.each do |ref, seqs|
      assert_protein_match(passing, ref, seqs)
    end
    assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), passing.map { |prot| prot.reference }.sort, "just the right number of prots")
    first_after = passing.find { |prot| prot.reference == 'prot_0' }
    # NOTE(review): both lookups read the same returned array, so this identity
    # check cannot fail; it may have been meant to compare against prots[0].
    assert_equal(first_before.__id__, first_after.__id__, "proteins are identical")

    # remember the identity of the :update result's prot_0 ...
    first_before_id = passing.find { |prot| prot.reference == 'prot_0' }.__id__

    ## :new mode: same membership, but prot_0 must be a different object
    passing = SpecID.passing_proteins(peps, :new)
    expected_membership.each do |ref, seqs|
      assert_protein_match(passing, ref, seqs)
    end
    assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), passing.map { |prot| prot.reference }.sort, "just the right number of prots")
    first_after = passing.find { |prot| prot.reference == 'prot_0' }
    assert_not_equal(first_before_id, first_after.__id__, "proteins are not identical")
  end
end
|
134
|
+
|
135
|
+
# Asserts that the protein in prts whose reference equals ref carries exactly
# the peptide sequences in pepseqs, ignoring order.
#
# prts    - collection of proteins responding to #reference and #peps
# ref     - reference string of the protein to inspect
# pepseqs - expected peptide sequences (any order)
# message - optional failure message for the sequence comparison
#
# Both sides are sorted before comparing.  Previously the sorted copy of
# pepseqs was computed but the unsorted list was compared, so the helper only
# passed when callers happened to supply pepseqs already sorted.
def assert_protein_match(prts, ref, pepseqs, message='')
  prt = prts.select { |v| v.reference == ref }.first
  # Fail with a readable message instead of a NoMethodError on nil.
  assert_not_nil(prt, "no protein with reference #{ref.inspect}")
  sorted_prt_peps_aaseqs = prt.peps.map { |v| v.aaseq }.sort
  sorted_pepseqs = pepseqs.sort
  assert_equal(sorted_pepseqs, sorted_prt_peps_aaseqs, message)
end
|
141
|
+
|
142
|
+
# Running the filter with an empty argument list should print text containing
# 'usage:' on stdout.
def test_usage
  printed = capture_stdout do
    SpecID::Filter.run_from_argv([])
  end
  assert_match('usage:', printed)
end
|
148
|
+
|
149
|
+
# Runs the filter on the two small bioworks XML fixtures and checks the
# reported hit counts.  The expected numbers are frozen values taken from a
# previous run.
def test_basic_bioworks_xml
  report = capture_stdout do
    SpecID::Filter.run_from_argv([@small] + %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000))
  end
  ## FROZEN:
  assert_match(/pep_hits\s+4/, report)
  assert_match(/uniq_aa_hits\s+4/, report)
  assert_match(/prot_hits\s+4/, report)

  # same thresholds on the INV fixture, with '-f INV_' added
  report = capture_stdout do
    SpecID::Filter.run_from_argv([@small_inv] + %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))
  end
  #puts ""
  #puts report
  ## FROZEN:
  assert_match(/pep_hits\s+151/, report)
  assert_match(/uniq_aa_hits\s+75/, report)
  assert_match(/prot_hits\s+13/, report)
end
|
170
|
+
|
171
|
+
# Runs the filter on the .srg list written by initialize and checks frozen
# counts and percentages.  Prints a skip notice instead when the
# large-fixture directory is missing.
def test_srf
  unless File.exist? @tfiles_l
    assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})" ))
    return
  end

  ## dcy
  report = capture_stdout do
    SpecID::Filter.run_from_argv([@zero_srg] + %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))
  end
  ## FROZEN:
  #puts ""
  #puts report
  assert_match(/pep_hits\s+2111\s+107\.2/, report)
  assert_match(/uniq_aa_hits\s+2034\s+106\.6/, report)
  assert_match(/prot_hits\s+1454\s+100\.0/, report)

  ## cys tps fps COMBINED
  # tps are fictitious!
  report = capture_stdout do
    # that's the background freq for ecoli that this file's from
    argv = [@zero_srg] + %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 --occams_razor --cys 0.0115866200193321 --t) + [@opd1_correct_fasta]
    SpecID::Filter.run_from_argv(argv)
  end
  #puts ""
  #puts report
  ## FROZEN:
  assert_match(/num\s+tps%\s+cys%/, report, "header")
  assert_match(/pep_hits\s+4374\s+9\d\.\d.*\s+83\.7/, report)
  assert_match(/uniq_aa_hits\s+4203\s+9\d\.\d.*\s+82\.8/, report)
  assert_match(/prot_hits\s+2986\s+9\d\..*\s+7\d\./, report)
  assert_match(/occams.*\s+2986\s+8\d\..*\s+7\d\./, report)
end
|
202
|
+
|
203
|
+
end
|
data/test/tc_gi.rb
CHANGED
@@ -7,14 +7,11 @@ class Gi2AnnotTest < Test::Unit::TestCase
|
|
7
7
|
ROOT_DIR = File.join(File.dirname(__FILE__), '..')
|
8
8
|
|
9
9
|
def test_single_query
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
puts "
|
15
|
-
|
16
|
-
#else
|
17
|
-
assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]'+"\n", annot)
|
18
|
-
#end
|
10
|
+
annot = GI.gi2annot([16130548])
|
11
|
+
if annot
|
12
|
+
assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]', annot.first)
|
13
|
+
else
|
14
|
+
assert_nil( puts("SKIPPING gi test (no internet connection available)") )
|
15
|
+
end
|
19
16
|
end
|
20
17
|
end
|
data/test/tc_id_precision.rb
CHANGED
@@ -30,6 +30,30 @@ class IDPrecisionTest < Test::Unit::TestCase
|
|
30
30
|
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
31
31
|
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
32
32
|
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
33
|
+
155, 1.0, 67, 1.0, 123, 1.0, 155, 1.0, 125, 1.0, 155, 1.0
|
34
|
+
186, 1.0, 85, 1.0, 154, 1.0, 186, 1.0, 156, 1.0, 186, 1.0
|
35
|
+
196, 1.0, 90, 1.0, 161, 1.0, 196, 1.0, 163, 1.0, 196, 1.0
|
36
|
+
214, 1.0, 94, 1.0, 168, 1.0, 214, 1.0, 170, 1.0, 214, 1.0
|
37
|
+
215, 1.0, 95, 1.0, 169, 1.0, 215, 1.0, 171, 1.0, 215, 1.0
|
38
|
+
217, 0.995391705069124, 97, 0.989690721649485, 171, 0.994152046783626, 217, 0.995391705069124, 173, 0.994219653179191, 217, 0.995391705069124
|
39
|
+
219, 0.995433789954338, 99, 0.98989898989899, 172, 0.994186046511628, 219, 0.995433789954338, 175, 0.994285714285714, 219, 0.995433789954338
|
40
|
+
227, 0.995594713656388, 106, 0.990566037735849, 180, 0.994444444444444, 227, 0.995594713656388, 183, 0.994535519125683, 227, 0.995594713656388
|
41
|
+
228, 0.995614035087719, 107, 0.990654205607477, 181, 0.994475138121547, 228, 0.995614035087719, 184, 0.994565217391304, 228, 0.995614035087719
|
42
|
+
229, 0.991266375545852, 108, 0.981481481481482, 182, 0.989010989010989, 229, 0.991266375545852, 185, 0.989189189189189, 229, 0.991266375545852
|
43
|
+
END
|
44
|
+
|
45
|
+
# This was the result we were getting before first hashing on protein
|
46
|
+
# sequences and doing unique peptide hits. It is very similar (but not
|
47
|
+
# exactly the same) to what we are doing now. Must have something to do
|
48
|
+
# with the way things are hashed out.
|
49
|
+
before_doing_uniq_peptides=<<END
|
50
|
+
# NH = number of hits
|
51
|
+
# TP = true positives
|
52
|
+
# FP = false positives
|
53
|
+
# PR = precision = TP/(TP+FP)
|
54
|
+
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
55
|
+
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
56
|
+
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
33
57
|
125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
|
34
58
|
155, 1.0, 85, 1.0, 154, 1.0, 155, 1.0, 156, 1.0, 155, 1.0
|
35
59
|
186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
|
@@ -49,10 +73,17 @@ END
|
|
49
73
|
cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
|
50
74
|
#puts "RUNNING: #{cmd}"
|
51
75
|
reply = `#{cmd}`
|
76
|
+
# This is what we were getting before hashing for unique peptides
|
77
|
+
# It is very similar (but not identical to previous output)
|
52
78
|
string =<<END
|
53
79
|
Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
|
54
80
|
./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
|
55
81
|
END
|
82
|
+
|
83
|
+
string =<<NEWEND
|
84
|
+
Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
|
85
|
+
./test/tfiles/bioworks_with_INV_small.xml 228.939375794224 107.877585995136 181.929045912105 228.939375794224 184.924437525838 228.939375794224
|
86
|
+
NEWEND
|
56
87
|
assert_equal(string, reply, "area under the curve")
|
57
88
|
end
|
58
89
|
end
|
data/test/tc_mzxml.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'spec/mzxml/parser'
|
3
3
|
|
4
|
-
|
5
4
|
class SpecMzXML < Test::Unit::TestCase
|
6
5
|
def initialize(arg)
|
7
6
|
super(arg)
|
@@ -49,23 +48,24 @@ class SpecMzXML < Test::Unit::TestCase
|
|
49
48
|
sr_raw = @tfiles + 'smallraw.RAW'
|
50
49
|
sr_noext = @tfiles + 'smallraw'
|
51
50
|
sr_mzxml = @tfiles + 'smallraw.mzXML'
|
52
|
-
|
51
|
+
klass = Spec::MzXML
|
53
52
|
# given raw
|
54
|
-
file =
|
53
|
+
file = klass.file_to_mzxml(sr_raw)
|
55
54
|
file_to_mzxml_assert(file)
|
56
55
|
# given mzXML
|
57
|
-
file =
|
56
|
+
file = klass.file_to_mzxml(sr_mzxml)
|
58
57
|
file_to_mzxml_assert(file)
|
59
58
|
File.unlink(sr_mzxml)
|
60
59
|
# given basename (and no mzXML)
|
61
|
-
file =
|
60
|
+
file = klass.file_to_mzxml(sr_noext)
|
62
61
|
file_to_mzxml_assert(file)
|
63
62
|
# given basename (and mzXML)
|
64
|
-
file =
|
63
|
+
file = klass.file_to_mzxml(sr_noext)
|
65
64
|
file_to_mzxml_assert(file)
|
66
65
|
File.unlink(sr_mzxml)
|
67
66
|
else
|
68
67
|
puts "SKIPPING tests requiring 't2x' to convert RAW to mzXML"
|
68
|
+
puts "(look in the archive folder of the gem for t2x binary for linux)"
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
@@ -84,3 +84,5 @@ class SpecMzXML < Test::Unit::TestCase
|
|
84
84
|
end
|
85
85
|
|
86
86
|
end
|
87
|
+
|
88
|
+
|
data/test/tc_precision.rb
CHANGED
data/test/tc_proph.rb
CHANGED
@@ -16,8 +16,8 @@ class ProphTest < Test::Unit::TestCase
|
|
16
16
|
|
17
17
|
def test_parse_protxml_file
|
18
18
|
file = @tfiles + 'opd1/000_020_3prots-prot.xml'
|
19
|
-
#obj =
|
20
|
-
obj =
|
19
|
+
#obj = Proph::ProtSummary.new
|
20
|
+
obj = Proph::ProtSummary.new(file)
|
21
21
|
assert_equal(3, obj.prot_groups.size)
|
22
22
|
assert_equal("1.00", obj.prot_groups.first.probability)
|
23
23
|
assert_equal("0.98", obj.prot_groups[2].probability)
|
@@ -38,7 +38,7 @@ class ProphTest < Test::Unit::TestCase
|
|
38
38
|
|
39
39
|
|
40
40
|
def Xtest_filter_by_min_pep_prob
|
41
|
-
obj =
|
41
|
+
obj = Proph::Pep::Parser.new
|
42
42
|
new_file = "tfiles/tmp.xml"
|
43
43
|
assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
|
44
44
|
obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
|
@@ -48,7 +48,7 @@ class ProphTest < Test::Unit::TestCase
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def Xtest_uniq_by_seqcharge
|
51
|
-
cls =
|
51
|
+
cls = Proph::Pep
|
52
52
|
p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
53
53
|
p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
|
54
54
|
p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
@@ -91,7 +91,7 @@ class ProphTest < Test::Unit::TestCase
|
|
91
91
|
s1 = Spec::Scan.new(1,2,0.10, 300.2, i1, p1)
|
92
92
|
s2 = Spec::Scan.new(2,2,0.20, 301.1, i2, p2)
|
93
93
|
s3 = Spec::Scan.new(3,2,0.30, 302.0, i3, p3)
|
94
|
-
scan =
|
94
|
+
scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
|
95
95
|
tot_inten = i1 + i2 + i3
|
96
96
|
tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
|
97
97
|
{:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
|
data/test/tc_protein_summary.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
|
2
2
|
require 'test/unit'
|
3
|
-
require
|
3
|
+
require 'spec_id/protein_summary'
|
4
|
+
require File.dirname(__FILE__) + '/test_helper'
|
4
5
|
|
5
6
|
|
6
7
|
|
@@ -20,16 +21,29 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
20
21
|
@tf_proph_cat_inv_summary_html = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.html'
|
21
22
|
@tf_proph_cat_inv_summary_png = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.png'
|
22
23
|
@tf_peptide_count = @tfiles + "peptide_counts.tmp.txt"
|
23
|
-
@cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S protein_summary.rb "
|
24
24
|
end
|
25
25
|
|
26
|
+
# Runs ProteinSummary in-process with command-line style arguments.
#
# string_or_args - either a String, which is split on runs of whitespace, or
#                  an object passed through unchanged as the argument list
#
# Returns whatever ProteinSummary#create_from_command_line_args returns.
def runit(string_or_args)
  argv = case string_or_args
         when String then string_or_args.split(/\s+/)
         else string_or_args
         end
  ProteinSummary.new.create_from_command_line_args(argv)
end
|
34
|
+
|
35
|
+
|
26
36
|
def test_usage
|
27
|
-
|
37
|
+
output = capture_stdout {
|
38
|
+
runit('')
|
39
|
+
}
|
40
|
+
assert_match(/usage:/, output)
|
28
41
|
end
|
29
42
|
|
30
|
-
def
|
43
|
+
def test_proph_basic
|
31
44
|
if File.exist? @tfiles_l
|
32
|
-
|
45
|
+
runit "-c 5.0 #{@tf_proph}"
|
46
|
+
ProteinSummary.new.create_from_command_line_args([@tf_proph, '-c', '5.0'])
|
33
47
|
assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
|
34
48
|
string = IO.read(@tf_summary)
|
35
49
|
assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
|
@@ -41,7 +55,7 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
41
55
|
end
|
42
56
|
|
43
57
|
def test_bioworks_basic
|
44
|
-
|
58
|
+
runit "#{@tf_bioworks_small}"
|
45
59
|
assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
|
46
60
|
File.unlink @tf_bioworks_small_summary_html unless NODELETE
|
47
61
|
|
@@ -49,23 +63,32 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
49
63
|
end
|
50
64
|
|
51
65
|
def test_bioworks_with_precision
|
52
|
-
|
53
|
-
|
54
|
-
|
66
|
+
## Could reimplement a separate file approach?
|
67
|
+
#reply = `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
|
68
|
+
runit "#{@tf_bioworks_small} --precision"
|
69
|
+
assert_match(/# hits.*106/m, IO.read(@tf_bioworks_small_summary_html))
|
70
|
+
#assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
|
71
|
+
#assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
|
55
72
|
assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
|
56
73
|
File.unlink @tf_bioworks_small_summary_html unless NODELETE
|
57
74
|
end
|
58
75
|
|
59
|
-
def
|
76
|
+
def test_proph_with_precision
|
60
77
|
#puts @cmd
|
61
|
-
|
78
|
+
runit "#{@tf_proph_cat_inv} -f INV_ --precision"
|
79
|
+
html = IO.read(@tf_proph_cat_inv_summary_html)
|
80
|
+
assert_match(/# hits/, html, "in #{@tf_proph_cat_inv_summary_html}")
|
81
|
+
assert_match(/2.*0\.0000/m, html, "in #{@tf_proph_cat_inv_summary_html}")
|
82
|
+
assert_match(/3.*0\.3333/m, html, "in #{@tf_proph_cat_inv_summary_html}")
|
83
|
+
assert_match(/7.*0\.5714/m, html, "in #{@tf_proph_cat_inv_summary_html}")
|
84
|
+
|
62
85
|
File.unlink @tf_proph_cat_inv_summary_html unless NODELETE
|
63
86
|
File.unlink @tf_proph_cat_inv_summary_png unless NODELETE
|
64
87
|
end
|
65
88
|
|
66
|
-
def
|
89
|
+
def test_peptide_count
|
67
90
|
if File.exist? @tfiles_l
|
68
|
-
|
91
|
+
runit "-c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}"
|
69
92
|
assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
|
70
93
|
file = IO.read(@tf_peptide_count)
|
71
94
|
assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
|
data/test/tc_sequest.rb
CHANGED
@@ -4,8 +4,10 @@
|
|
4
4
|
require 'spec_id'
|
5
5
|
require 'spec_id/sequest'
|
6
6
|
require 'test/unit'
|
7
|
+
require 'spec/mzxml'
|
7
8
|
|
8
9
|
|
10
|
+
NODELETE = false
|
9
11
|
|
10
12
|
class SequestTest < Test::Unit::TestCase
|
11
13
|
|
@@ -18,10 +20,10 @@ class SequestTest < Test::Unit::TestCase
|
|
18
20
|
@tf_bioworks_xml = @tfiles + "bioworks_small.xml"
|
19
21
|
end
|
20
22
|
|
21
|
-
def
|
23
|
+
def Xtest_set_from_bioworks
|
22
24
|
if File.exist? @tfiles_l
|
23
25
|
out_path = '.'
|
24
|
-
pepxml_objs =
|
26
|
+
pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(@tf_bioworks_xml, @tf_params, {:ms_path => @tf_mzxml_path, :out_path => out_path})
|
25
27
|
pepxml_objs.each do |obj|
|
26
28
|
assert(obj.spectrum_queries.size > 2)
|
27
29
|
assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
|
@@ -55,10 +57,10 @@ class SequestTest < Test::Unit::TestCase
|
|
55
57
|
mzxml_path = @tfiles + "opd1"
|
56
58
|
out_path = @tfiles
|
57
59
|
pepxml_version = 18
|
58
|
-
pepxml_objs =
|
60
|
+
pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => pepxml_version, :sample_enzyme => "trypsin"})
|
59
61
|
puts "TOOK #{Time.new - st}secs"
|
60
62
|
po = pepxml_objs.first
|
61
|
-
assert_equal(pepxml_version,
|
63
|
+
assert_equal(pepxml_version, Sequest::PepXML.pepxml_version)
|
62
64
|
|
63
65
|
# MSMSPipelineAnalysis
|
64
66
|
pipe = po.msms_pipeline_analysis
|
@@ -197,9 +199,9 @@ class SequestTest < Test::Unit::TestCase
|
|
197
199
|
|
198
200
|
|
199
201
|
|
200
|
-
def
|
201
|
-
params =
|
202
|
-
scall =
|
202
|
+
def Xtest_calc_num_tol_term
|
203
|
+
params = Sequest::Params.new(@tf_params)
|
204
|
+
scall = Sequest::PepXML::SearchHit
|
203
205
|
sym = :calc_num_tol_term
|
204
206
|
assert_equal(2, scall.send(sym, params, "K.EPTIDR.E"))
|
205
207
|
assert_equal(1, scall.send(sym, params, "K.PEPTIDR.E"))
|
@@ -207,9 +209,9 @@ class SequestTest < Test::Unit::TestCase
|
|
207
209
|
assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
|
208
210
|
end
|
209
211
|
|
210
|
-
def
|
211
|
-
params =
|
212
|
-
scall =
|
212
|
+
def Xtest_calc_num_missed_cleavages
|
213
|
+
params = Sequest::Params.new(@tf_params)
|
214
|
+
scall = Sequest::PepXML::SearchHit
|
213
215
|
sym = :calc_num_missed_cleavages
|
214
216
|
assert_equal(0, scall.send(sym, params, "K.EPTIDR.E"))
|
215
217
|
assert_equal(0, scall.send(sym, params, "K.PEPTIDR.E"))
|
@@ -225,35 +227,27 @@ class SequestTest < Test::Unit::TestCase
|
|
225
227
|
end
|
226
228
|
|
227
229
|
|
228
|
-
def
|
229
|
-
assert_equal("hello.fasta",
|
230
|
-
assert_equal("hello.fasta",
|
230
|
+
def Xtest_sys_ind_basename
|
231
|
+
assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
|
232
|
+
assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
|
231
233
|
end
|
232
234
|
|
233
|
-
def
|
234
|
-
obj =
|
235
|
+
def Xtest_modifications
|
236
|
+
obj = Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
|
235
237
|
answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
|
236
238
|
assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
|
237
239
|
|
238
240
|
## need more here
|
239
241
|
end
|
240
242
|
|
241
|
-
def
|
242
|
-
hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
|
243
|
-
cl = proc {|v| SpecID::Sequest::PepXML::SearchHit.remove_non_amino_acids(v) }
|
244
|
-
hash.each do |k,v|
|
245
|
-
assert_equal(v, cl.call(k))
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
def test_modification_info
|
243
|
+
def Xtest_modification_info
|
250
244
|
hash = {
|
251
245
|
:mod_nterm_mass => 520.2,
|
252
246
|
:modified_peptide => "MOD*IFI^E&D",
|
253
247
|
:mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
|
254
248
|
}
|
255
249
|
answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
|
256
|
-
string =
|
250
|
+
string = Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
|
257
251
|
assert_match(_re('<modification_info'), answ)
|
258
252
|
assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
|
259
253
|
assert_match(_re(" modified_peptide=\"MOD*IFI^E&D\""), answ)
|
@@ -270,22 +264,73 @@ class SequestTest < Test::Unit::TestCase
|
|
270
264
|
end
|
271
265
|
|
272
266
|
def test_modifications
|
273
|
-
params =
|
267
|
+
params = Sequest::Params.new(@tf_params)
|
274
268
|
mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
|
275
269
|
params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
|
276
270
|
params.term_diff_search_options = "14.20000 12.33000"
|
277
|
-
|
278
|
-
=begin
|
279
|
-
mod = SpecID::Sequest::PepXML::Modifications(params, mod_string)
|
280
|
-
SpecID::Sequest::PepXML::Modifications
|
281
|
-
peptide = "PEPTIDE"
|
271
|
+
mod = Sequest::PepXML::Modifications.new(params, mod_string)
|
282
272
|
## no mods
|
273
|
+
peptide = "PEPTIDE"
|
283
274
|
assert_equal(nil, mod.modification_info(peptide))
|
284
275
|
peptide = "]M*EC^S@IDM#M*EMSCM["
|
285
|
-
|
286
|
-
|
276
|
+
modinfo = mod.modification_info(peptide)
|
277
|
+
assert_equal(peptide, modinfo.modified_peptide)
|
278
|
+
assert_in_delta(146.40054, modinfo.mod_nterm_mass, 0.000001)
|
279
|
+
assert_in_delta(160.52994, modinfo.mod_cterm_mass, 0.000001)
|
280
|
+
end
|
287
281
|
|
282
|
+
# splits string on ' 'and matches the line found by find_line_regexp in
|
283
|
+
# lines
|
284
|
+
# Splits string on ' ' and asserts that every resulting piece occurs
# (as a literal substring) in each line of lines that matches
# find_line_regexp.  Lines that do not match find_line_regexp are ignored.
def match_modline_pieces(lines, find_line_regexp, string)
  literal_patterns = string.split(' ').map { |token| Regexp.new(Regexp.escape(token)) }
  lines.each do |line|
    next unless line =~ find_line_regexp
    literal_patterns.each { |pattern| assert_match(pattern, line) }
  end
end
|
289
294
|
|
295
|
+
# Converts two modified-search .srf fixtures (020, 040) to pepXML through an
# SRFGroup-generated .srg file and checks the modification-related content of
# each generated XML file.  Prints a skip notice instead when the
# large-fixture directory is missing.
def test_modifications_in_run
  unless File.exist? @tfiles_l
    assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
    return
  end

  sequest_dir = @tfiles_l + 'opd1_2runs_2mods/sequest/'
  data_dir = @tfiles_l + 'opd1_2runs_2mods/data/'
  srg_path = sequest_dir + 'tmp.srg'
  out_path = sequest_dir + 'pepxml'
  srf_paths = %w(020 040).map { |base| sequest_dir + base + ".srf" }

  # Return value is not inspected; the test only looks at the files written to out_path.
  _objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(srf_paths).to_srg(srg_path), {:ms_data => data_dir, :out_path => out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )

  # [regexp selecting a line, space-separated pieces that line must contain]
  header_expectations = [
    [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],

    [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
    [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
    [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
    [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
  ]

  # literal fragments that must appear somewhere in the file
  modinfo_fragments = [
    '<modification_info modified_peptide="Y#RLGGS#T#K">',
    '<mod_aminoacid_mass position="1" mass="243.1559"/>',
    '<mod_aminoacid_mass position="7" mass="167.0581"/>',
    '</modification_info>',
    '<mod_aminoacid_mass position="9" mass="181.085"/>'
  ]

  %w(020 040).each do |base|
    xml_path = out_path + '/' + base + '.xml'
    assert(File.exist?(xml_path), "file #{xml_path} exists")
    contents = IO.read(xml_path)
    xml_lines = contents.split("\n")

    header_expectations.each do |line_regexp, pieces|
      match_modline_pieces(xml_lines, line_regexp, pieces)
    end

    modinfo_fragments.each do |fragment|
      assert_match(/#{Regexp.escape(fragment)}/, contents, "a modification info for a peptide")
    end

    File.unlink(xml_path) unless NODELETE
  end
end
|
290
335
|
end
|
291
336
|
|