mspire 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/test/tc_filter.rb
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
|
2
|
+
require 'test/unit'
|
3
|
+
require 'spec_id/filter'
|
4
|
+
require 'spec_id/srf'
|
5
|
+
require 'set_from_hash'
|
6
|
+
require File.dirname(__FILE__) + '/test_helper'
|
7
|
+
|
8
|
+
$VERBOSE = false
|
9
|
+
|
10
|
+
|
11
|
+
class TestFilter < Test::Unit::TestCase
|
12
|
+
|
13
|
+
# Builds the fixture paths used by the tests in this class.
# When the large-fixture directory exists, also (re)writes the two .srg files,
# each listing the expanded paths of the .srf fixtures it groups.
def initialize(arg)
  super(arg)
  here = File.dirname(__FILE__)
  @tfiles = here + '/tfiles/'
  @tfiles_l = here + '/tfiles_large/'

  # Bioworks XML fixtures
  @small_inv = @tfiles + 'bioworks_with_INV_small.xml'
  @small = @tfiles + 'bioworks_small.xml'

  # SRF fixtures and the SRG group files written below
  @zero_srf = @tfiles_l + 'opd1_cat_inv/000.srf'
  @twenty_srf = @tfiles_l + 'opd1_cat_inv/020.srf'
  @zero_srg = @tfiles_l + 'bioworks_000.srg'
  @both_srg = @tfiles_l + 'bioworks_both.srg'

  # FASTA fixtures
  @opd1_fasta = @tfiles_l + 'opd1_cat_inv/ecoli_K12_ncbi_20060321.fasta'
  @opd1_correct_fasta = @tfiles_l + 'opd1_cat_inv/correct_fictitious_314.fasta'

  # Nothing to generate when the large fixtures are not installed.
  return unless File.exist?(@tfiles_l)

  File.open(@zero_srg, 'w') do |out|
    out.puts(File.expand_path(@zero_srf))
  end
  File.open(@both_srg, 'w') do |out|
    out.puts(File.expand_path(@zero_srf), File.expand_path(@twenty_srf))
  end
end
|
32
|
+
|
33
|
+
# Checks the mean and standard deviation reported by
# SpecID::Filter#protein_fppr for a fixed peptides-per-protein distribution.
#
# protein_fppr returns four values; only the 2nd (mean) and 4th (std) are
# checked here.
# NOTE(review): the 2nd and 3rd arguments are presumably the number of false
# peptides and the number of iterations -- confirm against spec_id/filter.rb.
def test_protein_fppr
  peps_per_prot = [4,4,3,2,2]
  # The results are floats, so compare within a tolerance instead of exactly.
  tolerance = 1e-9

  _num, mean_fppr, _std_num, std_fppr = SpecID::Filter.new.protein_fppr(peps_per_prot, 1, 10)
  assert_in_delta(0.0, mean_fppr, tolerance, "no prots completely wrong")
  assert_in_delta(0.0, std_fppr, tolerance, "no prots completely wrong")

  _num, mean_fppr, _std_num, std_fppr = SpecID::Filter.new.protein_fppr(peps_per_prot, 14, 10)
  assert_in_delta(4.0/5, mean_fppr, tolerance, "only one prot right")
  assert_in_delta(0.0, std_fppr, tolerance, "only one prot right")
end
|
42
|
+
|
43
|
+
# Checks filter_sequest on six in-memory peptides by counting how many pass
# for several threshold sets (see assert_filter).
#
# Judging from the fixtures and expected counts below, the threshold array is
# [xcorr for charge 1, xcorr for charge 2, xcorr for charge 3, deltacn, ppm]
# -- NOTE(review): confirm the order against spec_id/filter.rb.
def test_filter_sequest
  hashes = [
    {:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
    {:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
    # deltacn of 1.1: only counted when the last assert_filter argument is true
    {:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
    {:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
  ]
  peps = hashes.map {|hash| SRF::OUT::Pep.new.set_from_hash(hash) }
  sp = GenericSpecID.new.set_from_hash({:peps => peps})

  assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 5, "all passing")
  assert_filter([1.6, 1.6, 1.6, 0.1, 50], sp, 0, "xcorrs too high")
  assert_filter([1.6, 1.0, 1.0, 0.1, 50], sp, 4, "one xcorr too high")
  assert_filter([1.0, 1.6, 1.0, 0.1, 50], sp, 2, "one xcorr too high")
  assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 4, "one xcorr too high")
  assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 0, "high deltacn")

  ## with deltacn-star hits included:
  assert_filter([1.2, 1.2, 1.2, 0.1, 50], sp, 6, "all passing", true)
  assert_filter([1.2, 1.2, 1.2, 0.2, 50], sp, 1, "high deltacn", true)
  assert_filter([1.0, 1.0, 1.6, 0.1, 50], sp, 5, "one xcorr too high", true)
end
|
69
|
+
|
70
|
+
# Calls spec_id.filter_sequest(filter_args, include_deltcn) and asserts that
# the size of the result equals expected_passing.
def assert_filter(filter_args, spec_id, expected_passing, message, include_deltcn=false)
  passing_count = spec_id.filter_sequest(filter_args, include_deltcn).size
  assert_equal(expected_passing, passing_count, message)
end
|
74
|
+
|
75
|
+
# Exercises SpecID.passing_proteins in its default, :update and :new modes,
# once with SpecID::GenericProt proteins and once with SRF::OUT::Prot proteins.
def test_passing_proteins
  hash_prots = (0..7).map do |n|
    SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
  end
  arr_prots = (0..7).map do |n|
    SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
  end

  # Only prot_0..prot_4 are referenced by a peptide below, so prot_5..prot_7
  # must never be returned.
  expected_refs = %w(prot_0 prot_1 prot_2 prot_3 prot_4)
  expected_peps = [
    ['prot_0', %w(PEP0 PEP4)],
    ['prot_1', %w(PEP0 PEP1 PEP5)],
    ['prot_2', %w(PEP1 PEP5)],
    ['prot_3', %w(PEP2)],
    ['prot_4', %w(PEP3)],
  ]

  [hash_prots, arr_prots].each do |prots|
    hashes = [
      {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
      {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
      {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
      {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
      {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
      {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
    ]
    peps = hashes.map {|hash| SRF::OUT::Pep.new.set_from_hash(hash) }

    ## default mode: only the set of returned proteins is checked
    prts = SpecID.passing_proteins(peps)
    assert_equal(expected_refs, prts.map {|v| v.reference }.sort)

    ## :update mode
    prts = SpecID.passing_proteins(peps, :update)
    expected_peps.each {|ref, pepseqs| assert_protein_match(prts, ref, pepseqs) }
    assert_equal(expected_refs, prts.map {|v| v.reference }.sort, "just the right number of prots")
    prot_0 = prts.find {|v| v.reference == 'prot_0' }
    # Compare against the protein object built above. Taking both sides from
    # the same result array (as before) made this assertion always true.
    # NOTE(review): assumes :update hands back the original protein objects.
    assert_equal(prots[0].__id__, prot_0.__id__, "proteins are identical")
    updated_prot_0_id = prot_0.__id__

    ## :new mode: same content, but different protein objects
    prts = SpecID.passing_proteins(peps, :new)
    expected_peps.each {|ref, pepseqs| assert_protein_match(prts, ref, pepseqs) }
    assert_equal(expected_refs, prts.map {|v| v.reference }.sort, "just the right number of prots")
    prot_0 = prts.find {|v| v.reference == 'prot_0' }
    assert_not_equal(updated_prot_0_id, prot_0.__id__, "proteins are not identical")
  end
end
|
134
|
+
|
135
|
+
# Asserts that the protein in +prts+ whose reference equals +ref+ holds
# exactly the peptides whose aaseq values are listed in +pepseqs+.
# Order is ignored: both lists are sorted before they are compared.
#
# prts::    objects responding to #reference and #peps
# ref::     reference string of the protein to inspect
# pepseqs:: expected peptide sequences, in any order
# message:: optional failure message
def assert_protein_match(prts, ref, pepseqs, message='')
  prt = prts.find {|v| v.reference == ref }
  # Fail with a readable message instead of a NoMethodError on nil.
  assert_not_nil(prt, "no protein with reference #{ref.inspect}")
  actual_aaseqs = prt.peps.map {|v| v.aaseq }.sort
  assert_equal(pepseqs.sort, actual_aaseqs, message)
end
|
141
|
+
|
142
|
+
# Running the filter with no arguments should print a usage message.
def test_usage
  printed = capture_stdout do
    SpecID::Filter.run_from_argv([])
  end
  assert_match('usage:', printed)
end
|
148
|
+
|
149
|
+
# Runs the filter on the two small Bioworks XML fixtures and checks the hit
# counts in the printed report. The expected numbers are frozen from an
# earlier run of the code rather than derived independently.
def test_basic_bioworks_xml
  plain_argv = [@small] + %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000)
  report = capture_stdout do
    SpecID::Filter.run_from_argv(plain_argv)
  end
  ## FROZEN:
  assert_match(/pep_hits\s+4/, report)
  assert_match(/uniq_aa_hits\s+4/, report)
  assert_match(/prot_hits\s+4/, report)

  # Same thresholds on the fixture with INV_ entries, passing "-f INV_".
  inv_argv = [@small_inv] + %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_)
  report = capture_stdout do
    SpecID::Filter.run_from_argv(inv_argv)
  end
  ## FROZEN:
  assert_match(/pep_hits\s+151/, report)
  assert_match(/uniq_aa_hits\s+75/, report)
  assert_match(/prot_hits\s+13/, report)
end
|
170
|
+
|
171
|
+
# Runs the filter on the generated single-file .srg group and checks the
# printed report. Skipped (with a notice) when the large fixture directory is
# missing. The expected numbers are frozen from an earlier run.
def test_srf
  unless File.exist? @tfiles_l
    assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})" ))
    return
  end

  ## run with "-f INV_"
  inv_argv = [@zero_srg] + %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_)
  report = capture_stdout do
    SpecID::Filter.run_from_argv(inv_argv)
  end
  ## FROZEN:
  assert_match(/pep_hits\s+2111\s+107\.2/, report)
  assert_match(/uniq_aa_hits\s+2034\s+106\.6/, report)
  assert_match(/prot_hits\s+1454\s+100\.0/, report)

  ## cys, tps, fps COMBINED
  # the tps are fictitious!
  # 0.0115866200193321 is the background freq for ecoli that this file's from
  combined_argv = [@zero_srg] +
    %w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 --occams_razor --cys 0.0115866200193321 --t) +
    [@opd1_correct_fasta]
  report = capture_stdout do
    SpecID::Filter.run_from_argv(combined_argv)
  end
  ## FROZEN:
  assert_match(/num\s+tps%\s+cys%/, report, "header")
  assert_match(/pep_hits\s+4374\s+9\d\.\d.*\s+83\.7/, report)
  assert_match(/uniq_aa_hits\s+4203\s+9\d\.\d.*\s+82\.8/, report)
  assert_match(/prot_hits\s+2986\s+9\d\..*\s+7\d\./, report)
  assert_match(/occams.*\s+2986\s+8\d\..*\s+7\d\./, report)
end
|
202
|
+
|
203
|
+
end
|
data/test/tc_gi.rb
CHANGED
@@ -7,14 +7,11 @@ class Gi2AnnotTest < Test::Unit::TestCase
|
|
7
7
|
ROOT_DIR = File.join(File.dirname(__FILE__), '..')
|
8
8
|
|
9
9
|
def test_single_query
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
puts "
|
15
|
-
|
16
|
-
#else
|
17
|
-
assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]'+"\n", annot)
|
18
|
-
#end
|
10
|
+
annot = GI.gi2annot([16130548])
|
11
|
+
if annot
|
12
|
+
assert_equal('CP4-57 prophage; RNase LS [Escherichia coli K12]', annot.first)
|
13
|
+
else
|
14
|
+
assert_nil( puts("SKIPPING gi test (no internet connection available)") )
|
15
|
+
end
|
19
16
|
end
|
20
17
|
end
|
data/test/tc_id_precision.rb
CHANGED
@@ -30,6 +30,30 @@ class IDPrecisionTest < Test::Unit::TestCase
|
|
30
30
|
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
31
31
|
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
32
32
|
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
33
|
+
155, 1.0, 67, 1.0, 123, 1.0, 155, 1.0, 125, 1.0, 155, 1.0
|
34
|
+
186, 1.0, 85, 1.0, 154, 1.0, 186, 1.0, 156, 1.0, 186, 1.0
|
35
|
+
196, 1.0, 90, 1.0, 161, 1.0, 196, 1.0, 163, 1.0, 196, 1.0
|
36
|
+
214, 1.0, 94, 1.0, 168, 1.0, 214, 1.0, 170, 1.0, 214, 1.0
|
37
|
+
215, 1.0, 95, 1.0, 169, 1.0, 215, 1.0, 171, 1.0, 215, 1.0
|
38
|
+
217, 0.995391705069124, 97, 0.989690721649485, 171, 0.994152046783626, 217, 0.995391705069124, 173, 0.994219653179191, 217, 0.995391705069124
|
39
|
+
219, 0.995433789954338, 99, 0.98989898989899, 172, 0.994186046511628, 219, 0.995433789954338, 175, 0.994285714285714, 219, 0.995433789954338
|
40
|
+
227, 0.995594713656388, 106, 0.990566037735849, 180, 0.994444444444444, 227, 0.995594713656388, 183, 0.994535519125683, 227, 0.995594713656388
|
41
|
+
228, 0.995614035087719, 107, 0.990654205607477, 181, 0.994475138121547, 228, 0.995614035087719, 184, 0.994565217391304, 228, 0.995614035087719
|
42
|
+
229, 0.991266375545852, 108, 0.981481481481482, 182, 0.989010989010989, 229, 0.991266375545852, 185, 0.989189189189189, 229, 0.991266375545852
|
43
|
+
END
|
44
|
+
|
45
|
+
# This was the result we were getting before first hashing on protein
|
46
|
+
# sequences and doing unique peptide hits. It is very similar (but not
|
47
|
+
# exactly the same) to what we are doing now. Must have something to do
|
48
|
+
# with the way things are hashed out.
|
49
|
+
before_doing_uniq_peptides=<<END
|
50
|
+
# NH = number of hits
|
51
|
+
# TP = true positives
|
52
|
+
# FP = false positives
|
53
|
+
# PR = precision = TP/(TP+FP)
|
54
|
+
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
55
|
+
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
56
|
+
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
33
57
|
125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
|
34
58
|
155, 1.0, 85, 1.0, 154, 1.0, 155, 1.0, 156, 1.0, 155, 1.0
|
35
59
|
186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
|
@@ -49,10 +73,17 @@ END
|
|
49
73
|
cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
|
50
74
|
#puts "RUNNING: #{cmd}"
|
51
75
|
reply = `#{cmd}`
|
76
|
+
# This is what we were getting before hashing for unique peptides
|
77
|
+
# It is very similar (but not identical to previous output)
|
52
78
|
string =<<END
|
53
79
|
Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
|
54
80
|
./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
|
55
81
|
END
|
82
|
+
|
83
|
+
string =<<NEWEND
|
84
|
+
Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
|
85
|
+
./test/tfiles/bioworks_with_INV_small.xml 228.939375794224 107.877585995136 181.929045912105 228.939375794224 184.924437525838 228.939375794224
|
86
|
+
NEWEND
|
56
87
|
assert_equal(string, reply, "area under the curve")
|
57
88
|
end
|
58
89
|
end
|
data/test/tc_mzxml.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'spec/mzxml/parser'
|
3
3
|
|
4
|
-
|
5
4
|
class SpecMzXML < Test::Unit::TestCase
|
6
5
|
def initialize(arg)
|
7
6
|
super(arg)
|
@@ -49,23 +48,24 @@ class SpecMzXML < Test::Unit::TestCase
|
|
49
48
|
sr_raw = @tfiles + 'smallraw.RAW'
|
50
49
|
sr_noext = @tfiles + 'smallraw'
|
51
50
|
sr_mzxml = @tfiles + 'smallraw.mzXML'
|
52
|
-
|
51
|
+
klass = Spec::MzXML
|
53
52
|
# given raw
|
54
|
-
file =
|
53
|
+
file = klass.file_to_mzxml(sr_raw)
|
55
54
|
file_to_mzxml_assert(file)
|
56
55
|
# given mzXML
|
57
|
-
file =
|
56
|
+
file = klass.file_to_mzxml(sr_mzxml)
|
58
57
|
file_to_mzxml_assert(file)
|
59
58
|
File.unlink(sr_mzxml)
|
60
59
|
# given basename (and no mzXML)
|
61
|
-
file =
|
60
|
+
file = klass.file_to_mzxml(sr_noext)
|
62
61
|
file_to_mzxml_assert(file)
|
63
62
|
# given basename (and mzXML)
|
64
|
-
file =
|
63
|
+
file = klass.file_to_mzxml(sr_noext)
|
65
64
|
file_to_mzxml_assert(file)
|
66
65
|
File.unlink(sr_mzxml)
|
67
66
|
else
|
68
67
|
puts "SKIPPING tests requiring 't2x' to convert RAW to mzXML"
|
68
|
+
puts "(look in the archive folder of the gem for t2x binary for linux)"
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
@@ -84,3 +84,5 @@ class SpecMzXML < Test::Unit::TestCase
|
|
84
84
|
end
|
85
85
|
|
86
86
|
end
|
87
|
+
|
88
|
+
|
data/test/tc_precision.rb
CHANGED
data/test/tc_proph.rb
CHANGED
@@ -16,8 +16,8 @@ class ProphTest < Test::Unit::TestCase
|
|
16
16
|
|
17
17
|
def test_parse_protxml_file
|
18
18
|
file = @tfiles + 'opd1/000_020_3prots-prot.xml'
|
19
|
-
#obj =
|
20
|
-
obj =
|
19
|
+
#obj = Proph::ProtSummary.new
|
20
|
+
obj = Proph::ProtSummary.new(file)
|
21
21
|
assert_equal(3, obj.prot_groups.size)
|
22
22
|
assert_equal("1.00", obj.prot_groups.first.probability)
|
23
23
|
assert_equal("0.98", obj.prot_groups[2].probability)
|
@@ -38,7 +38,7 @@ class ProphTest < Test::Unit::TestCase
|
|
38
38
|
|
39
39
|
|
40
40
|
def Xtest_filter_by_min_pep_prob
|
41
|
-
obj =
|
41
|
+
obj = Proph::Pep::Parser.new
|
42
42
|
new_file = "tfiles/tmp.xml"
|
43
43
|
assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
|
44
44
|
obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
|
@@ -48,7 +48,7 @@ class ProphTest < Test::Unit::TestCase
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def Xtest_uniq_by_seqcharge
|
51
|
-
cls =
|
51
|
+
cls = Proph::Pep
|
52
52
|
p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
53
53
|
p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
|
54
54
|
p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
|
@@ -91,7 +91,7 @@ class ProphTest < Test::Unit::TestCase
|
|
91
91
|
s1 = Spec::Scan.new(1,2,0.10, 300.2, i1, p1)
|
92
92
|
s2 = Spec::Scan.new(2,2,0.20, 301.1, i2, p2)
|
93
93
|
s3 = Spec::Scan.new(3,2,0.30, 302.0, i3, p3)
|
94
|
-
scan =
|
94
|
+
scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
|
95
95
|
tot_inten = i1 + i2 + i3
|
96
96
|
tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
|
97
97
|
{:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
|
data/test/tc_protein_summary.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
|
2
2
|
require 'test/unit'
|
3
|
-
require
|
3
|
+
require 'spec_id/protein_summary'
|
4
|
+
require File.dirname(__FILE__) + '/test_helper'
|
4
5
|
|
5
6
|
|
6
7
|
|
@@ -20,16 +21,29 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
20
21
|
@tf_proph_cat_inv_summary_html = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.html'
|
21
22
|
@tf_proph_cat_inv_summary_png = @tfiles + 'opd1/opd1_cat_inv_small-prot.summary.png'
|
22
23
|
@tf_peptide_count = @tfiles + "peptide_counts.tmp.txt"
|
23
|
-
@cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S protein_summary.rb "
|
24
24
|
end
|
25
25
|
|
26
|
+
# Runs ProteinSummary in-process with the given command line.
#
# string_or_args:: either a String holding a whitespace-separated command
#                  line, or an argument list that is passed through unchanged
#
# Returns whatever ProteinSummary#create_from_command_line_args returns.
def runit(string_or_args)
  args =
    if string_or_args.is_a? String
      # String#split with no pattern splits on runs of whitespace and ignores
      # leading whitespace, so " -c 5.0" does not produce an empty first
      # argument (split(/\s+/) did).
      string_or_args.split
    else
      string_or_args
    end
  ProteinSummary.new.create_from_command_line_args(args)
end
|
34
|
+
|
35
|
+
|
26
36
|
def test_usage
|
27
|
-
|
37
|
+
output = capture_stdout {
|
38
|
+
runit('')
|
39
|
+
}
|
40
|
+
assert_match(/usage:/, output)
|
28
41
|
end
|
29
42
|
|
30
|
-
def
|
43
|
+
def test_proph_basic
|
31
44
|
if File.exist? @tfiles_l
|
32
|
-
|
45
|
+
runit "-c 5.0 #{@tf_proph}"
|
46
|
+
ProteinSummary.new.create_from_command_line_args([@tf_proph, '-c', '5.0'])
|
33
47
|
assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
|
34
48
|
string = IO.read(@tf_summary)
|
35
49
|
assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
|
@@ -41,7 +55,7 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
41
55
|
end
|
42
56
|
|
43
57
|
def test_bioworks_basic
|
44
|
-
|
58
|
+
runit "#{@tf_bioworks_small}"
|
45
59
|
assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
|
46
60
|
File.unlink @tf_bioworks_small_summary_html unless NODELETE
|
47
61
|
|
@@ -49,23 +63,32 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
49
63
|
end
|
50
64
|
|
51
65
|
def test_bioworks_with_precision
|
52
|
-
|
53
|
-
|
54
|
-
|
66
|
+
## Could reimplement a separate file approach?
|
67
|
+
#reply = `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
|
68
|
+
runit "#{@tf_bioworks_small} --precision"
|
69
|
+
assert_match(/# hits.*106/m, IO.read(@tf_bioworks_small_summary_html))
|
70
|
+
#assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
|
71
|
+
#assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
|
55
72
|
assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
|
56
73
|
File.unlink @tf_bioworks_small_summary_html unless NODELETE
|
57
74
|
end
|
58
75
|
|
59
|
-
def
|
76
|
+
def test_proph_with_precision
|
60
77
|
#puts @cmd
|
61
|
-
|
78
|
+
runit "#{@tf_proph_cat_inv} -f INV_ --precision"
|
79
|
+
html = IO.read(@tf_proph_cat_inv_summary_html)
|
80
|
+
assert_match(/# hits/, html, "in #{@tf_proph_cat_inv_summary_html}")
|
81
|
+
assert_match(/2.*0\.0000/m, html, "in #{@tf_proph_cat_inv_summary_html}")
|
82
|
+
assert_match(/3.*0\.3333/m, html, "in #{@tf_proph_cat_inv_summary_html}")
|
83
|
+
assert_match(/7.*0\.5714/m, html, "in #{@tf_proph_cat_inv_summary_html}")
|
84
|
+
|
62
85
|
File.unlink @tf_proph_cat_inv_summary_html unless NODELETE
|
63
86
|
File.unlink @tf_proph_cat_inv_summary_png unless NODELETE
|
64
87
|
end
|
65
88
|
|
66
|
-
def
|
89
|
+
def test_peptide_count
|
67
90
|
if File.exist? @tfiles_l
|
68
|
-
|
91
|
+
runit "-c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}"
|
69
92
|
assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
|
70
93
|
file = IO.read(@tf_peptide_count)
|
71
94
|
assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
|
data/test/tc_sequest.rb
CHANGED
@@ -4,8 +4,10 @@
|
|
4
4
|
require 'spec_id'
|
5
5
|
require 'spec_id/sequest'
|
6
6
|
require 'test/unit'
|
7
|
+
require 'spec/mzxml'
|
7
8
|
|
8
9
|
|
10
|
+
NODELETE = false
|
9
11
|
|
10
12
|
class SequestTest < Test::Unit::TestCase
|
11
13
|
|
@@ -18,10 +20,10 @@ class SequestTest < Test::Unit::TestCase
|
|
18
20
|
@tf_bioworks_xml = @tfiles + "bioworks_small.xml"
|
19
21
|
end
|
20
22
|
|
21
|
-
def
|
23
|
+
def Xtest_set_from_bioworks
|
22
24
|
if File.exist? @tfiles_l
|
23
25
|
out_path = '.'
|
24
|
-
pepxml_objs =
|
26
|
+
pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(@tf_bioworks_xml, @tf_params, {:ms_path => @tf_mzxml_path, :out_path => out_path})
|
25
27
|
pepxml_objs.each do |obj|
|
26
28
|
assert(obj.spectrum_queries.size > 2)
|
27
29
|
assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
|
@@ -55,10 +57,10 @@ class SequestTest < Test::Unit::TestCase
|
|
55
57
|
mzxml_path = @tfiles + "opd1"
|
56
58
|
out_path = @tfiles
|
57
59
|
pepxml_version = 18
|
58
|
-
pepxml_objs =
|
60
|
+
pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => pepxml_version, :sample_enzyme => "trypsin"})
|
59
61
|
puts "TOOK #{Time.new - st}secs"
|
60
62
|
po = pepxml_objs.first
|
61
|
-
assert_equal(pepxml_version,
|
63
|
+
assert_equal(pepxml_version, Sequest::PepXML.pepxml_version)
|
62
64
|
|
63
65
|
# MSMSPipelineAnalysis
|
64
66
|
pipe = po.msms_pipeline_analysis
|
@@ -197,9 +199,9 @@ class SequestTest < Test::Unit::TestCase
|
|
197
199
|
|
198
200
|
|
199
201
|
|
200
|
-
def
|
201
|
-
params =
|
202
|
-
scall =
|
202
|
+
def Xtest_calc_num_tol_term
|
203
|
+
params = Sequest::Params.new(@tf_params)
|
204
|
+
scall = Sequest::PepXML::SearchHit
|
203
205
|
sym = :calc_num_tol_term
|
204
206
|
assert_equal(2, scall.send(sym, params, "K.EPTIDR.E"))
|
205
207
|
assert_equal(1, scall.send(sym, params, "K.PEPTIDR.E"))
|
@@ -207,9 +209,9 @@ class SequestTest < Test::Unit::TestCase
|
|
207
209
|
assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
|
208
210
|
end
|
209
211
|
|
210
|
-
def
|
211
|
-
params =
|
212
|
-
scall =
|
212
|
+
def Xtest_calc_num_missed_cleavages
|
213
|
+
params = Sequest::Params.new(@tf_params)
|
214
|
+
scall = Sequest::PepXML::SearchHit
|
213
215
|
sym = :calc_num_missed_cleavages
|
214
216
|
assert_equal(0, scall.send(sym, params, "K.EPTIDR.E"))
|
215
217
|
assert_equal(0, scall.send(sym, params, "K.PEPTIDR.E"))
|
@@ -225,35 +227,27 @@ class SequestTest < Test::Unit::TestCase
|
|
225
227
|
end
|
226
228
|
|
227
229
|
|
228
|
-
def
|
229
|
-
assert_equal("hello.fasta",
|
230
|
-
assert_equal("hello.fasta",
|
230
|
+
def Xtest_sys_ind_basename
|
231
|
+
assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
|
232
|
+
assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
|
231
233
|
end
|
232
234
|
|
233
|
-
def
|
234
|
-
obj =
|
235
|
+
def Xtest_modifications
|
236
|
+
obj = Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
|
235
237
|
answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
|
236
238
|
assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
|
237
239
|
|
238
240
|
## need more here
|
239
241
|
end
|
240
242
|
|
241
|
-
def
|
242
|
-
hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
|
243
|
-
cl = proc {|v| SpecID::Sequest::PepXML::SearchHit.remove_non_amino_acids(v) }
|
244
|
-
hash.each do |k,v|
|
245
|
-
assert_equal(v, cl.call(k))
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
def test_modification_info
|
243
|
+
def Xtest_modification_info
|
250
244
|
hash = {
|
251
245
|
:mod_nterm_mass => 520.2,
|
252
246
|
:modified_peptide => "MOD*IFI^E&D",
|
253
247
|
:mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
|
254
248
|
}
|
255
249
|
answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
|
256
|
-
string =
|
250
|
+
string = Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
|
257
251
|
assert_match(_re('<modification_info'), answ)
|
258
252
|
assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
|
259
253
|
assert_match(_re(" modified_peptide=\"MOD*IFI^E&D\""), answ)
|
@@ -270,22 +264,73 @@ class SequestTest < Test::Unit::TestCase
|
|
270
264
|
end
|
271
265
|
|
272
266
|
def test_modifications
|
273
|
-
params =
|
267
|
+
params = Sequest::Params.new(@tf_params)
|
274
268
|
mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
|
275
269
|
params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
|
276
270
|
params.term_diff_search_options = "14.20000 12.33000"
|
277
|
-
|
278
|
-
=begin
|
279
|
-
mod = SpecID::Sequest::PepXML::Modifications(params, mod_string)
|
280
|
-
SpecID::Sequest::PepXML::Modifications
|
281
|
-
peptide = "PEPTIDE"
|
271
|
+
mod = Sequest::PepXML::Modifications.new(params, mod_string)
|
282
272
|
## no mods
|
273
|
+
peptide = "PEPTIDE"
|
283
274
|
assert_equal(nil, mod.modification_info(peptide))
|
284
275
|
peptide = "]M*EC^S@IDM#M*EMSCM["
|
285
|
-
|
286
|
-
|
276
|
+
modinfo = mod.modification_info(peptide)
|
277
|
+
assert_equal(peptide, modinfo.modified_peptide)
|
278
|
+
assert_in_delta(146.40054, modinfo.mod_nterm_mass, 0.000001)
|
279
|
+
assert_in_delta(160.52994, modinfo.mod_cterm_mass, 0.000001)
|
280
|
+
end
|
287
281
|
|
282
|
+
# Splits +string+ on ' ' and asserts that every resulting piece occurs (as
# literal text, in any order) in each line of +lines+ that matches
# +find_line_regexp+.
#
# Fails when no line matches +find_line_regexp+, so that a missing line cannot
# make the check pass without any assertion being run.
def match_modline_pieces(lines, find_line_regexp, string)
  pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
  matching_lines = lines.select {|line| line =~ find_line_regexp }
  assert(!matching_lines.empty?, "no line matches #{find_line_regexp.inspect}")
  matching_lines.each do |line|
    pieces.each {|piece| assert_match(piece, line) }
  end
end
|
289
294
|
|
295
|
+
# Converts two .srf fixtures to pepXML via Sequest::PepXML.set_from_bioworks
# and checks that each output file contains the expected modification lines.
# Skipped (with a notice) when the large fixture directory is missing.
def test_modifications_in_run
  unless File.exist? @tfiles_l
    assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
    return
  end

  modfiles_sequest_dir = @tfiles_l + 'opd1_2runs_2mods/sequest/'
  modfiles_data_dir = @tfiles_l + 'opd1_2runs_2mods/data/'
  srgfile = modfiles_sequest_dir + 'tmp.srg'
  out_path = modfiles_sequest_dir + 'pepxml'
  # One list drives both the input .srf names and the expected output names.
  basenames = %w(020 040)
  modfiles = basenames.map {|file| modfiles_sequest_dir + file + ".srf" }

  # Called with :print => true; only the files it writes are inspected below,
  # so the return value is not kept.
  Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(srgfile), {:ms_data => modfiles_data_dir, :out_path => out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )

  basenames.each do |file|
    fn = out_path + '/' + file + '.xml'
    assert(File.exist?(fn), "file #{fn} exists")
    beginning = IO.read(fn)
    lines = beginning.split("\n")

    # [regexp selecting a line, attributes that line must contain]
    [
      [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],

      [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
      [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
      [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
      [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
    ].each do |a,b|
      match_modline_pieces(lines, a, b)
    end

    # Literal fragments that must appear somewhere in the file.
    [
      '<modification_info modified_peptide="Y#RLGGS#T#K">',
      '<mod_aminoacid_mass position="1" mass="243.1559"/>',
      '<mod_aminoacid_mass position="7" mass="167.0581"/>',
      '</modification_info>',
      '<mod_aminoacid_mass position="9" mass="181.085"/>'
    ].each do |line|
      assert_match(/#{Regexp.escape(line)}/, beginning, "a modification info for a peptide")
    end
    File.unlink(fn) unless NODELETE
  end
end
|
290
335
|
end
|
291
336
|
|