mspire 0.5.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ms/cvlist'
|
3
|
+
require 'cv'
|
4
|
+
|
5
|
+
describe 'appending CV params objects to an MS::CVList' do
|
6
|
+
describe 'intelligently appending params with #param' do
|
7
|
+
before do
|
8
|
+
@cv = MS::CVList.new
|
9
|
+
end
|
10
|
+
it 'sends detailed descriptions to CV::Param.new' do
|
11
|
+
arglist = [
|
12
|
+
['IMS', 'IMS:1000052', 'position z', 22],
|
13
|
+
['IMS', 'IMS:1000030', 'continuous'],
|
14
|
+
['IMS', 'IMS:1000052', 'position z', 22, 'UO:0000008'],
|
15
|
+
['IMS', 'IMS:1000030', 'continuous', 'UO:0000008'],
|
16
|
+
['IMS', 'IMS:1000052', 'position z', 22, MS::CV::Param.new('UO:0000008')],
|
17
|
+
['IMS', 'IMS:1000030', 'continuous', MS::CV::Param.new('UO:0000008')],
|
18
|
+
]
|
19
|
+
arglist.each do |args|
|
20
|
+
@cv.param *args
|
21
|
+
end
|
22
|
+
@cv.size.should == arglist.size
|
23
|
+
arglist.each_with_index do |args, i|
|
24
|
+
@cv[i].should == MS::CV::Param.new(*args)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
it 'deciphers short accession descriptions' do
|
28
|
+
@cv.param 'MS:1000004' # sample mass
|
29
|
+
@cv.param 'IMS:1000042', 23 # max count of pixels x
|
30
|
+
{cv_ref: 'MS', accession: 'MS:1000004', name: 'sample mass', value: nil}.each do |key,val|
|
31
|
+
@cv[0].send(key).should == val
|
32
|
+
end
|
33
|
+
{cv_ref: 'IMS', accession: 'IMS:1000042', name: 'max count of pixels x', value: 23}.each do |key,val|
|
34
|
+
@cv[1].send(key).should == val
|
35
|
+
end
|
36
|
+
end
|
37
|
+
describe 'appending on initialization' do
|
38
|
+
it 'can be done with a block' do
|
39
|
+
cvlist = MS::CVList.new do
|
40
|
+
param 'MS:1000004' # sample mass
|
41
|
+
param 'IMS:1000042', 23 # max count of pixels x
|
42
|
+
end
|
43
|
+
cvlist.size.should == 2
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'can be done with brackets' do
|
48
|
+
args = ['IMS', 'IMS:1000052', 'position z', 22]
|
49
|
+
param_obj = CV::Param.new(*args)
|
50
|
+
cvlist = MS::CVList['MS:1000004', ['MS:1000004'], ['IMS:1000042', 23], param_obj, args]
|
51
|
+
cvlist.size.should == 5
|
52
|
+
cvlist[0].should == cvlist[1]
|
53
|
+
cvlist.each do |param|
|
54
|
+
param.accession.should_not be_nil
|
55
|
+
param.name.should_not be_nil
|
56
|
+
param.cv_ref.should_not be_nil
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,351 @@
|
|
1
|
+
require 'spec_helper.rb'
|
2
|
+
|
3
|
+
require 'ms/digester'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
describe 'a digester' do
|
7
|
+
before do
|
8
|
+
@digester = MS::Digester.new('arg', 'R')
|
9
|
+
end
|
10
|
+
|
11
|
+
# Pretty-prints +input+ on a single line via PP.singleline_pp, appending the
# text to +str+ (a fresh empty String unless the caller supplies a sink) and
# returning that sink.
def spp(input, str = '')
  PP.singleline_pp(input, str)
end
|
14
|
+
|
15
|
+
# Builds a synthetic sequence of n * 1000 residues for the speed specs.
# Every +split+-th residue is an 'R'; every other residue is an 'A'.
# The running counter restarts after each 'R'.
def nk_string(n, split)
  since_cut = 0
  residues = (n * 1000).times.map do
    since_cut += 1
    if since_cut >= split
      since_cut = 0
      'R'
    else
      'A'
    end
  end
  residues.join('')
end
|
31
|
+
|
32
|
+
it 'finds cleavage site indices' do
|
33
|
+
{
|
34
|
+
"" => [0,0],
|
35
|
+
"A" => [0,1],
|
36
|
+
"R" => [0,1],
|
37
|
+
"AAA" => [0,3],
|
38
|
+
"RAA" => [0,1,3],
|
39
|
+
"ARA" => [0,2,3],
|
40
|
+
"AAR" => [0,3],
|
41
|
+
"RRA" => [0,1,2,3],
|
42
|
+
"RAR" => [0,1,3],
|
43
|
+
"RRR" => [0,1,2,3],
|
44
|
+
|
45
|
+
"R\nR\nR" => [0,2,4,5],
|
46
|
+
"R\n\n\nR\nR\n\n" => [0,4,6,9]
|
47
|
+
}.each do |sequence, expected|
|
48
|
+
@digester.cleavage_sites(sequence).should == expected
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'finds cleavage sites with exception' do
  # 'R' is the cleavage residue and 'P' the exception residue passed to the
  # digester. The expectations below (e.g. "ARPA" and "RRP") show that no
  # site is reported after an 'R' that is directly followed by 'P'.
  @digester = MS::Digester.new('argp', 'R', 'P')
  {
    "" => [0,0],
    "A" => [0,1],
    "R" => [0,1],
    "AAA" => [0,3],
    "RAA" => [0,1,3],
    "ARA" => [0,2,3],
    "AAR" => [0,3],
    "RRA" => [0,1,2,3],
    "RAR" => [0,1,3],
    "RRR" => [0,1,2,3],

    # A second literal entry, "PR" => [0,1,2], used to precede this one.
    # Ruby keeps only the last value for a repeated hash-literal key, so
    # that entry was never checked (and it contradicted this one); it has
    # been removed to silence the duplicated-key warning.
    "PR" => [0,2],
    "PRR" => [0,2,3],
    "RPR" => [0,3],
    "RRP" => [0,1,3],
    "APRA" => [0,3,4],
    "ARPA" => [0,4],
    "ARPARA" => [0,5,6],
    "R\nPR\nR" => [0,5,6],
    "RP\nR\nR" => [0,5,6],
    "RP\nR\nR\n" => [0,5,7]
  }.each do |sequence, expected|
    @digester.cleavage_sites(sequence).should == expected
  end
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
it 'finds cleavage sites with offset and limit' do
|
85
|
+
{
|
86
|
+
"RxxR" => [2,4],
|
87
|
+
"RxAxR" => [2,4],
|
88
|
+
"RxAAAxR" => [2,4],
|
89
|
+
"RxRRRxR" => [2,3,4]
|
90
|
+
}.each do |sequence, expected|
|
91
|
+
@digester.cleavage_sites(sequence, 2, 2).should == expected
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
it 'finds cleavage sites fast' do
|
96
|
+
str = nk_string(10, 1000)
|
97
|
+
@digester.cleavage_sites(str).length.should == 11
|
98
|
+
benchmark(20) do |x|
|
99
|
+
x.report("10kx - fragments") do
|
100
|
+
10000.times { @digester.cleavage_sites(str) }
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'digests proteins' do
|
106
|
+
{
|
107
|
+
"" => [''],
|
108
|
+
"A" => ["A"],
|
109
|
+
"R" => ["R"],
|
110
|
+
"AAA" => ["AAA"],
|
111
|
+
"RAA" => ["R", "AA"],
|
112
|
+
"ARA" => ["AR", "A"],
|
113
|
+
"AAR" => ["AAR"],
|
114
|
+
"RRA" => ["R", "R", "A"],
|
115
|
+
"RAR" => ["R", "AR"],
|
116
|
+
"RRR" => ["R", "R", "R"]
|
117
|
+
}.each do |sequence, expected|
|
118
|
+
# spp(sequence)
|
119
|
+
@digester.digest(sequence).should == expected
|
120
|
+
#@digester.digest(sequence) {|frag, s, e| frag}.should == expected
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'digests with missed cleavages' do
|
125
|
+
{
|
126
|
+
"" => [''],
|
127
|
+
"A" => ["A"],
|
128
|
+
"R" => ["R"],
|
129
|
+
"AAA" => ["AAA"],
|
130
|
+
"RAA" => ["R", "RAA", "AA"],
|
131
|
+
"ARA" => ["AR", "ARA", "A"],
|
132
|
+
"AAR" => ["AAR"],
|
133
|
+
"RRA" => ["R", "RR", "R", "RA", "A"],
|
134
|
+
"RAR" => ["R", "RAR", "AR"],
|
135
|
+
"RRR" => ["R", "RR", "R", "RR", "R"]
|
136
|
+
}.each do |sequence, expected|
|
137
|
+
@digester.digest(sequence, 1).should == expected
|
138
|
+
#@digester.digest(sequence, 1) {|frag, s, e| frag}.should == expected
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
it 'digests with two missed cleavages' do
|
143
|
+
{
|
144
|
+
"" => [''],
|
145
|
+
"A" => ["A"],
|
146
|
+
"R" => ["R"],
|
147
|
+
"AAA" => ["AAA"],
|
148
|
+
"RAA" => ["R", "RAA", "AA"],
|
149
|
+
"ARA" => ["AR", "ARA", "A"],
|
150
|
+
"AAR" => ["AAR"],
|
151
|
+
"RRA" => ["R", "RR", "RRA", "R", "RA", "A"],
|
152
|
+
"RAR" => ["R", "RAR", "AR"],
|
153
|
+
"RRR" => ["R", "RR", "RRR", "R", "RR", "R"]
|
154
|
+
}.each do |sequence, expected|
|
155
|
+
@digester.digest(sequence, 2).should == expected
|
156
|
+
#@digester.digest(sequence, 2) {|frag, s, e| frag}.should == expected
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
it 'digests fast' do
|
161
|
+
str = nk_string(10, 1000)
|
162
|
+
@digester.digest(str).length.should == 10
|
163
|
+
benchmark(20) do |x|
|
164
|
+
x.report("10kx - fragments") do
|
165
|
+
10000.times { @digester.digest(str) }
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
it 'finds sites to be digested' do
|
171
|
+
{
|
172
|
+
"" => [[0,0]],
|
173
|
+
"A" => [[0,1]],
|
174
|
+
"R" => [[0,1]],
|
175
|
+
"AAA" => [[0,3]],
|
176
|
+
"RAA" => [[0,1],[1,3]],
|
177
|
+
"ARA" => [[0,2],[2,3]],
|
178
|
+
"AAR" => [[0,3]],
|
179
|
+
"RRA" => [[0,1],[1,2],[2,3]],
|
180
|
+
"RAR" => [[0,1],[1,3]],
|
181
|
+
"RRR" => [[0,1],[1,2],[2,3]]
|
182
|
+
}.each do |sequence, expected|
|
183
|
+
@digester.site_digest(sequence).should == expected
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
it 'finds sites to be digested with missed cleavages' do
|
188
|
+
{
|
189
|
+
"" => [[0,0]],
|
190
|
+
"A" => [[0,1]],
|
191
|
+
"R" => [[0,1]],
|
192
|
+
"AAA" => [[0,3]],
|
193
|
+
"RAA" => [[0,1],[0,3],[1,3]],
|
194
|
+
"ARA" => [[0,2],[0,3],[2,3]],
|
195
|
+
"AAR" => [[0,3]],
|
196
|
+
"RRA" => [[0,1],[0,2],[1,2],[1,3],[2,3]],
|
197
|
+
"RAR" => [[0,1],[0,3],[1,3]],
|
198
|
+
"RRR" => [[0,1],[0,2],[1,2],[1,3],[2,3]]
|
199
|
+
}.each do |sequence, expected|
|
200
|
+
@digester.site_digest(sequence, 1).should == expected
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'finds sites to be digested with two missed cleavages' do
|
205
|
+
{
|
206
|
+
"" => [[0,0]],
|
207
|
+
"A" => [[0,1]],
|
208
|
+
"R" => [[0,1]],
|
209
|
+
"AAA" => [[0,3]],
|
210
|
+
"RAA" => [[0,1],[0,3],[1,3]],
|
211
|
+
"ARA" => [[0,2],[0,3],[2,3]],
|
212
|
+
"AAR" => [[0,3]],
|
213
|
+
"RRA" => [[0,1],[0,2],[0,3],[1,2],[1,3],[2,3]],
|
214
|
+
"RAR" => [[0,1],[0,3],[1,3]],
|
215
|
+
"RRR" => [[0,1],[0,2],[0,3],[1,2],[1,3],[2,3]]
|
216
|
+
}.each do |sequence, expected|
|
217
|
+
@digester.site_digest(sequence, 2).should == expected
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
it 'does site digestion fast' do
|
222
|
+
str = nk_string(10, 1000)
|
223
|
+
@digester.site_digest(str).length.should == 10
|
224
|
+
benchmark(20) do |x|
|
225
|
+
x.report("10kx - fragments") do
|
226
|
+
10000.times { @digester.site_digest(str) }
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
|
233
|
+
describe 'performs as documented in readme' do
|
234
|
+
it 'runs cleavage sites documentation' do
|
235
|
+
d = MS::Digester.new('Trypsin', 'KR', 'P')
|
236
|
+
seq = "AARGGR"
|
237
|
+
sites = d.cleavage_sites(seq)
|
238
|
+
sites.should == [0, 3, 6]
|
239
|
+
|
240
|
+
seq[sites[0], sites[0+1] - sites[0]].should == "AAR"
|
241
|
+
seq[sites[1], sites[1+1] - sites[1]].should == "GGR"
|
242
|
+
|
243
|
+
seq = "AAR \n GGR"
|
244
|
+
sites = d.cleavage_sites(seq)
|
245
|
+
sites.should == [0, 8, 11]
|
246
|
+
|
247
|
+
seq[sites[0], sites[0+1] - sites[0]].should == "AAR \n "
|
248
|
+
seq[sites[1], sites[1+1] - sites[1]].should == "GGR"
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
describe 'basic trypsin digestion' do
|
253
|
+
it 'performs digestion and can specify sites of digestion' do
|
254
|
+
trypsin = MS::Digester['Trypsin']
|
255
|
+
|
256
|
+
expected = [
|
257
|
+
'MIVIGR',
|
258
|
+
'SIVHPYITNEYEPFAAEK',
|
259
|
+
'QQILSIMAG']
|
260
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').should == expected
|
261
|
+
|
262
|
+
expected = [
|
263
|
+
'MIVIGR',
|
264
|
+
'MIVIGRSIVHPYITNEYEPFAAEK',
|
265
|
+
'SIVHPYITNEYEPFAAEK',
|
266
|
+
'SIVHPYITNEYEPFAAEKQQILSIMAG',
|
267
|
+
'QQILSIMAG']
|
268
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).should == expected
|
269
|
+
|
270
|
+
expected = [
|
271
|
+
[0,6],
|
272
|
+
[0,24],
|
273
|
+
[6,24],
|
274
|
+
[6,33],
|
275
|
+
[24,33]]
|
276
|
+
trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).should == expected
|
277
|
+
end
|
278
|
+
|
279
|
+
it 'completely ignores whitespace inside protein sequences' do
|
280
|
+
expected = [
|
281
|
+
"\tMIVIGR",
|
282
|
+
"SIVHP\nYITNEYEPFAAE K",
|
283
|
+
"QQILSI\rMAG"]
|
284
|
+
MS::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").should == expected
|
285
|
+
end
|
286
|
+
|
287
|
+
it 'does a trypsin digest' do
|
288
|
+
trypsin = MS::Digester[:trypsin]
|
289
|
+
{
|
290
|
+
"" => [''],
|
291
|
+
"A" => ["A"],
|
292
|
+
"R" => ["R"],
|
293
|
+
"AAA" => ["AAA"],
|
294
|
+
"RAA" => ["R", "AA"],
|
295
|
+
"ARA" => ["AR", "A"],
|
296
|
+
"AAR" => ["AAR"],
|
297
|
+
"RRA" => ["R", "R", "A"],
|
298
|
+
"RAR" => ["R", "AR"],
|
299
|
+
"RRR" => ["R", "R", "R"],
|
300
|
+
"RKR" => ["R", "K", "R"],
|
301
|
+
|
302
|
+
"ARP" => ["ARP"],
|
303
|
+
"PRA" => ["PR","A"],
|
304
|
+
"ARPARAA" => ["ARPAR", "AA"],
|
305
|
+
"RPRRR" => ["RPR", "R", "R"]
|
306
|
+
}.each do |sequence, expected|
|
307
|
+
trypsin.digest(sequence).should == expected
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
|
312
|
+
|
313
|
+
end
|
314
|
+
|
315
|
+
describe 'digestion with other enzymes' do
|
316
|
+
|
317
|
+
# This is how to access the already created enzyme:
|
318
|
+
# MS::Digester['Arg-C'] (or :arg_c, 'ARG-C', :ARG_C)
|
319
|
+
{
|
320
|
+
['Arg-C', :arg_c] => {
|
321
|
+
"AARC" => ["AAR", "C"],
|
322
|
+
"AARP" => ["AARP"]
|
323
|
+
},
|
324
|
+
['Asp-N', :asp_n] => {
|
325
|
+
"AABDS" => ["AA", "B", "DS"],
|
326
|
+
"ADZBS" => ["A", "DZ", "BS"],
|
327
|
+
"B" => %w(B),
|
328
|
+
"A" => %w(A),
|
329
|
+
"ABD" => %w(A B D),
|
330
|
+
},
|
331
|
+
['Asp-N_ambic', :asp_n_ambic] => {
|
332
|
+
"AAEDS" => ["AA", "E", "DS"],
|
333
|
+
"ADZES" => ["A", "DZ", "ES"],
|
334
|
+
"AED" => %w(A E D),
|
335
|
+
"GDE" => %w(G D E),
|
336
|
+
"AAECCDGG" => %w(AA ECC DGG),
|
337
|
+
}
|
338
|
+
}.each do |enzyme_names, test_hash|
|
339
|
+
it "digests with '#{enzyme_names.first}'" do
|
340
|
+
digester = MS::Digester[enzyme_names.first]
|
341
|
+
digester.should == MS::Digester[enzyme_names.last]
|
342
|
+
digester.name.should == enzyme_names.first
|
343
|
+
test_hash.each do |sequence, expected|
|
344
|
+
digester.digest(sequence).should == expected
|
345
|
+
end
|
346
|
+
end
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
|
351
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/fasta'
|
4
|
+
|
5
|
+
describe 'basic fasta operations' do
|
6
|
+
before do
|
7
|
+
@headers = [">gi|5524211 [hello]", ">another B", ">again C"]
|
8
|
+
@entries = ["LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV\nGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX\nIENY", "ABCDEF\nGHIJK", "ABCD"]
|
9
|
+
@sequences = @entries.map {|v| v.gsub("\n", '') }
|
10
|
+
@data = {}
|
11
|
+
@data['newlines'] = @headers.zip(@entries).map do |header, data|
|
12
|
+
header + "\n" + data
|
13
|
+
end.join("\n")
|
14
|
+
@data['carriage_returns_and_newlines'] = @data['newlines'].gsub("\n", "\r\n")
|
15
|
+
file_key_to_filename_pairs = @data.map do |k,v|
|
16
|
+
file_key = k + '_file'
|
17
|
+
filename = k + '.tmp'
|
18
|
+
File.open(filename, 'w') {|out| out.print v }
|
19
|
+
[file_key, filename]
|
20
|
+
end
|
21
|
+
file_key_to_filename_pairs.each {|k,v| @data[k] = v }
|
22
|
+
end
|
23
|
+
|
24
|
+
after do
|
25
|
+
@data.select {|k,v| k =~ /_file$/ }.each do |k,filename|
|
26
|
+
index = filename.sub('.tmp', '.index')
|
27
|
+
[filename, index].each do |fn|
|
28
|
+
File.unlink(fn) if File.exist? fn
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Checks the entries yielded by +fasta+ against the fixtures built in the
# `before` hook: the i-th entry must have a non-nil header and sequence, its
# header must equal @headers[i] without the leading '>', and its sequence
# must equal @sequences[i]. Only as many entries as there are headers are
# inspected. Failures surface as expectation errors; the return value is nil.
def fasta_correct?(fasta)
  records = fasta.to_a
  @headers.each_index do |idx|
    record = records[idx]
    record.header.should_not == nil
    record.sequence.should_not == nil
    record.header.should == @headers[idx][1..-1]
    record.sequence.should == @sequences[idx]
  end
  nil
end
|
43
|
+
|
44
|
+
xit 'can deliver length and description hashes' do
|
45
|
+
# need to test
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'can read a file' do
|
49
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
50
|
+
MS::Fasta.open(@data[file]) do |fasta|
|
51
|
+
fasta_correct? fasta
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'can read an IO object' do
|
57
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
58
|
+
File.open(@data[file]) do |io|
|
59
|
+
fasta = MS::Fasta.new(io)
|
60
|
+
fasta_correct? fasta
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'can read a string' do
|
66
|
+
%w(newlines carriage_returns_and_newlines).each do |key|
|
67
|
+
fasta = MS::Fasta.new @data[key]
|
68
|
+
fasta_correct? fasta
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'iterates entries with foreach' do
|
73
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
74
|
+
MS::Fasta.foreach(@data[file]) do |entry|
|
75
|
+
entry.should be_an_instance_of Bio::FastaFormat
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'runs the documentation' do
  fasta_file = @data['newlines_file']
  ids = MS::Fasta.open(fasta_file) do |fasta|
    fasta.map(&:entry_id)
  end
  # Previously a bare `ids.is_a?(Array)` whose result was thrown away, so
  # the type of the returned value was never actually checked.
  ids.is_a?(Array).should == true
  ids.should == %w(gi|5524211 another again)

  # this code is already tested above
  # File.open(fasta_file) do |io|
  #   fasta = MS::Fasta.new(io)
  # end

  # taking a string
  string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
  fasta = MS::Fasta.new(string)
  (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
  simple.first.header.include?("simple").should == true
  not_simple.first.header.include?("simple").should == false
end
|
100
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
path = 'ms/ident/peptide/db'
|
5
|
+
require path
|
6
|
+
|
7
|
+
require 'stringio'

module Kernel

  # Runs the given block with $stdout redirected into an in-memory buffer
  # and returns everything the block wrote, as a String.
  #
  # The stream that was active before the call is put back afterwards, even
  # when the block raises. Restoring the previous stream (rather than the
  # STDOUT constant) keeps nested captures and any redirection installed by
  # the caller intact.
  def capture_stdout
    previous = $stdout
    buffer = StringIO.new
    $stdout = buffer
    yield
    buffer.string
  ensure
    $stdout = previous
  end

end
|
20
|
+
|
21
|
+
|
22
|
+
describe 'a uniprot fasta file' do
|
23
|
+
|
24
|
+
before do
|
25
|
+
@fasta_file = [TESTFILES, path, 'uni_11_sp_tr.fasta'].join('/')
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'amino acid expansion' do
|
29
|
+
|
30
|
+
it 'can expand out wildcard amino acid combinations' do
|
31
|
+
array = MS::Ident::Peptide::Db.expand_peptides('ALXX', 'X' => %w(* % &), 'L' => %w(P Q) )
|
32
|
+
array.sort.should == %w(AP** AP*% AP*& AP%* AP%% AP%& AP&* AP&% AP&& AQ** AQ*% AQ*& AQ%* AQ%% AQ%& AQ&* AQ&% AQ&&).sort
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'will not expand explosive combinations (>MAX_NUM_AA_EXPANSION)' do
|
36
|
+
# this is from real data
|
37
|
+
worst_case = 'LTLLRPEKHEAATGVDTICTHRVDPIGPGLXXEXLYWELSXLTXXIXELGPYTLDR'
|
38
|
+
MS::Ident::Peptide::Db.expand_peptides(worst_case, 'X' => %w(* % &)).nil?.should == true
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'returns the peptide in the array if no expansion' do
|
42
|
+
array = MS::Ident::Peptide::Db.expand_peptides('ZZZZZ', 'X' => %w(* % &), 'L' => %w(P Q) )
|
43
|
+
array.should == ['ZZZZZ']
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
describe 'creating a peptide centric database' do
|
49
|
+
before do
|
50
|
+
|
51
|
+
#@output_file = [TESTFILES, path, 'uni_11_sp_tr.'].join('/')
|
52
|
+
@output_file = [TESTFILES, path, "uni_11_sp_tr.msd_clvg2.min_aaseq4.yml"].join('/')
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'converts a fasta file into peptide centric db' do
|
56
|
+
output_files = MS::Ident::Peptide::Db.cmdline([@fasta_file])
|
57
|
+
output_files.first.should == File.expand_path(@output_file)
|
58
|
+
File.exist?(@output_file).should == true
|
59
|
+
hash = {}
|
60
|
+
YAML.load_file(@output_file).each do |k,v|
|
61
|
+
hash[k] = v.split("\t")
|
62
|
+
end
|
63
|
+
sorted = hash.sort
|
64
|
+
# these are merely frozen, not perfectly defined
|
65
|
+
sorted.first.should == ["AAFDDAIAELDTLSEESYK", ["sp|P62258|1433E_HUMAN"]]
|
66
|
+
sorted.last.should == ["YWCRLGPPRWICQTIVSTNQYTHHR", ["tr|D2KTA8|D2KTA8_HUMAN"]]
|
67
|
+
sorted.size.should == 728
|
68
|
+
File.unlink(@output_file)
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'lists approved enzymes and exits' do
|
72
|
+
output = capture_stdout do
|
73
|
+
begin
|
74
|
+
MS::Ident::Peptide::Db.cmdline(['--list-enzymes'])
|
75
|
+
rescue SystemExit
|
76
|
+
1.should == 1 # we exited
|
77
|
+
end
|
78
|
+
end
|
79
|
+
lines = output.split("\n")
|
80
|
+
lines.include?("trypsin").should == true
|
81
|
+
lines.include?("chymotrypsin").should == true
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
describe 'reading a peptide centric database' do
|
86
|
+
before do
|
87
|
+
outfiles = MS::Ident::Peptide::Db.cmdline([@fasta_file])
|
88
|
+
@outfile = outfiles.first
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'creates a hash that can retrieve peptides as an array' do
|
92
|
+
hash = MS::Ident::Peptide::Db.new(@outfile)
|
93
|
+
hash["AVTEQGHELSNEER"].should == %w(sp|P31946|1433B_HUMAN sp|P31946-2|1433B_HUMAN)
|
94
|
+
hash["VRAAR"].should == ["tr|D3DX18|D3DX18_HUMAN"]
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'reads the file on disk with random access or is enumerable' do
|
98
|
+
MS::Ident::Peptide::Db::IO.open(@outfile) do |io|
|
99
|
+
io["AVTEQGHELSNEER"].should == %w(sp|P31946|1433B_HUMAN sp|P31946-2|1433B_HUMAN)
|
100
|
+
io["VRAAR"].should == ["tr|D3DX18|D3DX18_HUMAN"]
|
101
|
+
io.each_with_index do |key_prots, i|
|
102
|
+
key_prots.first.should be_an_instance_of String
|
103
|
+
key_prots.last.should be_a_kind_of Array
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|