mspire 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'ms/cvlist'
|
|
3
|
+
require 'cv'
|
|
4
|
+
|
|
5
|
+
describe 'appending CV params objects to an MS::CVList' do
|
|
6
|
+
describe 'intelligently appending params with #param' do
|
|
7
|
+
before do
|
|
8
|
+
@cv = MS::CVList.new
|
|
9
|
+
end
|
|
10
|
+
it 'sends detailed descriptions to CV::Param.new' do
|
|
11
|
+
arglist = [
|
|
12
|
+
['IMS', 'IMS:1000052', 'position z', 22],
|
|
13
|
+
['IMS', 'IMS:1000030', 'continuous'],
|
|
14
|
+
['IMS', 'IMS:1000052', 'position z', 22, 'UO:0000008'],
|
|
15
|
+
['IMS', 'IMS:1000030', 'continuous', 'UO:0000008'],
|
|
16
|
+
['IMS', 'IMS:1000052', 'position z', 22, MS::CV::Param.new('UO:0000008')],
|
|
17
|
+
['IMS', 'IMS:1000030', 'continuous', MS::CV::Param.new('UO:0000008')],
|
|
18
|
+
]
|
|
19
|
+
arglist.each do |args|
|
|
20
|
+
@cv.param *args
|
|
21
|
+
end
|
|
22
|
+
@cv.size.should == arglist.size
|
|
23
|
+
arglist.each_with_index do |args, i|
|
|
24
|
+
@cv[i].should == MS::CV::Param.new(*args)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
it 'deciphers short accession descriptions' do
|
|
28
|
+
@cv.param 'MS:1000004' # sample mass
|
|
29
|
+
@cv.param 'IMS:1000042', 23 # max count of pixels x
|
|
30
|
+
{cv_ref: 'MS', accession: 'MS:1000004', name: 'sample mass', value: nil}.each do |key,val|
|
|
31
|
+
@cv[0].send(key).should == val
|
|
32
|
+
end
|
|
33
|
+
{cv_ref: 'IMS', accession: 'IMS:1000042', name: 'max count of pixels x', value: 23}.each do |key,val|
|
|
34
|
+
@cv[1].send(key).should == val
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
describe 'appending on initialization' do
|
|
38
|
+
it 'can be done with a block' do
|
|
39
|
+
cvlist = MS::CVList.new do
|
|
40
|
+
param 'MS:1000004' # sample mass
|
|
41
|
+
param 'IMS:1000042', 23 # max count of pixels x
|
|
42
|
+
end
|
|
43
|
+
cvlist.size.should == 2
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it 'can be done with brackets' do
|
|
48
|
+
args = ['IMS', 'IMS:1000052', 'position z', 22]
|
|
49
|
+
param_obj = CV::Param.new(*args)
|
|
50
|
+
cvlist = MS::CVList['MS:1000004', ['MS:1000004'], ['IMS:1000042', 23], param_obj, args]
|
|
51
|
+
cvlist.size.should == 5
|
|
52
|
+
cvlist[0].should == cvlist[1]
|
|
53
|
+
cvlist.each do |param|
|
|
54
|
+
param.accession.should_not be_nil
|
|
55
|
+
param.name.should_not be_nil
|
|
56
|
+
param.cv_ref.should_not be_nil
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
require 'spec_helper.rb'
|
|
2
|
+
|
|
3
|
+
require 'ms/digester'
|
|
4
|
+
require 'pp'
|
|
5
|
+
|
|
6
|
+
describe 'a digester' do
|
|
7
|
+
before do
|
|
8
|
+
@digester = MS::Digester.new('arg', 'R')
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def spp(input, str="")
|
|
12
|
+
PP.singleline_pp(input, str)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Builds a synthetic sequence of n * 1000 residues made of 'A', with an 'R'
# at every split-th position (a split of 1 or less yields only 'R').
#
# n     - Integer number of thousands of residues to generate.
# split - Numeric spacing between consecutive 'R' residues.
#
# Returns the sequence as a String.
def nk_string(n, split)
  # A fractional split rounds up to the next whole spacing and anything below
  # 1 behaves like 1, which is what resetting a counter on each 'R' produces.
  period = [split.ceil, 1].max
  (1..n * 1000).map { |pos| (pos % period).zero? ? 'R' : 'A' }.join
end
|
|
31
|
+
|
|
32
|
+
it 'finds cleavage site indices' do
|
|
33
|
+
{
|
|
34
|
+
"" => [0,0],
|
|
35
|
+
"A" => [0,1],
|
|
36
|
+
"R" => [0,1],
|
|
37
|
+
"AAA" => [0,3],
|
|
38
|
+
"RAA" => [0,1,3],
|
|
39
|
+
"ARA" => [0,2,3],
|
|
40
|
+
"AAR" => [0,3],
|
|
41
|
+
"RRA" => [0,1,2,3],
|
|
42
|
+
"RAR" => [0,1,3],
|
|
43
|
+
"RRR" => [0,1,2,3],
|
|
44
|
+
|
|
45
|
+
"R\nR\nR" => [0,2,4,5],
|
|
46
|
+
"R\n\n\nR\nR\n\n" => [0,4,6,9]
|
|
47
|
+
}.each do |sequence, expected|
|
|
48
|
+
@digester.cleavage_sites(sequence).should == expected
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Exercises #cleavage_sites on a digester built with a third constructor
# argument ('P'). Judging by the expectations below, an 'R' that is directly
# followed by 'P' is not reported as a cleavage site -- NOTE(review): confirm
# against MS::Digester.
it 'finds cleavage sites with exception' do
  @digester = MS::Digester.new('argp', 'R', 'P')
  {
    "" => [0,0],
    "A" => [0,1],
    "R" => [0,1],
    "AAA" => [0,3],
    "RAA" => [0,1,3],
    "ARA" => [0,2,3],
    "AAR" => [0,3],
    "RRA" => [0,1,2,3],
    "RAR" => [0,1,3],
    "RRR" => [0,1,2,3],

    # The literal used to list "PR" twice; Ruby keeps only the last value for
    # a repeated key, so the earlier "PR" => [0,1,2] was never checked and has
    # been dropped.
    "PR" => [0,2],
    "PRR" => [0,2,3],
    "RPR" => [0,3],
    "RRP" => [0,1,3],
    "APRA" => [0,3,4],
    "ARPA" => [0,4],
    "ARPARA" => [0,5,6],
    "R\nPR\nR" => [0,5,6],
    "RP\nR\nR" => [0,5,6],
    "RP\nR\nR\n" => [0,5,7]
  }.each do |sequence, expected|
    @digester.cleavage_sites(sequence).should == expected
  end
end
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
it 'finds cleavage sites with offset and limit' do
|
|
85
|
+
{
|
|
86
|
+
"RxxR" => [2,4],
|
|
87
|
+
"RxAxR" => [2,4],
|
|
88
|
+
"RxAAAxR" => [2,4],
|
|
89
|
+
"RxRRRxR" => [2,3,4]
|
|
90
|
+
}.each do |sequence, expected|
|
|
91
|
+
@digester.cleavage_sites(sequence, 2, 2).should == expected
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it 'finds cleavage sites fast' do
|
|
96
|
+
str = nk_string(10, 1000)
|
|
97
|
+
@digester.cleavage_sites(str).length.should == 11
|
|
98
|
+
benchmark(20) do |x|
|
|
99
|
+
x.report("10kx - fragments") do
|
|
100
|
+
10000.times { @digester.cleavage_sites(str) }
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it 'digests proteins' do
|
|
106
|
+
{
|
|
107
|
+
"" => [''],
|
|
108
|
+
"A" => ["A"],
|
|
109
|
+
"R" => ["R"],
|
|
110
|
+
"AAA" => ["AAA"],
|
|
111
|
+
"RAA" => ["R", "AA"],
|
|
112
|
+
"ARA" => ["AR", "A"],
|
|
113
|
+
"AAR" => ["AAR"],
|
|
114
|
+
"RRA" => ["R", "R", "A"],
|
|
115
|
+
"RAR" => ["R", "AR"],
|
|
116
|
+
"RRR" => ["R", "R", "R"]
|
|
117
|
+
}.each do |sequence, expected|
|
|
118
|
+
# spp(sequence)
|
|
119
|
+
@digester.digest(sequence).should == expected
|
|
120
|
+
#@digester.digest(sequence) {|frag, s, e| frag}.should == expected
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it 'digests with missed cleavages' do
|
|
125
|
+
{
|
|
126
|
+
"" => [''],
|
|
127
|
+
"A" => ["A"],
|
|
128
|
+
"R" => ["R"],
|
|
129
|
+
"AAA" => ["AAA"],
|
|
130
|
+
"RAA" => ["R", "RAA", "AA"],
|
|
131
|
+
"ARA" => ["AR", "ARA", "A"],
|
|
132
|
+
"AAR" => ["AAR"],
|
|
133
|
+
"RRA" => ["R", "RR", "R", "RA", "A"],
|
|
134
|
+
"RAR" => ["R", "RAR", "AR"],
|
|
135
|
+
"RRR" => ["R", "RR", "R", "RR", "R"]
|
|
136
|
+
}.each do |sequence, expected|
|
|
137
|
+
@digester.digest(sequence, 1).should == expected
|
|
138
|
+
#@digester.digest(sequence, 1) {|frag, s, e| frag}.should == expected
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it 'digests with two missed cleavages' do
|
|
143
|
+
{
|
|
144
|
+
"" => [''],
|
|
145
|
+
"A" => ["A"],
|
|
146
|
+
"R" => ["R"],
|
|
147
|
+
"AAA" => ["AAA"],
|
|
148
|
+
"RAA" => ["R", "RAA", "AA"],
|
|
149
|
+
"ARA" => ["AR", "ARA", "A"],
|
|
150
|
+
"AAR" => ["AAR"],
|
|
151
|
+
"RRA" => ["R", "RR", "RRA", "R", "RA", "A"],
|
|
152
|
+
"RAR" => ["R", "RAR", "AR"],
|
|
153
|
+
"RRR" => ["R", "RR", "RRR", "R", "RR", "R"]
|
|
154
|
+
}.each do |sequence, expected|
|
|
155
|
+
@digester.digest(sequence, 2).should == expected
|
|
156
|
+
#@digester.digest(sequence, 2) {|frag, s, e| frag}.should == expected
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
it 'digests fast' do
|
|
161
|
+
str = nk_string(10, 1000)
|
|
162
|
+
@digester.digest(str).length.should == 10
|
|
163
|
+
benchmark(20) do |x|
|
|
164
|
+
x.report("10kx - fragments") do
|
|
165
|
+
10000.times { @digester.digest(str) }
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
it 'finds sites to be digested' do
|
|
171
|
+
{
|
|
172
|
+
"" => [[0,0]],
|
|
173
|
+
"A" => [[0,1]],
|
|
174
|
+
"R" => [[0,1]],
|
|
175
|
+
"AAA" => [[0,3]],
|
|
176
|
+
"RAA" => [[0,1],[1,3]],
|
|
177
|
+
"ARA" => [[0,2],[2,3]],
|
|
178
|
+
"AAR" => [[0,3]],
|
|
179
|
+
"RRA" => [[0,1],[1,2],[2,3]],
|
|
180
|
+
"RAR" => [[0,1],[1,3]],
|
|
181
|
+
"RRR" => [[0,1],[1,2],[2,3]]
|
|
182
|
+
}.each do |sequence, expected|
|
|
183
|
+
@digester.site_digest(sequence).should == expected
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
it 'finds sites to be digested with missed cleavages' do
|
|
188
|
+
{
|
|
189
|
+
"" => [[0,0]],
|
|
190
|
+
"A" => [[0,1]],
|
|
191
|
+
"R" => [[0,1]],
|
|
192
|
+
"AAA" => [[0,3]],
|
|
193
|
+
"RAA" => [[0,1],[0,3],[1,3]],
|
|
194
|
+
"ARA" => [[0,2],[0,3],[2,3]],
|
|
195
|
+
"AAR" => [[0,3]],
|
|
196
|
+
"RRA" => [[0,1],[0,2],[1,2],[1,3],[2,3]],
|
|
197
|
+
"RAR" => [[0,1],[0,3],[1,3]],
|
|
198
|
+
"RRR" => [[0,1],[0,2],[1,2],[1,3],[2,3]]
|
|
199
|
+
}.each do |sequence, expected|
|
|
200
|
+
@digester.site_digest(sequence, 1).should == expected
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
it 'finds sites to be digested with two missed cleavages' do
|
|
205
|
+
{
|
|
206
|
+
"" => [[0,0]],
|
|
207
|
+
"A" => [[0,1]],
|
|
208
|
+
"R" => [[0,1]],
|
|
209
|
+
"AAA" => [[0,3]],
|
|
210
|
+
"RAA" => [[0,1],[0,3],[1,3]],
|
|
211
|
+
"ARA" => [[0,2],[0,3],[2,3]],
|
|
212
|
+
"AAR" => [[0,3]],
|
|
213
|
+
"RRA" => [[0,1],[0,2],[0,3],[1,2],[1,3],[2,3]],
|
|
214
|
+
"RAR" => [[0,1],[0,3],[1,3]],
|
|
215
|
+
"RRR" => [[0,1],[0,2],[0,3],[1,2],[1,3],[2,3]]
|
|
216
|
+
}.each do |sequence, expected|
|
|
217
|
+
@digester.site_digest(sequence, 2).should == expected
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
it 'does site digestion fast' do
|
|
222
|
+
str = nk_string(10, 1000)
|
|
223
|
+
@digester.site_digest(str).length.should == 10
|
|
224
|
+
benchmark(20) do |x|
|
|
225
|
+
x.report("10kx - fragments") do
|
|
226
|
+
10000.times { @digester.site_digest(str) }
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
describe 'performs as documented in readme' do
|
|
234
|
+
it 'runs cleavage sites documentation' do
|
|
235
|
+
d = MS::Digester.new('Trypsin', 'KR', 'P')
|
|
236
|
+
seq = "AARGGR"
|
|
237
|
+
sites = d.cleavage_sites(seq)
|
|
238
|
+
sites.should == [0, 3, 6]
|
|
239
|
+
|
|
240
|
+
seq[sites[0], sites[0+1] - sites[0]].should == "AAR"
|
|
241
|
+
seq[sites[1], sites[1+1] - sites[1]].should == "GGR"
|
|
242
|
+
|
|
243
|
+
seq = "AAR \n GGR"
|
|
244
|
+
sites = d.cleavage_sites(seq)
|
|
245
|
+
sites.should == [0, 8, 11]
|
|
246
|
+
|
|
247
|
+
seq[sites[0], sites[0+1] - sites[0]].should == "AAR \n "
|
|
248
|
+
seq[sites[1], sites[1+1] - sites[1]].should == "GGR"
|
|
249
|
+
end
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
describe 'basic trypsin digestion' do
|
|
253
|
+
it 'performs digestion and can specify sites of digestion' do
|
|
254
|
+
trypsin = MS::Digester['Trypsin']
|
|
255
|
+
|
|
256
|
+
expected = [
|
|
257
|
+
'MIVIGR',
|
|
258
|
+
'SIVHPYITNEYEPFAAEK',
|
|
259
|
+
'QQILSIMAG']
|
|
260
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').should == expected
|
|
261
|
+
|
|
262
|
+
expected = [
|
|
263
|
+
'MIVIGR',
|
|
264
|
+
'MIVIGRSIVHPYITNEYEPFAAEK',
|
|
265
|
+
'SIVHPYITNEYEPFAAEK',
|
|
266
|
+
'SIVHPYITNEYEPFAAEKQQILSIMAG',
|
|
267
|
+
'QQILSIMAG']
|
|
268
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).should == expected
|
|
269
|
+
|
|
270
|
+
expected = [
|
|
271
|
+
[0,6],
|
|
272
|
+
[0,24],
|
|
273
|
+
[6,24],
|
|
274
|
+
[6,33],
|
|
275
|
+
[24,33]]
|
|
276
|
+
trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).should == expected
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
it 'completely ignores whitespace inside protein sequences' do
|
|
280
|
+
expected = [
|
|
281
|
+
"\tMIVIGR",
|
|
282
|
+
"SIVHP\nYITNEYEPFAAE K",
|
|
283
|
+
"QQILSI\rMAG"]
|
|
284
|
+
MS::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").should == expected
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
it 'does a trypsin digest' do
|
|
288
|
+
trypsin = MS::Digester[:trypsin]
|
|
289
|
+
{
|
|
290
|
+
"" => [''],
|
|
291
|
+
"A" => ["A"],
|
|
292
|
+
"R" => ["R"],
|
|
293
|
+
"AAA" => ["AAA"],
|
|
294
|
+
"RAA" => ["R", "AA"],
|
|
295
|
+
"ARA" => ["AR", "A"],
|
|
296
|
+
"AAR" => ["AAR"],
|
|
297
|
+
"RRA" => ["R", "R", "A"],
|
|
298
|
+
"RAR" => ["R", "AR"],
|
|
299
|
+
"RRR" => ["R", "R", "R"],
|
|
300
|
+
"RKR" => ["R", "K", "R"],
|
|
301
|
+
|
|
302
|
+
"ARP" => ["ARP"],
|
|
303
|
+
"PRA" => ["PR","A"],
|
|
304
|
+
"ARPARAA" => ["ARPAR", "AA"],
|
|
305
|
+
"RPRRR" => ["RPR", "R", "R"]
|
|
306
|
+
}.each do |sequence, expected|
|
|
307
|
+
trypsin.digest(sequence).should == expected
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
end
|
|
314
|
+
|
|
315
|
+
describe 'digestion with other enzymes' do
|
|
316
|
+
|
|
317
|
+
# This is how to access the already created enzyme:
|
|
318
|
+
# MS::Digester['Arg-C'] (or :arg_c, 'ARG-C', :ARG_C)
|
|
319
|
+
{
|
|
320
|
+
['Arg-C', :arg_c] => {
|
|
321
|
+
"AARC" => ["AAR", "C"],
|
|
322
|
+
"AARP" => ["AARP"]
|
|
323
|
+
},
|
|
324
|
+
['Asp-N', :asp_n] => {
|
|
325
|
+
"AABDS" => ["AA", "B", "DS"],
|
|
326
|
+
"ADZBS" => ["A", "DZ", "BS"],
|
|
327
|
+
"B" => %w(B),
|
|
328
|
+
"A" => %w(A),
|
|
329
|
+
"ABD" => %w(A B D),
|
|
330
|
+
},
|
|
331
|
+
['Asp-N_ambic', :asp_n_ambic] => {
|
|
332
|
+
"AAEDS" => ["AA", "E", "DS"],
|
|
333
|
+
"ADZES" => ["A", "DZ", "ES"],
|
|
334
|
+
"AED" => %w(A E D),
|
|
335
|
+
"GDE" => %w(G D E),
|
|
336
|
+
"AAECCDGG" => %w(AA ECC DGG),
|
|
337
|
+
}
|
|
338
|
+
}.each do |enzyme_names, test_hash|
|
|
339
|
+
it "digests with '#{enzyme_names.first}'" do
|
|
340
|
+
digester = MS::Digester[enzyme_names.first]
|
|
341
|
+
digester.should == MS::Digester[enzyme_names.last]
|
|
342
|
+
digester.name.should == enzyme_names.first
|
|
343
|
+
test_hash.each do |sequence, expected|
|
|
344
|
+
digester.digest(sequence).should == expected
|
|
345
|
+
end
|
|
346
|
+
end
|
|
347
|
+
end
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
require 'ms/fasta'
|
|
4
|
+
|
|
5
|
+
describe 'basic fasta operations' do
|
|
6
|
+
before do
|
|
7
|
+
@headers = [">gi|5524211 [hello]", ">another B", ">again C"]
|
|
8
|
+
@entries = ["LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV\nGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX\nIENY", "ABCDEF\nGHIJK", "ABCD"]
|
|
9
|
+
@sequences = @entries.map {|v| v.gsub("\n", '') }
|
|
10
|
+
@data = {}
|
|
11
|
+
@data['newlines'] = @headers.zip(@entries).map do |header, data|
|
|
12
|
+
header + "\n" + data
|
|
13
|
+
end.join("\n")
|
|
14
|
+
@data['carriage_returns_and_newlines'] = @data['newlines'].gsub("\n", "\r\n")
|
|
15
|
+
file_key_to_filename_pairs = @data.map do |k,v|
|
|
16
|
+
file_key = k + '_file'
|
|
17
|
+
filename = k + '.tmp'
|
|
18
|
+
File.open(filename, 'w') {|out| out.print v }
|
|
19
|
+
[file_key, filename]
|
|
20
|
+
end
|
|
21
|
+
file_key_to_filename_pairs.each {|k,v| @data[k] = v }
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
after do
|
|
25
|
+
@data.select {|k,v| k =~ /_file$/ }.each do |k,filename|
|
|
26
|
+
index = filename.sub('.tmp', '.index')
|
|
27
|
+
[filename, index].each do |fn|
|
|
28
|
+
File.unlink(fn) if File.exist? fn
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Asserts that +fasta+ yields exactly the fixture entries, in order.
#
# fasta - an enumerable of entries responding to #header and #sequence.
#
# Each entry's header is compared with the matching @headers element minus
# its leading '>' character, and its sequence with the matching @sequences
# element (the fixture data with newlines removed).
def fasta_correct?(fasta)
  entries = fasta.to_a
  # Pairing by zip alone would silently ignore surplus parsed entries, so the
  # count is pinned before the per-entry comparison.
  entries.size.should == @headers.size
  entries.each_with_index do |entry, i|
    entry.header.should_not == nil
    entry.sequence.should_not == nil
    entry.header.should == @headers[i][1..-1]
    entry.sequence.should == @sequences[i]
  end
end
|
|
43
|
+
|
|
44
|
+
xit 'can deliver length and description hashes' do
|
|
45
|
+
# need to test
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it 'can read a file' do
|
|
49
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
|
50
|
+
MS::Fasta.open(@data[file]) do |fasta|
|
|
51
|
+
fasta_correct? fasta
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
it 'can read an IO object' do
|
|
57
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
|
58
|
+
File.open(@data[file]) do |io|
|
|
59
|
+
fasta = MS::Fasta.new(io)
|
|
60
|
+
fasta_correct? fasta
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it 'can read a string' do
|
|
66
|
+
%w(newlines carriage_returns_and_newlines).each do |key|
|
|
67
|
+
fasta = MS::Fasta.new @data[key]
|
|
68
|
+
fasta_correct? fasta
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
it 'iterates entries with foreach' do
|
|
73
|
+
%w(newlines_file carriage_returns_and_newlines_file).each do |file|
|
|
74
|
+
MS::Fasta.foreach(@data[file]) do |entry|
|
|
75
|
+
entry.should be_an_instance_of Bio::FastaFormat
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Walks through the usage shown in the documentation: collecting entry ids
# from a fasta file and partitioning entries parsed from a string.
it 'runs the documentation' do
  fasta_file = @data['newlines_file']
  ids = MS::Fasta.open(fasta_file) do |fasta|
    fasta.map(&:entry_id)
  end
  # previously evaluated and discarded; now an actual expectation
  ids.is_a?(Array).should == true
  ids.should == %w(gi|5524211 another again)

  # this code is already tested above
  # File.open(fasta_file) do |io|
  #   fasta = MS::Fasta.new(io)
  # end

  # taking a string
  string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
  fasta = MS::Fasta.new(string)
  (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
  simple.first.header.include?("simple").should == true
  not_simple.first.header.include?("simple").should == false
end
|
|
100
|
+
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
path = 'ms/ident/peptide/db'
|
|
5
|
+
require path
|
|
6
|
+
|
|
7
|
+
module Kernel

  # Runs the given block with $stdout temporarily pointed at an in-memory
  # buffer and returns everything the block wrote to it.
  #
  # The stream that was active on entry (not necessarily the STDOUT constant)
  # is put back afterwards, including when the block raises, so nested
  # captures and externally redirected output keep working.
  #
  # Returns the captured output as a String.
  def capture_stdout
    previous = $stdout
    buffer = StringIO.new
    $stdout = buffer
    yield
    buffer.string
  ensure
    $stdout = previous
  end

end
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
describe 'a uniprot fasta file' do
|
|
23
|
+
|
|
24
|
+
before do
|
|
25
|
+
@fasta_file = [TESTFILES, path, 'uni_11_sp_tr.fasta'].join('/')
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
describe 'amino acid expansion' do
|
|
29
|
+
|
|
30
|
+
it 'can expand out wildcard amino acid combinations' do
|
|
31
|
+
array = MS::Ident::Peptide::Db.expand_peptides('ALXX', 'X' => %w(* % &), 'L' => %w(P Q) )
|
|
32
|
+
array.sort.should == %w(AP** AP*% AP*& AP%* AP%% AP%& AP&* AP&% AP&& AQ** AQ*% AQ*& AQ%* AQ%% AQ%& AQ&* AQ&% AQ&&).sort
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'will not expand explosive combinations (>MAX_NUM_AA_EXPANSION)' do
|
|
36
|
+
# this is from real data
|
|
37
|
+
worst_case = 'LTLLRPEKHEAATGVDTICTHRVDPIGPGLXXEXLYWELSXLTXXIXELGPYTLDR'
|
|
38
|
+
MS::Ident::Peptide::Db.expand_peptides(worst_case, 'X' => %w(* % &)).nil?.should == true
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it 'returns the peptide in the array if no expansion' do
|
|
42
|
+
array = MS::Ident::Peptide::Db.expand_peptides('ZZZZZ', 'X' => %w(* % &), 'L' => %w(P Q) )
|
|
43
|
+
array.should == ['ZZZZZ']
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
describe 'creating a peptide centric database' do
|
|
49
|
+
before do
|
|
50
|
+
|
|
51
|
+
#@output_file = [TESTFILES, path, 'uni_11_sp_tr.'].join('/')
|
|
52
|
+
@output_file = [TESTFILES, path, "uni_11_sp_tr.msd_clvg2.min_aaseq4.yml"].join('/')
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it 'converts a fasta file into peptide centric db' do
|
|
56
|
+
output_files = MS::Ident::Peptide::Db.cmdline([@fasta_file])
|
|
57
|
+
output_files.first.should == File.expand_path(@output_file)
|
|
58
|
+
File.exist?(@output_file).should == true
|
|
59
|
+
hash = {}
|
|
60
|
+
YAML.load_file(@output_file).each do |k,v|
|
|
61
|
+
hash[k] = v.split("\t")
|
|
62
|
+
end
|
|
63
|
+
sorted = hash.sort
|
|
64
|
+
# these are merely frozen, not perfectly defined
|
|
65
|
+
sorted.first.should == ["AAFDDAIAELDTLSEESYK", ["sp|P62258|1433E_HUMAN"]]
|
|
66
|
+
sorted.last.should == ["YWCRLGPPRWICQTIVSTNQYTHHR", ["tr|D2KTA8|D2KTA8_HUMAN"]]
|
|
67
|
+
sorted.size.should == 728
|
|
68
|
+
File.unlink(@output_file)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Runs the command line with --list-enzymes, expecting it to print one enzyme
# name per line on stdout and then terminate via SystemExit.
it 'lists approved enzymes and exits' do
  exited = false
  output = capture_stdout do
    begin
      MS::Ident::Peptide::Db.cmdline(['--list-enzymes'])
    rescue SystemExit
      # remember the exit so it can be asserted once stdout is restored
      exited = true
    end
  end
  exited.should == true
  lines = output.split("\n")
  lines.include?("trypsin").should == true
  lines.include?("chymotrypsin").should == true
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
describe 'reading a peptide centric database' do
|
|
86
|
+
before do
|
|
87
|
+
outfiles = MS::Ident::Peptide::Db.cmdline([@fasta_file])
|
|
88
|
+
@outfile = outfiles.first
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
it 'creates a hash that can retrieve peptides as an array' do
|
|
92
|
+
hash = MS::Ident::Peptide::Db.new(@outfile)
|
|
93
|
+
hash["AVTEQGHELSNEER"].should == %w(sp|P31946|1433B_HUMAN sp|P31946-2|1433B_HUMAN)
|
|
94
|
+
hash["VRAAR"].should == ["tr|D3DX18|D3DX18_HUMAN"]
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it 'reads the file on disk with random access or is enumerable' do
|
|
98
|
+
MS::Ident::Peptide::Db::IO.open(@outfile) do |io|
|
|
99
|
+
io["AVTEQGHELSNEER"].should == %w(sp|P31946|1433B_HUMAN sp|P31946-2|1433B_HUMAN)
|
|
100
|
+
io["VRAAR"].should == ["tr|D3DX18|D3DX18_HUMAN"]
|
|
101
|
+
io.each_with_index do |key_prots, i|
|
|
102
|
+
key_prots.first.should be_an_instance_of String
|
|
103
|
+
key_prots.last.should be_a_kind_of Array
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|