ms-in_silico 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/VERSION +1 -1
- data/bin/digest.rb +50 -0
- data/lib/ms/in_silico/digester.rb +21 -15
- data/lib/ms/in_silico/fragment.rb +3 -2
- data/lib/ms/in_silico/spectrum.rb +0 -0
- data/spec/ms/in_silico/digester_spec.rb +110 -58
- data/spec/ms/in_silico/fragment_spec.rb +1 -1
- data/spec/ms/in_silico/spectrum_spec.rb +1 -1
- data/spec/readme_spec.rb +10 -10
- metadata +14 -13
- data/lib/ms/in_silico/digest.rb +0 -52
- data/spec/tap_test_helper.rb +0 -2
- data/spec/tap_test_suite.rb +0 -5
- data/tap.yml +0 -0
data/.gitignore
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.4.1
|
data/bin/digest.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ms/in_silico/digester'
|
5
|
+
|
6
|
+
def print_enzyme_names
|
7
|
+
puts "(tab delimited)"
|
8
|
+
puts %w(name cuts nocut cterm?).join("\t")
|
9
|
+
Ms::InSilico::Digester::ENZYMES.each do |key, enzyme|
|
10
|
+
puts [:name, :cleave_str, :cterm_exception, :cterm_cleavage].map {|v| enzyme.send(v) }.join("\t")
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
delimiter_hash = {
|
15
|
+
'space' => ' ',
|
16
|
+
'tab' => "\t",
|
17
|
+
'newline' => "\n",
|
18
|
+
}
|
19
|
+
|
20
|
+
opt = {
|
21
|
+
:enzyme => 'Trypsin',
|
22
|
+
:missed_cleavages => 0,
|
23
|
+
:delimiter => 'space',
|
24
|
+
:record_delimiter => 'newline',
|
25
|
+
}
|
26
|
+
opts = OptionParser.new do |op|
|
27
|
+
op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] SOMEPROTEINSEKUENCE ..."
|
28
|
+
op.separator "output: SOMEPR OTEINSEK UENCE"
|
29
|
+
op.separator "options:"
|
30
|
+
op.on("-e", "--enzyme <#{opt[:enzyme]}>", "specify a valid enzyme name") {|v| opt[:enzyme] = v }
|
31
|
+
op.on("-m", "--missed-cleavages <#{opt[:missed_cleavages]}>", Integer, "number of missed cleavages") {|v| opt[:missed_cleavages] = v }
|
32
|
+
op.on("-d", "--delimiter <#{opt[:delimiter]}>", "delimit the returned peptides",
|
33
|
+
"('space','tab','newline' or some other string)") {|v| opt[:delimiter] = v }
|
34
|
+
op.on("-r", "--record-delimiter <#{opt[:record_delimiter]}>", "included after each protein output") {|v| opt[:record_delimiter] = v }
|
35
|
+
op.separator ""
|
36
|
+
op.on("--print-enzymes", "prints table of valid enzyme names and exits") { print_enzyme_names ; exit }
|
37
|
+
end
|
38
|
+
opts.parse!
|
39
|
+
|
40
|
+
if ARGV.size == 0
|
41
|
+
puts opts
|
42
|
+
exit
|
43
|
+
end
|
44
|
+
|
45
|
+
[:delimiter, :record_delimiter].each {|k| opt[k] = (delimiter_hash[opt[k]] || opt[k]) }
|
46
|
+
|
47
|
+
ARGV.each do |protein|
|
48
|
+
print Ms::InSilico::Digester[opt[:enzyme]].digest(protein, opt[:missed_cleavages]).join(opt[:delimiter])
|
49
|
+
print opt[:record_delimiter]
|
50
|
+
end
|
@@ -105,10 +105,10 @@ module Ms
|
|
105
105
|
@scanner = StringScanner.new('')
|
106
106
|
end
|
107
107
|
|
108
|
-
# Returns
|
109
|
-
#
|
110
|
-
#
|
111
|
-
#
|
108
|
+
# Returns digestion sites in sequence, as determined by the
|
109
|
+
# cleave_regexp boundaries. The digestion sites correspond to the
|
110
|
+
# positions where a peptide begins and ends, such that [sites[n], sites[n+1] - sites[n]]
|
111
|
+
# corresponds to the [index, length] for peptide n.
|
112
112
|
#
|
113
113
|
# d = Digester.new('Trypsin', 'KR', 'P')
|
114
114
|
# seq = "AARGGR"
|
@@ -128,19 +128,25 @@ module Ms
|
|
128
128
|
# The digested section of sequence may be specified using offset
|
129
129
|
# and length.
|
130
130
|
def cleavage_sites(seq, offset=0, length=seq.length-offset)
|
131
|
+
return [0, 1] if seq.size == 1 # adding exceptions is lame--algorithm should just work
|
132
|
+
|
131
133
|
adjustment = cterm_cleavage ? 0 : 1
|
132
134
|
limit = offset + length
|
133
|
-
|
135
|
+
|
134
136
|
positions = [offset]
|
135
137
|
pos = scan(seq, offset, limit) do |pos|
|
136
|
-
positions << pos - adjustment
|
138
|
+
positions << (pos - adjustment)
|
137
139
|
end
|
138
140
|
|
139
141
|
# add the final position
|
140
|
-
if pos < limit || positions.length == 1
|
142
|
+
if (pos < limit) || (positions.length == 1)
|
143
|
+
positions << limit
|
144
|
+
end
|
145
|
+
# adding exceptions is lame.. this code probably needs to be
|
146
|
+
# refactored (corrected).
|
147
|
+
if !cterm_cleavage && pos == limit
|
141
148
|
positions << limit
|
142
149
|
end
|
143
|
-
|
144
150
|
positions
|
145
151
|
end
|
146
152
|
|
@@ -151,14 +157,14 @@ module Ms
|
|
151
157
|
#
|
152
158
|
# Each [start_index, end_index] pair is yielded to the block, if given,
|
153
159
|
# and the collected results are returned.
|
154
|
-
def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset) # :yields: start_index, end_index
|
160
|
+
def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset, &block) # :yields: start_index, end_index
|
155
161
|
frag_sites = cleavage_sites(seq, offset, length)
|
156
162
|
|
157
163
|
overlay(frag_sites.length, max_misses, 1) do |start_index, end_index|
|
158
164
|
start_index = frag_sites[start_index]
|
159
165
|
end_index = frag_sites[end_index]
|
160
166
|
|
161
|
-
|
167
|
+
block ? block.call(start_index, end_index) : [start_index, end_index]
|
162
168
|
end
|
163
169
|
end
|
164
170
|
|
@@ -167,7 +173,7 @@ module Ms
|
|
167
173
|
# as in that method, the digested section of sequence may be specified using
|
168
174
|
# offset and length.
|
169
175
|
def digest(seq, max_misses=0, offset=0, length=seq.length-offset)
|
170
|
-
site_digest(seq, max_misses, offset, length).
|
176
|
+
site_digest(seq, max_misses, offset, length).map do |s, e|
|
171
177
|
seq[s, e-s]
|
172
178
|
end
|
173
179
|
end
|
@@ -183,7 +189,7 @@ module Ms
|
|
183
189
|
# Scans seq between offset and limit for the cleave_regexp, skipping whitespace
|
184
190
|
# and being mindful of exception characters. The positions of the scanner at
|
185
191
|
# each match are yielded to the block.
|
186
|
-
def scan(seq, offset, limit) # :nodoc:
|
192
|
+
def scan(seq, offset, limit, &block) # :nodoc:
|
187
193
|
scanner.string = seq
|
188
194
|
scanner.pos = offset
|
189
195
|
|
@@ -197,7 +203,7 @@ module Ms
|
|
197
203
|
# break if you scanned past the upper limit
|
198
204
|
break if pos > limit
|
199
205
|
|
200
|
-
|
206
|
+
block.call(pos)
|
201
207
|
end
|
202
208
|
|
203
209
|
scanner.pos
|
@@ -205,14 +211,14 @@ module Ms
|
|
205
211
|
|
206
212
|
# Performs an overlap-collect algorithm providing the start and end
|
207
213
|
# indices of spans skipping up to max_misses boundaries.
|
208
|
-
def overlay(n, max_misses, offset) # :nodoc:
|
214
|
+
def overlay(n, max_misses, offset, &block) # :nodoc:
|
209
215
|
results = []
|
210
216
|
0.upto(n-1) do |start_index|
|
211
217
|
0.upto(max_misses) do |n_miss|
|
212
218
|
end_index = start_index + offset + n_miss
|
213
219
|
break if end_index == n
|
214
220
|
|
215
|
-
results <<
|
221
|
+
results << block.call(start_index, end_index)
|
216
222
|
end
|
217
223
|
end
|
218
224
|
results
|
@@ -1,9 +1,9 @@
|
|
1
|
-
require 'tap/task'
|
2
1
|
require 'ms/in_silico/spectrum'
|
3
2
|
|
4
3
|
module Ms
|
5
4
|
module InSilico
|
6
5
|
|
6
|
+
=begin
|
7
7
|
# :startdoc::task calculates a theoretical ms/ms spectrum
|
8
8
|
#
|
9
9
|
# Calculates the theoretical ms/ms spectrum for a peptide sequence.
|
@@ -66,5 +66,6 @@ module Ms
|
|
66
66
|
end
|
67
67
|
|
68
68
|
end
|
69
|
+
=end
|
69
70
|
end
|
70
|
-
end
|
71
|
+
end
|
File without changes
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
require 'ms/in_silico/digester'
|
4
4
|
require 'pp'
|
@@ -28,58 +28,7 @@ describe 'a digester' do
|
|
28
28
|
|
29
29
|
str.join('')
|
30
30
|
end
|
31
|
-
|
32
|
-
it 'performs digestion and can specify sites of digestion' do
|
33
|
-
trypsin = Ms::InSilico::Digester['Trypsin']
|
34
|
-
|
35
|
-
expected = [
|
36
|
-
'MIVIGR',
|
37
|
-
'SIVHPYITNEYEPFAAEK',
|
38
|
-
'QQILSIMAG']
|
39
|
-
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
|
40
|
-
|
41
|
-
expected = [
|
42
|
-
'MIVIGR',
|
43
|
-
'MIVIGRSIVHPYITNEYEPFAAEK',
|
44
|
-
'SIVHPYITNEYEPFAAEK',
|
45
|
-
'SIVHPYITNEYEPFAAEKQQILSIMAG',
|
46
|
-
'QQILSIMAG']
|
47
|
-
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
48
|
-
|
49
|
-
expected = [
|
50
|
-
[0,6],
|
51
|
-
[0,24],
|
52
|
-
[6,24],
|
53
|
-
[6,33],
|
54
|
-
[24,33]]
|
55
|
-
trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'completely ignores whitespace inside protein sequences' do
|
59
|
-
expected = [
|
60
|
-
"\tMIVIGR",
|
61
|
-
"SIVHP\nYITNEYEPFAAE K",
|
62
|
-
"QQILSI\rMAG"]
|
63
|
-
Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
|
64
|
-
end
|
65
|
-
|
66
|
-
it 'runs cleavage sites documentation' do
|
67
|
-
d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
|
68
|
-
seq = "AARGGR"
|
69
|
-
sites = d.cleavage_sites(seq)
|
70
|
-
sites.is [0, 3, 6]
|
71
|
-
|
72
|
-
seq[sites[0], sites[0+1] - sites[0]].is "AAR"
|
73
|
-
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
74
|
-
|
75
|
-
seq = "AAR \n GGR"
|
76
|
-
sites = d.cleavage_sites(seq)
|
77
|
-
sites.is [0, 8, 11]
|
78
31
|
|
79
|
-
seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
|
80
|
-
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
81
|
-
end
|
82
|
-
|
83
32
|
it 'finds cleavage site indices' do
|
84
33
|
{
|
85
34
|
"" => [0,0],
|
@@ -129,7 +78,9 @@ describe 'a digester' do
|
|
129
78
|
@digester.cleavage_sites(sequence).is expected
|
130
79
|
end
|
131
80
|
end
|
132
|
-
|
81
|
+
|
82
|
+
|
83
|
+
|
133
84
|
it 'finds cleavage sites with offset and limit' do
|
134
85
|
{
|
135
86
|
"RxxR" => [2,4],
|
@@ -165,7 +116,8 @@ describe 'a digester' do
|
|
165
116
|
"RRR" => ["R", "R", "R"]
|
166
117
|
}.each do |sequence, expected|
|
167
118
|
# spp(sequence)
|
168
|
-
@digester.digest(sequence)
|
119
|
+
@digester.digest(sequence).is expected
|
120
|
+
#@digester.digest(sequence) {|frag, s, e| frag}.is expected
|
169
121
|
end
|
170
122
|
end
|
171
123
|
|
@@ -182,7 +134,8 @@ describe 'a digester' do
|
|
182
134
|
"RAR" => ["R", "RAR", "AR"],
|
183
135
|
"RRR" => ["R", "RR", "R", "RR", "R"]
|
184
136
|
}.each do |sequence, expected|
|
185
|
-
@digester.digest(sequence, 1)
|
137
|
+
@digester.digest(sequence, 1).is expected
|
138
|
+
#@digester.digest(sequence, 1) {|frag, s, e| frag}.is expected
|
186
139
|
end
|
187
140
|
end
|
188
141
|
|
@@ -199,7 +152,8 @@ describe 'a digester' do
|
|
199
152
|
"RAR" => ["R", "RAR", "AR"],
|
200
153
|
"RRR" => ["R", "RR", "RRR", "R", "RR", "R"]
|
201
154
|
}.each do |sequence, expected|
|
202
|
-
@digester.digest(sequence, 2)
|
155
|
+
@digester.digest(sequence, 2).is expected
|
156
|
+
#@digester.digest(sequence, 2) {|frag, s, e| frag}.is expected
|
203
157
|
end
|
204
158
|
end
|
205
159
|
|
@@ -273,9 +227,67 @@ describe 'a digester' do
|
|
273
227
|
end
|
274
228
|
end
|
275
229
|
end
|
230
|
+
end
|
231
|
+
|
232
|
+
|
233
|
+
describe 'performs as documented in readme' do
|
234
|
+
it 'runs cleavage sites documentation' do
|
235
|
+
d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
|
236
|
+
seq = "AARGGR"
|
237
|
+
sites = d.cleavage_sites(seq)
|
238
|
+
sites.is [0, 3, 6]
|
239
|
+
|
240
|
+
seq[sites[0], sites[0+1] - sites[0]].is "AAR"
|
241
|
+
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
242
|
+
|
243
|
+
seq = "AAR \n GGR"
|
244
|
+
sites = d.cleavage_sites(seq)
|
245
|
+
sites.is [0, 8, 11]
|
246
|
+
|
247
|
+
seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
|
248
|
+
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
249
|
+
end
|
250
|
+
end
|
276
251
|
|
252
|
+
describe 'basic trypsin digestion' do
|
253
|
+
it 'performs digestion and can specify sites of digestion' do
|
254
|
+
trypsin = Ms::InSilico::Digester['Trypsin']
|
255
|
+
|
256
|
+
expected = [
|
257
|
+
'MIVIGR',
|
258
|
+
'SIVHPYITNEYEPFAAEK',
|
259
|
+
'QQILSIMAG']
|
260
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
|
261
|
+
|
262
|
+
expected = [
|
263
|
+
'MIVIGR',
|
264
|
+
'MIVIGRSIVHPYITNEYEPFAAEK',
|
265
|
+
'SIVHPYITNEYEPFAAEK',
|
266
|
+
'SIVHPYITNEYEPFAAEKQQILSIMAG',
|
267
|
+
'QQILSIMAG']
|
268
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
269
|
+
|
270
|
+
expected = [
|
271
|
+
[0,6],
|
272
|
+
[0,24],
|
273
|
+
[6,24],
|
274
|
+
[6,33],
|
275
|
+
[24,33]]
|
276
|
+
trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
277
|
+
end
|
278
|
+
|
279
|
+
it 'completely ignores whitespace inside protein sequences' do
|
280
|
+
expected = [
|
281
|
+
"\tMIVIGR",
|
282
|
+
"SIVHP\nYITNEYEPFAAE K",
|
283
|
+
"QQILSI\rMAG"]
|
284
|
+
Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
|
285
|
+
end
|
286
|
+
|
277
287
|
it 'does a trypsin digest' do
|
278
288
|
trypsin = Ms::InSilico::Digester::TRYPSIN
|
289
|
+
# alternate ways to specify the enzyme
|
290
|
+
Ms::InSilico::Digester::TRYPSIN.is Ms::InSilico::Digester['Trypsin']
|
279
291
|
{
|
280
292
|
"" => [''],
|
281
293
|
"A" => ["A"],
|
@@ -294,8 +306,48 @@ describe 'a digester' do
|
|
294
306
|
"ARPARAA" => ["ARPAR", "AA"],
|
295
307
|
"RPRRR" => ["RPR", "R", "R"]
|
296
308
|
}.each do |sequence, expected|
|
297
|
-
trypsin.digest(sequence)
|
309
|
+
trypsin.digest(sequence).is expected
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
313
|
+
|
314
|
+
|
315
|
+
end
|
316
|
+
|
317
|
+
describe 'digestion with other enzymes' do
|
318
|
+
|
319
|
+
# This is how to create the enzyme:
|
320
|
+
# Ms::InSilico::Digester['Arg-C']
|
321
|
+
# Ms::InSilico::Digester::ARG_C
|
322
|
+
{
|
323
|
+
['Arg-C', :ARG_C] => {
|
324
|
+
"AARC" => ["AAR", "C"],
|
325
|
+
"AARP" => ["AARP"]
|
326
|
+
},
|
327
|
+
['Asp-N', :ASP_N] => {
|
328
|
+
"AABDS" => ["AA", "B", "DS"],
|
329
|
+
"ADZBS" => ["A", "DZ", "BS"],
|
330
|
+
"B" => %w(B),
|
331
|
+
"A" => %w(A),
|
332
|
+
"ABD" => %w(A B D),
|
333
|
+
},
|
334
|
+
['Asp-N_ambic', :ASP_N_AMBIC] => {
|
335
|
+
"AAEDS" => ["AA", "E", "DS"],
|
336
|
+
"ADZES" => ["A", "DZ", "ES"],
|
337
|
+
"AED" => %w(A E D),
|
338
|
+
"GDE" => %w(G D E),
|
339
|
+
"AAECCDGG" => %w(AA ECC DGG),
|
340
|
+
}
|
341
|
+
}.each do |enzyme_names, test_hash|
|
342
|
+
it "digests with '#{enzyme_names.first}'" do
|
343
|
+
digester = Ms::InSilico::Digester[enzyme_names.first]
|
344
|
+
digester.is Ms::InSilico::Digester.const_get(enzyme_names.last)
|
345
|
+
test_hash.each do |sequence, expected|
|
346
|
+
digester.digest(sequence).is expected
|
347
|
+
end
|
298
348
|
end
|
299
349
|
end
|
300
|
-
|
301
350
|
end
|
351
|
+
|
352
|
+
|
353
|
+
|
data/spec/readme_spec.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require '/spec_helper.rb'
|
2
2
|
require 'ms/in_silico/digester'
|
3
3
|
require 'ms/in_silico/spectrum'
|
4
4
|
|
@@ -8,21 +8,21 @@ describe 'readme documentation' do
|
|
8
8
|
trypsin = Ms::InSilico::Digester['Trypsin']
|
9
9
|
peptides = trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
|
10
10
|
expected = [
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
'MIVIGR',
|
12
|
+
'SIVHPYITNEYEPFAAEK',
|
13
|
+
'QQILSIMAG']
|
14
14
|
peptides.sort.is expected.sort
|
15
15
|
|
16
16
|
spectrum = Ms::InSilico::Spectrum.new(peptides[0])
|
17
17
|
spectrum.parent_ion_mass.should.be.close 688.417442373391, 10**-12
|
18
18
|
|
19
19
|
expected = [
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
132.047761058391,
|
21
|
+
245.131825038791,
|
22
|
+
344.200238954991,
|
23
|
+
457.284302935391,
|
24
|
+
514.305766658991,
|
25
|
+
670.406877687091]
|
26
26
|
spectrum.series('b').zip(expected) do |o,e|
|
27
27
|
o.should.be.close e, 10**-12
|
28
28
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 0
|
9
|
-
version: 0.4.0
|
8
|
+
- 1
|
9
|
+
version: 0.4.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Simon Chiang
|
@@ -14,13 +14,14 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
18
|
-
default_executable:
|
17
|
+
date: 2010-11-15 00:00:00 -07:00
|
18
|
+
default_executable: digest.rb
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: molecules
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
@@ -35,6 +36,7 @@ dependencies:
|
|
35
36
|
name: tap
|
36
37
|
prerelease: false
|
37
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
38
40
|
requirements:
|
39
41
|
- - ">="
|
40
42
|
- !ruby/object:Gem::Version
|
@@ -49,6 +51,7 @@ dependencies:
|
|
49
51
|
name: tap-test
|
50
52
|
prerelease: false
|
51
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
@@ -63,6 +66,7 @@ dependencies:
|
|
63
66
|
name: spec-more
|
64
67
|
prerelease: false
|
65
68
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
66
70
|
requirements:
|
67
71
|
- - ">="
|
68
72
|
- !ruby/object:Gem::Version
|
@@ -73,8 +77,8 @@ dependencies:
|
|
73
77
|
version_requirements: *id004
|
74
78
|
description: peptide fragmentation and protein digestion
|
75
79
|
email: jtprince@gmail.com
|
76
|
-
executables:
|
77
|
-
|
80
|
+
executables:
|
81
|
+
- digest.rb
|
78
82
|
extensions: []
|
79
83
|
|
80
84
|
extra_rdoc_files:
|
@@ -87,8 +91,8 @@ files:
|
|
87
91
|
- README.rdoc
|
88
92
|
- Rakefile
|
89
93
|
- VERSION
|
94
|
+
- bin/digest.rb
|
90
95
|
- lib/ms/in_silico.rb
|
91
|
-
- lib/ms/in_silico/digest.rb
|
92
96
|
- lib/ms/in_silico/digester.rb
|
93
97
|
- lib/ms/in_silico/fragment.rb
|
94
98
|
- lib/ms/in_silico/spectrum.rb
|
@@ -97,9 +101,6 @@ files:
|
|
97
101
|
- spec/ms/in_silico/spectrum_spec.rb
|
98
102
|
- spec/readme_spec.rb
|
99
103
|
- spec/spec_helper.rb
|
100
|
-
- spec/tap_test_helper.rb
|
101
|
-
- spec/tap_test_suite.rb
|
102
|
-
- tap.yml
|
103
104
|
- test/ms/in_silico/digest_test.rb
|
104
105
|
- test/ms/in_silico/fragment_test.rb
|
105
106
|
has_rdoc: true
|
@@ -112,6 +113,7 @@ rdoc_options:
|
|
112
113
|
require_paths:
|
113
114
|
- lib
|
114
115
|
required_ruby_version: !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
115
117
|
requirements:
|
116
118
|
- - ">="
|
117
119
|
- !ruby/object:Gem::Version
|
@@ -119,6 +121,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
121
|
- 0
|
120
122
|
version: "0"
|
121
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
|
+
none: false
|
122
125
|
requirements:
|
123
126
|
- - ">="
|
124
127
|
- !ruby/object:Gem::Version
|
@@ -128,17 +131,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
131
|
requirements: []
|
129
132
|
|
130
133
|
rubyforge_project: mspire
|
131
|
-
rubygems_version: 1.3.
|
134
|
+
rubygems_version: 1.3.7
|
132
135
|
signing_key:
|
133
136
|
specification_version: 3
|
134
137
|
summary: in-silico calculations for mass spec data
|
135
138
|
test_files:
|
136
|
-
- spec/tap_test_suite.rb
|
137
139
|
- spec/ms/in_silico/fragment_spec.rb
|
138
140
|
- spec/ms/in_silico/spectrum_spec.rb
|
139
141
|
- spec/ms/in_silico/digester_spec.rb
|
140
142
|
- spec/readme_spec.rb
|
141
|
-
- spec/tap_test_helper.rb
|
142
143
|
- spec/spec_helper.rb
|
143
144
|
- test/ms/in_silico/digest_test.rb
|
144
145
|
- test/ms/in_silico/fragment_test.rb
|
data/lib/ms/in_silico/digest.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'tap/task'
|
2
|
-
require 'ms/in_silico/digester'
|
3
|
-
|
4
|
-
module Ms
|
5
|
-
module InSilico
|
6
|
-
# :startdoc::task digest a protein sequence into peptides
|
7
|
-
# Digest a protein sequence into an array of peptides.
|
8
|
-
#
|
9
|
-
# % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --:i dump
|
10
|
-
# MIVIGR
|
11
|
-
# SIVHPYITNEYEPFAAEK
|
12
|
-
# QQILSIMAG
|
13
|
-
#
|
14
|
-
class Digest < Tap::Task
|
15
|
-
|
16
|
-
config :digester, 'Trypsin' # The name of the digester
|
17
|
-
config :min_length, nil, &c.integer_or_nil # Minimum peptide length
|
18
|
-
config :max_length, nil, &c.integer_or_nil # Maximum peptide length
|
19
|
-
config :max_misses, 0, &c.integer # The max # of missed cleavage sites
|
20
|
-
config :site_digest, false, &c.boolean # Digest to sites (rather than sequences)
|
21
|
-
|
22
|
-
def process(sequence)
|
23
|
-
unless d = Digester[digester]
|
24
|
-
raise ArgumentError, "unknown digester: #{digester}"
|
25
|
-
end
|
26
|
-
|
27
|
-
# extract sequence from FASTA entries
|
28
|
-
sequence = $1 if sequence =~ /\A>.*?\n(.*)\z/m
|
29
|
-
sequence.gsub!(/\s/, "")
|
30
|
-
|
31
|
-
peptides = if site_digest
|
32
|
-
d.site_digest(sequence, max_misses)
|
33
|
-
else
|
34
|
-
d.digest(sequence, max_misses)
|
35
|
-
end
|
36
|
-
|
37
|
-
# filter
|
38
|
-
peptides.delete_if do |peptide|
|
39
|
-
peptide.length < min_length
|
40
|
-
end if min_length
|
41
|
-
|
42
|
-
peptides.delete_if do |peptide|
|
43
|
-
peptide.length > max_length
|
44
|
-
end if max_length
|
45
|
-
|
46
|
-
log 'digest', "#{sequence[0..10]}#{sequence.length > 10 ? '...' : ''} to #{peptides.length} peptides"
|
47
|
-
peptides
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
data/spec/tap_test_helper.rb
DELETED
data/spec/tap_test_suite.rb
DELETED
data/tap.yml
DELETED
File without changes
|