ms-in_silico 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/VERSION +1 -1
- data/bin/digest.rb +50 -0
- data/lib/ms/in_silico/digester.rb +21 -15
- data/lib/ms/in_silico/fragment.rb +3 -2
- data/lib/ms/in_silico/spectrum.rb +0 -0
- data/spec/ms/in_silico/digester_spec.rb +110 -58
- data/spec/ms/in_silico/fragment_spec.rb +1 -1
- data/spec/ms/in_silico/spectrum_spec.rb +1 -1
- data/spec/readme_spec.rb +10 -10
- metadata +14 -13
- data/lib/ms/in_silico/digest.rb +0 -52
- data/spec/tap_test_helper.rb +0 -2
- data/spec/tap_test_suite.rb +0 -5
- data/tap.yml +0 -0
data/.gitignore
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.
|
1
|
+
0.4.1
|
data/bin/digest.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ms/in_silico/digester'
|
5
|
+
|
6
|
+
def print_enzyme_names
|
7
|
+
puts "(tab delimited)"
|
8
|
+
puts %w(name cuts nocut cterm?).join("\t")
|
9
|
+
Ms::InSilico::Digester::ENZYMES.each do |key, enzyme|
|
10
|
+
puts [:name, :cleave_str, :cterm_exception, :cterm_cleavage].map {|v| enzyme.send(v) }.join("\t")
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
delimiter_hash = {
|
15
|
+
'space' => ' ',
|
16
|
+
'tab' => "\t",
|
17
|
+
'newline' => "\n",
|
18
|
+
}
|
19
|
+
|
20
|
+
opt = {
|
21
|
+
:enzyme => 'Trypsin',
|
22
|
+
:missed_cleavages => 0,
|
23
|
+
:delimiter => 'space',
|
24
|
+
:record_delimiter => 'newline',
|
25
|
+
}
|
26
|
+
opts = OptionParser.new do |op|
|
27
|
+
op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] SOMEPROTEINSEKUENCE ..."
|
28
|
+
op.separator "output: SOMEPR OTEINSEK UENCE"
|
29
|
+
op.separator "options:"
|
30
|
+
op.on("-e", "--enzyme <#{opt[:enzyme]}>", "specify a valid enzyme name") {|v| opt[:enzyme] = v }
|
31
|
+
op.on("-m", "--missed-cleavages <#{opt[:missed_cleavages]}>", Integer, "number of missed cleavages") {|v| opt[:missed_cleavages] = v }
|
32
|
+
op.on("-d", "--delimiter <#{opt[:delimiter]}>", "delimit the returned peptides",
|
33
|
+
"('space','tab','newline' or some other string)") {|v| opt[:delimiter] = v }
|
34
|
+
op.on("-r", "--record-delimiter <#{opt[:record_delimiter]}>", "included after each protein output") {|v| opt[:record_delimiter] = v }
|
35
|
+
op.separator ""
|
36
|
+
op.on("--print-enzymes", "prints table of valid enzyme names and exits") { print_enzyme_names ; exit }
|
37
|
+
end
|
38
|
+
opts.parse!
|
39
|
+
|
40
|
+
if ARGV.size == 0
|
41
|
+
puts opts
|
42
|
+
exit
|
43
|
+
end
|
44
|
+
|
45
|
+
[:delimiter, :record_delimiter].each {|k| opt[k] = (delimiter_hash[opt[k]] || opt[k]) }
|
46
|
+
|
47
|
+
ARGV.each do |protein|
|
48
|
+
print Ms::InSilico::Digester[opt[:enzyme]].digest(protein, opt[:missed_cleavages]).join(opt[:delimiter])
|
49
|
+
print opt[:record_delimiter]
|
50
|
+
end
|
@@ -105,10 +105,10 @@ module Ms
|
|
105
105
|
@scanner = StringScanner.new('')
|
106
106
|
end
|
107
107
|
|
108
|
-
# Returns
|
109
|
-
#
|
110
|
-
#
|
111
|
-
#
|
108
|
+
# Returns digestion sites in sequence, as determined by the
|
109
|
+
# cleave_regexp boundaries. The digestion sites correspond to the
|
110
|
+
# positions where a peptide begins and ends, such that [sites[n], sites[n+1] - sites[n]]
|
111
|
+
# corresponds to the [index, length] for peptide n.
|
112
112
|
#
|
113
113
|
# d = Digester.new('Trypsin', 'KR', 'P')
|
114
114
|
# seq = "AARGGR"
|
@@ -128,19 +128,25 @@ module Ms
|
|
128
128
|
# The digested section of sequence may be specified using offset
|
129
129
|
# and length.
|
130
130
|
def cleavage_sites(seq, offset=0, length=seq.length-offset)
|
131
|
+
return [0, 1] if seq.size == 1 # adding exceptions is lame--algorithm should just work
|
132
|
+
|
131
133
|
adjustment = cterm_cleavage ? 0 : 1
|
132
134
|
limit = offset + length
|
133
|
-
|
135
|
+
|
134
136
|
positions = [offset]
|
135
137
|
pos = scan(seq, offset, limit) do |pos|
|
136
|
-
positions << pos - adjustment
|
138
|
+
positions << (pos - adjustment)
|
137
139
|
end
|
138
140
|
|
139
141
|
# add the final position
|
140
|
-
if pos < limit || positions.length == 1
|
142
|
+
if (pos < limit) || (positions.length == 1)
|
143
|
+
positions << limit
|
144
|
+
end
|
145
|
+
# NOTE: special-casing this condition is a workaround; this code probably needs to be
|
146
|
+
# refactored (corrected).
|
147
|
+
if !cterm_cleavage && pos == limit
|
141
148
|
positions << limit
|
142
149
|
end
|
143
|
-
|
144
150
|
positions
|
145
151
|
end
|
146
152
|
|
@@ -151,14 +157,14 @@ module Ms
|
|
151
157
|
#
|
152
158
|
# Each [start_index, end_index] pair is yielded to the block, if given,
|
153
159
|
# and the collected results are returned.
|
154
|
-
def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset) # :yields: start_index, end_index
|
160
|
+
def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset, &block) # :yields: start_index, end_index
|
155
161
|
frag_sites = cleavage_sites(seq, offset, length)
|
156
162
|
|
157
163
|
overlay(frag_sites.length, max_misses, 1) do |start_index, end_index|
|
158
164
|
start_index = frag_sites[start_index]
|
159
165
|
end_index = frag_sites[end_index]
|
160
166
|
|
161
|
-
|
167
|
+
block ? block.call(start_index, end_index) : [start_index, end_index]
|
162
168
|
end
|
163
169
|
end
|
164
170
|
|
@@ -167,7 +173,7 @@ module Ms
|
|
167
173
|
# as in that method, the digested section of sequence may be specified using
|
168
174
|
# offset and length.
|
169
175
|
def digest(seq, max_misses=0, offset=0, length=seq.length-offset)
|
170
|
-
site_digest(seq, max_misses, offset, length).
|
176
|
+
site_digest(seq, max_misses, offset, length).map do |s, e|
|
171
177
|
seq[s, e-s]
|
172
178
|
end
|
173
179
|
end
|
@@ -183,7 +189,7 @@ module Ms
|
|
183
189
|
# Scans seq between offset and limit for the cleave_regexp, skipping whitespace
|
184
190
|
# and being mindful of exception characters. The positions of the scanner at
|
185
191
|
# each match are yielded to the block.
|
186
|
-
def scan(seq, offset, limit) # :nodoc:
|
192
|
+
def scan(seq, offset, limit, &block) # :nodoc:
|
187
193
|
scanner.string = seq
|
188
194
|
scanner.pos = offset
|
189
195
|
|
@@ -197,7 +203,7 @@ module Ms
|
|
197
203
|
# break if you scanned past the upper limit
|
198
204
|
break if pos > limit
|
199
205
|
|
200
|
-
|
206
|
+
block.call(pos)
|
201
207
|
end
|
202
208
|
|
203
209
|
scanner.pos
|
@@ -205,14 +211,14 @@ module Ms
|
|
205
211
|
|
206
212
|
# Performs an overlap-collect algorithm providing the start and end
|
207
213
|
# indices of spans skipping up to max_misses boundaries.
|
208
|
-
def overlay(n, max_misses, offset) # :nodoc:
|
214
|
+
def overlay(n, max_misses, offset, &block) # :nodoc:
|
209
215
|
results = []
|
210
216
|
0.upto(n-1) do |start_index|
|
211
217
|
0.upto(max_misses) do |n_miss|
|
212
218
|
end_index = start_index + offset + n_miss
|
213
219
|
break if end_index == n
|
214
220
|
|
215
|
-
results <<
|
221
|
+
results << block.call(start_index, end_index)
|
216
222
|
end
|
217
223
|
end
|
218
224
|
results
|
@@ -1,9 +1,9 @@
|
|
1
|
-
require 'tap/task'
|
2
1
|
require 'ms/in_silico/spectrum'
|
3
2
|
|
4
3
|
module Ms
|
5
4
|
module InSilico
|
6
5
|
|
6
|
+
=begin
|
7
7
|
# :startdoc::task calculates a theoretical ms/ms spectrum
|
8
8
|
#
|
9
9
|
# Calculates the theoretical ms/ms spectrum for a peptide sequence.
|
@@ -66,5 +66,6 @@ module Ms
|
|
66
66
|
end
|
67
67
|
|
68
68
|
end
|
69
|
+
=end
|
69
70
|
end
|
70
|
-
end
|
71
|
+
end
|
File without changes
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
require 'ms/in_silico/digester'
|
4
4
|
require 'pp'
|
@@ -28,58 +28,7 @@ describe 'a digester' do
|
|
28
28
|
|
29
29
|
str.join('')
|
30
30
|
end
|
31
|
-
|
32
|
-
it 'performs digestion and can specify sites of digestion' do
|
33
|
-
trypsin = Ms::InSilico::Digester['Trypsin']
|
34
|
-
|
35
|
-
expected = [
|
36
|
-
'MIVIGR',
|
37
|
-
'SIVHPYITNEYEPFAAEK',
|
38
|
-
'QQILSIMAG']
|
39
|
-
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
|
40
|
-
|
41
|
-
expected = [
|
42
|
-
'MIVIGR',
|
43
|
-
'MIVIGRSIVHPYITNEYEPFAAEK',
|
44
|
-
'SIVHPYITNEYEPFAAEK',
|
45
|
-
'SIVHPYITNEYEPFAAEKQQILSIMAG',
|
46
|
-
'QQILSIMAG']
|
47
|
-
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
48
|
-
|
49
|
-
expected = [
|
50
|
-
[0,6],
|
51
|
-
[0,24],
|
52
|
-
[6,24],
|
53
|
-
[6,33],
|
54
|
-
[24,33]]
|
55
|
-
trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'completely ignores whitespace inside protein sequences' do
|
59
|
-
expected = [
|
60
|
-
"\tMIVIGR",
|
61
|
-
"SIVHP\nYITNEYEPFAAE K",
|
62
|
-
"QQILSI\rMAG"]
|
63
|
-
Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
|
64
|
-
end
|
65
|
-
|
66
|
-
it 'runs cleavage sites documentation' do
|
67
|
-
d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
|
68
|
-
seq = "AARGGR"
|
69
|
-
sites = d.cleavage_sites(seq)
|
70
|
-
sites.is [0, 3, 6]
|
71
|
-
|
72
|
-
seq[sites[0], sites[0+1] - sites[0]].is "AAR"
|
73
|
-
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
74
|
-
|
75
|
-
seq = "AAR \n GGR"
|
76
|
-
sites = d.cleavage_sites(seq)
|
77
|
-
sites.is [0, 8, 11]
|
78
31
|
|
79
|
-
seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
|
80
|
-
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
81
|
-
end
|
82
|
-
|
83
32
|
it 'finds cleavage site indices' do
|
84
33
|
{
|
85
34
|
"" => [0,0],
|
@@ -129,7 +78,9 @@ describe 'a digester' do
|
|
129
78
|
@digester.cleavage_sites(sequence).is expected
|
130
79
|
end
|
131
80
|
end
|
132
|
-
|
81
|
+
|
82
|
+
|
83
|
+
|
133
84
|
it 'finds cleavage sites with offset and limit' do
|
134
85
|
{
|
135
86
|
"RxxR" => [2,4],
|
@@ -165,7 +116,8 @@ describe 'a digester' do
|
|
165
116
|
"RRR" => ["R", "R", "R"]
|
166
117
|
}.each do |sequence, expected|
|
167
118
|
# spp(sequence)
|
168
|
-
@digester.digest(sequence)
|
119
|
+
@digester.digest(sequence).is expected
|
120
|
+
#@digester.digest(sequence) {|frag, s, e| frag}.is expected
|
169
121
|
end
|
170
122
|
end
|
171
123
|
|
@@ -182,7 +134,8 @@ describe 'a digester' do
|
|
182
134
|
"RAR" => ["R", "RAR", "AR"],
|
183
135
|
"RRR" => ["R", "RR", "R", "RR", "R"]
|
184
136
|
}.each do |sequence, expected|
|
185
|
-
@digester.digest(sequence, 1)
|
137
|
+
@digester.digest(sequence, 1).is expected
|
138
|
+
#@digester.digest(sequence, 1) {|frag, s, e| frag}.is expected
|
186
139
|
end
|
187
140
|
end
|
188
141
|
|
@@ -199,7 +152,8 @@ describe 'a digester' do
|
|
199
152
|
"RAR" => ["R", "RAR", "AR"],
|
200
153
|
"RRR" => ["R", "RR", "RRR", "R", "RR", "R"]
|
201
154
|
}.each do |sequence, expected|
|
202
|
-
@digester.digest(sequence, 2)
|
155
|
+
@digester.digest(sequence, 2).is expected
|
156
|
+
#@digester.digest(sequence, 2) {|frag, s, e| frag}.is expected
|
203
157
|
end
|
204
158
|
end
|
205
159
|
|
@@ -273,9 +227,67 @@ describe 'a digester' do
|
|
273
227
|
end
|
274
228
|
end
|
275
229
|
end
|
230
|
+
end
|
231
|
+
|
232
|
+
|
233
|
+
describe 'performs as documented in readme' do
|
234
|
+
it 'runs cleavage sites documentation' do
|
235
|
+
d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
|
236
|
+
seq = "AARGGR"
|
237
|
+
sites = d.cleavage_sites(seq)
|
238
|
+
sites.is [0, 3, 6]
|
239
|
+
|
240
|
+
seq[sites[0], sites[0+1] - sites[0]].is "AAR"
|
241
|
+
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
242
|
+
|
243
|
+
seq = "AAR \n GGR"
|
244
|
+
sites = d.cleavage_sites(seq)
|
245
|
+
sites.is [0, 8, 11]
|
246
|
+
|
247
|
+
seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
|
248
|
+
seq[sites[1], sites[1+1] - sites[1]].is "GGR"
|
249
|
+
end
|
250
|
+
end
|
276
251
|
|
252
|
+
describe 'basic trypsin digestion' do
|
253
|
+
it 'performs digestion and can specify sites of digestion' do
|
254
|
+
trypsin = Ms::InSilico::Digester['Trypsin']
|
255
|
+
|
256
|
+
expected = [
|
257
|
+
'MIVIGR',
|
258
|
+
'SIVHPYITNEYEPFAAEK',
|
259
|
+
'QQILSIMAG']
|
260
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
|
261
|
+
|
262
|
+
expected = [
|
263
|
+
'MIVIGR',
|
264
|
+
'MIVIGRSIVHPYITNEYEPFAAEK',
|
265
|
+
'SIVHPYITNEYEPFAAEK',
|
266
|
+
'SIVHPYITNEYEPFAAEKQQILSIMAG',
|
267
|
+
'QQILSIMAG']
|
268
|
+
trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
269
|
+
|
270
|
+
expected = [
|
271
|
+
[0,6],
|
272
|
+
[0,24],
|
273
|
+
[6,24],
|
274
|
+
[6,33],
|
275
|
+
[24,33]]
|
276
|
+
trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
|
277
|
+
end
|
278
|
+
|
279
|
+
it 'completely ignores whitespace inside protein sequences' do
|
280
|
+
expected = [
|
281
|
+
"\tMIVIGR",
|
282
|
+
"SIVHP\nYITNEYEPFAAE K",
|
283
|
+
"QQILSI\rMAG"]
|
284
|
+
Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
|
285
|
+
end
|
286
|
+
|
277
287
|
it 'does a trypsin digest' do
|
278
288
|
trypsin = Ms::InSilico::Digester::TRYPSIN
|
289
|
+
# alternate ways to specify the enzyme
|
290
|
+
Ms::InSilico::Digester::TRYPSIN.is Ms::InSilico::Digester['Trypsin']
|
279
291
|
{
|
280
292
|
"" => [''],
|
281
293
|
"A" => ["A"],
|
@@ -294,8 +306,48 @@ describe 'a digester' do
|
|
294
306
|
"ARPARAA" => ["ARPAR", "AA"],
|
295
307
|
"RPRRR" => ["RPR", "R", "R"]
|
296
308
|
}.each do |sequence, expected|
|
297
|
-
trypsin.digest(sequence)
|
309
|
+
trypsin.digest(sequence).is expected
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
313
|
+
|
314
|
+
|
315
|
+
end
|
316
|
+
|
317
|
+
describe 'digestion with other enzymes' do
|
318
|
+
|
319
|
+
# This is how to create the enzyme:
|
320
|
+
# Ms::InSilico::Digester['Arg-C']
|
321
|
+
# Ms::InSilico::Digester::ARG_C
|
322
|
+
{
|
323
|
+
['Arg-C', :ARG_C] => {
|
324
|
+
"AARC" => ["AAR", "C"],
|
325
|
+
"AARP" => ["AARP"]
|
326
|
+
},
|
327
|
+
['Asp-N', :ASP_N] => {
|
328
|
+
"AABDS" => ["AA", "B", "DS"],
|
329
|
+
"ADZBS" => ["A", "DZ", "BS"],
|
330
|
+
"B" => %w(B),
|
331
|
+
"A" => %w(A),
|
332
|
+
"ABD" => %w(A B D),
|
333
|
+
},
|
334
|
+
['Asp-N_ambic', :ASP_N_AMBIC] => {
|
335
|
+
"AAEDS" => ["AA", "E", "DS"],
|
336
|
+
"ADZES" => ["A", "DZ", "ES"],
|
337
|
+
"AED" => %w(A E D),
|
338
|
+
"GDE" => %w(G D E),
|
339
|
+
"AAECCDGG" => %w(AA ECC DGG),
|
340
|
+
}
|
341
|
+
}.each do |enzyme_names, test_hash|
|
342
|
+
it "digests with '#{enzyme_names.first}'" do
|
343
|
+
digester = Ms::InSilico::Digester[enzyme_names.first]
|
344
|
+
digester.is Ms::InSilico::Digester.const_get(enzyme_names.last)
|
345
|
+
test_hash.each do |sequence, expected|
|
346
|
+
digester.digest(sequence).is expected
|
347
|
+
end
|
298
348
|
end
|
299
349
|
end
|
300
|
-
|
301
350
|
end
|
351
|
+
|
352
|
+
|
353
|
+
|
data/spec/readme_spec.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require '/spec_helper.rb'
|
2
2
|
require 'ms/in_silico/digester'
|
3
3
|
require 'ms/in_silico/spectrum'
|
4
4
|
|
@@ -8,21 +8,21 @@ describe 'readme documentation' do
|
|
8
8
|
trypsin = Ms::InSilico::Digester['Trypsin']
|
9
9
|
peptides = trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
|
10
10
|
expected = [
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
'MIVIGR',
|
12
|
+
'SIVHPYITNEYEPFAAEK',
|
13
|
+
'QQILSIMAG']
|
14
14
|
peptides.sort.is expected.sort
|
15
15
|
|
16
16
|
spectrum = Ms::InSilico::Spectrum.new(peptides[0])
|
17
17
|
spectrum.parent_ion_mass.should.be.close 688.417442373391, 10**-12
|
18
18
|
|
19
19
|
expected = [
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
132.047761058391,
|
21
|
+
245.131825038791,
|
22
|
+
344.200238954991,
|
23
|
+
457.284302935391,
|
24
|
+
514.305766658991,
|
25
|
+
670.406877687091]
|
26
26
|
spectrum.series('b').zip(expected) do |o,e|
|
27
27
|
o.should.be.close e, 10**-12
|
28
28
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
-
|
9
|
-
version: 0.4.
|
8
|
+
- 1
|
9
|
+
version: 0.4.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Simon Chiang
|
@@ -14,13 +14,14 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
18
|
-
default_executable:
|
17
|
+
date: 2010-11-15 00:00:00 -07:00
|
18
|
+
default_executable: digest.rb
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: molecules
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
@@ -35,6 +36,7 @@ dependencies:
|
|
35
36
|
name: tap
|
36
37
|
prerelease: false
|
37
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
38
40
|
requirements:
|
39
41
|
- - ">="
|
40
42
|
- !ruby/object:Gem::Version
|
@@ -49,6 +51,7 @@ dependencies:
|
|
49
51
|
name: tap-test
|
50
52
|
prerelease: false
|
51
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
@@ -63,6 +66,7 @@ dependencies:
|
|
63
66
|
name: spec-more
|
64
67
|
prerelease: false
|
65
68
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
66
70
|
requirements:
|
67
71
|
- - ">="
|
68
72
|
- !ruby/object:Gem::Version
|
@@ -73,8 +77,8 @@ dependencies:
|
|
73
77
|
version_requirements: *id004
|
74
78
|
description: peptide fragmentation and protein digestion
|
75
79
|
email: jtprince@gmail.com
|
76
|
-
executables:
|
77
|
-
|
80
|
+
executables:
|
81
|
+
- digest.rb
|
78
82
|
extensions: []
|
79
83
|
|
80
84
|
extra_rdoc_files:
|
@@ -87,8 +91,8 @@ files:
|
|
87
91
|
- README.rdoc
|
88
92
|
- Rakefile
|
89
93
|
- VERSION
|
94
|
+
- bin/digest.rb
|
90
95
|
- lib/ms/in_silico.rb
|
91
|
-
- lib/ms/in_silico/digest.rb
|
92
96
|
- lib/ms/in_silico/digester.rb
|
93
97
|
- lib/ms/in_silico/fragment.rb
|
94
98
|
- lib/ms/in_silico/spectrum.rb
|
@@ -97,9 +101,6 @@ files:
|
|
97
101
|
- spec/ms/in_silico/spectrum_spec.rb
|
98
102
|
- spec/readme_spec.rb
|
99
103
|
- spec/spec_helper.rb
|
100
|
-
- spec/tap_test_helper.rb
|
101
|
-
- spec/tap_test_suite.rb
|
102
|
-
- tap.yml
|
103
104
|
- test/ms/in_silico/digest_test.rb
|
104
105
|
- test/ms/in_silico/fragment_test.rb
|
105
106
|
has_rdoc: true
|
@@ -112,6 +113,7 @@ rdoc_options:
|
|
112
113
|
require_paths:
|
113
114
|
- lib
|
114
115
|
required_ruby_version: !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
115
117
|
requirements:
|
116
118
|
- - ">="
|
117
119
|
- !ruby/object:Gem::Version
|
@@ -119,6 +121,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
121
|
- 0
|
120
122
|
version: "0"
|
121
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
|
+
none: false
|
122
125
|
requirements:
|
123
126
|
- - ">="
|
124
127
|
- !ruby/object:Gem::Version
|
@@ -128,17 +131,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
131
|
requirements: []
|
129
132
|
|
130
133
|
rubyforge_project: mspire
|
131
|
-
rubygems_version: 1.3.
|
134
|
+
rubygems_version: 1.3.7
|
132
135
|
signing_key:
|
133
136
|
specification_version: 3
|
134
137
|
summary: in-silico calculations for mass spec data
|
135
138
|
test_files:
|
136
|
-
- spec/tap_test_suite.rb
|
137
139
|
- spec/ms/in_silico/fragment_spec.rb
|
138
140
|
- spec/ms/in_silico/spectrum_spec.rb
|
139
141
|
- spec/ms/in_silico/digester_spec.rb
|
140
142
|
- spec/readme_spec.rb
|
141
|
-
- spec/tap_test_helper.rb
|
142
143
|
- spec/spec_helper.rb
|
143
144
|
- test/ms/in_silico/digest_test.rb
|
144
145
|
- test/ms/in_silico/fragment_test.rb
|
data/lib/ms/in_silico/digest.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'tap/task'
|
2
|
-
require 'ms/in_silico/digester'
|
3
|
-
|
4
|
-
module Ms
|
5
|
-
module InSilico
|
6
|
-
# :startdoc::task digest a protein sequence into peptides
|
7
|
-
# Digest a protein sequence into an array of peptides.
|
8
|
-
#
|
9
|
-
# % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --:i dump
|
10
|
-
# MIVIGR
|
11
|
-
# SIVHPYITNEYEPFAAEK
|
12
|
-
# QQILSIMAG
|
13
|
-
#
|
14
|
-
class Digest < Tap::Task
|
15
|
-
|
16
|
-
config :digester, 'Trypsin' # The name of the digester
|
17
|
-
config :min_length, nil, &c.integer_or_nil # Minimum peptide length
|
18
|
-
config :max_length, nil, &c.integer_or_nil # Maximum peptide length
|
19
|
-
config :max_misses, 0, &c.integer # The max # of missed cleavage sites
|
20
|
-
config :site_digest, false, &c.boolean # Digest to sites (rather than sequences)
|
21
|
-
|
22
|
-
def process(sequence)
|
23
|
-
unless d = Digester[digester]
|
24
|
-
raise ArgumentError, "unknown digester: #{digester}"
|
25
|
-
end
|
26
|
-
|
27
|
-
# extract sequence from FASTA entries
|
28
|
-
sequence = $1 if sequence =~ /\A>.*?\n(.*)\z/m
|
29
|
-
sequence.gsub!(/\s/, "")
|
30
|
-
|
31
|
-
peptides = if site_digest
|
32
|
-
d.site_digest(sequence, max_misses)
|
33
|
-
else
|
34
|
-
d.digest(sequence, max_misses)
|
35
|
-
end
|
36
|
-
|
37
|
-
# filter
|
38
|
-
peptides.delete_if do |peptide|
|
39
|
-
peptide.length < min_length
|
40
|
-
end if min_length
|
41
|
-
|
42
|
-
peptides.delete_if do |peptide|
|
43
|
-
peptide.length > max_length
|
44
|
-
end if max_length
|
45
|
-
|
46
|
-
log 'digest', "#{sequence[0..10]}#{sequence.length > 10 ? '...' : ''} to #{peptides.length} peptides"
|
47
|
-
peptides
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
data/spec/tap_test_helper.rb
DELETED
data/spec/tap_test_suite.rb
DELETED
data/tap.yml
DELETED
File without changes
|