ms-in_silico 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -3,3 +3,4 @@ pkg/
3
3
  rdoc/
4
4
  backup/
5
5
  .*.swp
6
+ *.gemspec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.4.1
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'ms/in_silico/digester'
5
+
6
+ def print_enzyme_names
7
+ puts "(tab delimited)"
8
+ puts %w(name cuts nocut cterm?).join("\t")
9
+ Ms::InSilico::Digester::ENZYMES.each do |key, enzyme|
10
+ puts [:name, :cleave_str, :cterm_exception, :cterm_cleavage].map {|v| enzyme.send(v) }.join("\t")
11
+ end
12
+ end
13
+
14
+ delimiter_hash = {
15
+ 'space' => ' ',
16
+ 'tab' => "\t",
17
+ 'newline' => "\n",
18
+ }
19
+
20
+ opt = {
21
+ :enzyme => 'Trypsin',
22
+ :missed_cleavages => 0,
23
+ :delimiter => 'space',
24
+ :record_delimiter => 'newline',
25
+ }
26
+ opts = OptionParser.new do |op|
27
+ op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] SOMEPROTEINSEKUENCE ..."
28
+ op.separator "output: SOMEPR OTEINSEK UENCE"
29
+ op.separator "options:"
30
+ op.on("-e", "--enzyme <#{opt[:enzyme]}>", "specify a valid enzyme name") {|v| opt[:enzyme] = v }
31
+ op.on("-m", "--missed-cleavages <#{opt[:missed_cleavages]}>", Integer, "number of missed cleavages") {|v| opt[:missed_cleavages] = v }
32
+ op.on("-d", "--delimiter <#{opt[:delimiter]}>", "delimit the returned peptides",
33
+ "('space','tab','newline' or some other string)") {|v| opt[:delimiter] = v }
34
+ op.on("-r", "--record-delimiter <#{opt[:record_delimiter]}>", "included after each protein output") {|v| opt[:record_delimiter] = v }
35
+ op.separator ""
36
+ op.on("--print-enzymes", "prints table of valid enzyme names and exits") { print_enzyme_names ; exit }
37
+ end
38
+ opts.parse!
39
+
40
+ if ARGV.size == 0
41
+ puts opts
42
+ exit
43
+ end
44
+
45
+ [:delimiter, :record_delimiter].each {|k| opt[k] = (delimiter_hash[opt[k]] || opt[k]) }
46
+
47
+ ARGV.each do |protein|
48
+ print Ms::InSilico::Digester[opt[:enzyme]].digest(protein, opt[:missed_cleavages]).join(opt[:delimiter])
49
+ print opt[:record_delimiter]
50
+ end
@@ -105,10 +105,10 @@ module Ms
105
105
  @scanner = StringScanner.new('')
106
106
  end
107
107
 
108
- # Returns sites of digestion sites in sequence, as determined by
109
- # thecleave_regexp boundaries. The digestion sites correspond
110
- # to the positions where a peptide begins and ends, such that
111
- # [n, (n+1) - n] corresponds to the [index, length] for peptide n.
108
+ # Returns digestion sites in sequence, as determined by the
109
+ # cleave_regexp boundaries. The digestion sites correspond to the
110
+ # positions where a peptide begins and ends, such that
111
+ # [sites[n], sites[n+1] - sites[n]] is the [index, length] for peptide n.
112
112
  #
113
113
  # d = Digester.new('Trypsin', 'KR', 'P')
114
114
  # seq = "AARGGR"
@@ -128,19 +128,25 @@ module Ms
128
128
  # The digested section of sequence may be specified using offset
129
129
  # and length.
130
130
  def cleavage_sites(seq, offset=0, length=seq.length-offset)
131
+ return [0, 1] if seq.size == 1 # adding exceptions is lame--algorithm should just work
132
+
131
133
  adjustment = cterm_cleavage ? 0 : 1
132
134
  limit = offset + length
133
-
135
+
134
136
  positions = [offset]
135
137
  pos = scan(seq, offset, limit) do |pos|
136
- positions << pos - adjustment
138
+ positions << (pos - adjustment)
137
139
  end
138
140
 
139
141
  # add the final position
140
- if pos < limit || positions.length == 1
142
+ if (pos < limit) || (positions.length == 1)
143
+ positions << limit
144
+ end
145
+ # FIXME: the non-cterm special case below is a workaround; the site
146
+ # collection logic probably needs to be refactored (corrected).
147
+ if !cterm_cleavage && pos == limit
141
148
  positions << limit
142
149
  end
143
-
144
150
  positions
145
151
  end
146
152
 
@@ -151,14 +157,14 @@ module Ms
151
157
  #
152
158
  # Each [start_index, end_index] pair is yielded to the block, if given,
153
159
  # and the collected results are returned.
154
- def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset) # :yields: start_index, end_index
160
+ def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset, &block) # :yields: start_index, end_index
155
161
  frag_sites = cleavage_sites(seq, offset, length)
156
162
 
157
163
  overlay(frag_sites.length, max_misses, 1) do |start_index, end_index|
158
164
  start_index = frag_sites[start_index]
159
165
  end_index = frag_sites[end_index]
160
166
 
161
- block_given? ? yield(start_index, end_index) : [start_index, end_index]
167
+ block ? block.call(start_index, end_index) : [start_index, end_index]
162
168
  end
163
169
  end
164
170
 
@@ -167,7 +173,7 @@ module Ms
167
173
  # as in that method, the digested section of sequence may be specified using
168
174
  # offset and length.
169
175
  def digest(seq, max_misses=0, offset=0, length=seq.length-offset)
170
- site_digest(seq, max_misses, offset, length).collect do |s, e|
176
+ site_digest(seq, max_misses, offset, length).map do |s, e|
171
177
  seq[s, e-s]
172
178
  end
173
179
  end
@@ -183,7 +189,7 @@ module Ms
183
189
  # Scans seq between offset and limit for the cleave_regexp, skipping whitespace
184
190
  # and being mindful of exception characters. The positions of the scanner at
185
191
  # each match are yielded to the block.
186
- def scan(seq, offset, limit) # :nodoc:
192
+ def scan(seq, offset, limit, &block) # :nodoc:
187
193
  scanner.string = seq
188
194
  scanner.pos = offset
189
195
 
@@ -197,7 +203,7 @@ module Ms
197
203
  # break if you scanned past the upper limit
198
204
  break if pos > limit
199
205
 
200
- yield pos
206
+ block.call(pos)
201
207
  end
202
208
 
203
209
  scanner.pos
@@ -205,14 +211,14 @@ module Ms
205
211
 
206
212
  # Performs an overlap-collect algorithm providing the start and end
207
213
  # indices of spans skipping up to max_misses boundaries.
208
- def overlay(n, max_misses, offset) # :nodoc:
214
+ def overlay(n, max_misses, offset, &block) # :nodoc:
209
215
  results = []
210
216
  0.upto(n-1) do |start_index|
211
217
  0.upto(max_misses) do |n_miss|
212
218
  end_index = start_index + offset + n_miss
213
219
  break if end_index == n
214
220
 
215
- results << yield(start_index, end_index)
221
+ results << block.call(start_index, end_index)
216
222
  end
217
223
  end
218
224
  results
@@ -1,9 +1,9 @@
1
- require 'tap/task'
2
1
  require 'ms/in_silico/spectrum'
3
2
 
4
3
  module Ms
5
4
  module InSilico
6
5
 
6
+ =begin
7
7
  # :startdoc::task calculates a theoretical ms/ms spectrum
8
8
  #
9
9
  # Calculates the theoretical ms/ms spectrum for a peptide sequence.
@@ -66,5 +66,6 @@ module Ms
66
66
  end
67
67
 
68
68
  end
69
+ =end
69
70
  end
70
- end
71
+ end
File without changes
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper.rb'
1
+ require 'spec_helper.rb'
2
2
 
3
3
  require 'ms/in_silico/digester'
4
4
  require 'pp'
@@ -28,58 +28,7 @@ describe 'a digester' do
28
28
 
29
29
  str.join('')
30
30
  end
31
-
32
- it 'performs digestion and can specify sites of digestion' do
33
- trypsin = Ms::InSilico::Digester['Trypsin']
34
-
35
- expected = [
36
- 'MIVIGR',
37
- 'SIVHPYITNEYEPFAAEK',
38
- 'QQILSIMAG']
39
- trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
40
-
41
- expected = [
42
- 'MIVIGR',
43
- 'MIVIGRSIVHPYITNEYEPFAAEK',
44
- 'SIVHPYITNEYEPFAAEK',
45
- 'SIVHPYITNEYEPFAAEKQQILSIMAG',
46
- 'QQILSIMAG']
47
- trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
48
-
49
- expected = [
50
- [0,6],
51
- [0,24],
52
- [6,24],
53
- [6,33],
54
- [24,33]]
55
- trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
56
- end
57
-
58
- it 'completely ignores whitespace inside protein sequences' do
59
- expected = [
60
- "\tMIVIGR",
61
- "SIVHP\nYITNEYEPFAAE K",
62
- "QQILSI\rMAG"]
63
- Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
64
- end
65
-
66
- it 'runs cleavage sites documentation' do
67
- d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
68
- seq = "AARGGR"
69
- sites = d.cleavage_sites(seq)
70
- sites.is [0, 3, 6]
71
-
72
- seq[sites[0], sites[0+1] - sites[0]].is "AAR"
73
- seq[sites[1], sites[1+1] - sites[1]].is "GGR"
74
-
75
- seq = "AAR \n GGR"
76
- sites = d.cleavage_sites(seq)
77
- sites.is [0, 8, 11]
78
31
 
79
- seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
80
- seq[sites[1], sites[1+1] - sites[1]].is "GGR"
81
- end
82
-
83
32
  it 'finds cleavage site indices' do
84
33
  {
85
34
  "" => [0,0],
@@ -129,7 +78,9 @@ describe 'a digester' do
129
78
  @digester.cleavage_sites(sequence).is expected
130
79
  end
131
80
  end
132
-
81
+
82
+
83
+
133
84
  it 'finds cleavage sites with offset and limit' do
134
85
  {
135
86
  "RxxR" => [2,4],
@@ -165,7 +116,8 @@ describe 'a digester' do
165
116
  "RRR" => ["R", "R", "R"]
166
117
  }.each do |sequence, expected|
167
118
  # spp(sequence)
168
- @digester.digest(sequence) {|frag, s, e| frag}.is expected
119
+ @digester.digest(sequence).is expected
120
+ #@digester.digest(sequence) {|frag, s, e| frag}.is expected
169
121
  end
170
122
  end
171
123
 
@@ -182,7 +134,8 @@ describe 'a digester' do
182
134
  "RAR" => ["R", "RAR", "AR"],
183
135
  "RRR" => ["R", "RR", "R", "RR", "R"]
184
136
  }.each do |sequence, expected|
185
- @digester.digest(sequence, 1) {|frag, s, e| frag}.is expected
137
+ @digester.digest(sequence, 1).is expected
138
+ #@digester.digest(sequence, 1) {|frag, s, e| frag}.is expected
186
139
  end
187
140
  end
188
141
 
@@ -199,7 +152,8 @@ describe 'a digester' do
199
152
  "RAR" => ["R", "RAR", "AR"],
200
153
  "RRR" => ["R", "RR", "RRR", "R", "RR", "R"]
201
154
  }.each do |sequence, expected|
202
- @digester.digest(sequence, 2) {|frag, s, e| frag}.is expected
155
+ @digester.digest(sequence, 2).is expected
156
+ #@digester.digest(sequence, 2) {|frag, s, e| frag}.is expected
203
157
  end
204
158
  end
205
159
 
@@ -273,9 +227,67 @@ describe 'a digester' do
273
227
  end
274
228
  end
275
229
  end
230
+ end
231
+
232
+
233
+ describe 'performs as documented in readme' do
234
+ it 'runs cleavage sites documentation' do
235
+ d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
236
+ seq = "AARGGR"
237
+ sites = d.cleavage_sites(seq)
238
+ sites.is [0, 3, 6]
239
+
240
+ seq[sites[0], sites[0+1] - sites[0]].is "AAR"
241
+ seq[sites[1], sites[1+1] - sites[1]].is "GGR"
242
+
243
+ seq = "AAR \n GGR"
244
+ sites = d.cleavage_sites(seq)
245
+ sites.is [0, 8, 11]
246
+
247
+ seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
248
+ seq[sites[1], sites[1+1] - sites[1]].is "GGR"
249
+ end
250
+ end
276
251
 
252
+ describe 'basic trypsin digestion' do
253
+ it 'performs digestion and can specify sites of digestion' do
254
+ trypsin = Ms::InSilico::Digester['Trypsin']
255
+
256
+ expected = [
257
+ 'MIVIGR',
258
+ 'SIVHPYITNEYEPFAAEK',
259
+ 'QQILSIMAG']
260
+ trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
261
+
262
+ expected = [
263
+ 'MIVIGR',
264
+ 'MIVIGRSIVHPYITNEYEPFAAEK',
265
+ 'SIVHPYITNEYEPFAAEK',
266
+ 'SIVHPYITNEYEPFAAEKQQILSIMAG',
267
+ 'QQILSIMAG']
268
+ trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
269
+
270
+ expected = [
271
+ [0,6],
272
+ [0,24],
273
+ [6,24],
274
+ [6,33],
275
+ [24,33]]
276
+ trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
277
+ end
278
+
279
+ it 'completely ignores whitespace inside protein sequences' do
280
+ expected = [
281
+ "\tMIVIGR",
282
+ "SIVHP\nYITNEYEPFAAE K",
283
+ "QQILSI\rMAG"]
284
+ Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
285
+ end
286
+
277
287
  it 'does a trypsin digest' do
278
288
  trypsin = Ms::InSilico::Digester::TRYPSIN
289
+ # alternate ways to specify the enzyme
290
+ Ms::InSilico::Digester::TRYPSIN.is Ms::InSilico::Digester['Trypsin']
279
291
  {
280
292
  "" => [''],
281
293
  "A" => ["A"],
@@ -294,8 +306,48 @@ describe 'a digester' do
294
306
  "ARPARAA" => ["ARPAR", "AA"],
295
307
  "RPRRR" => ["RPR", "R", "R"]
296
308
  }.each do |sequence, expected|
297
- trypsin.digest(sequence) {|frag, s, e| frag}.is expected
309
+ trypsin.digest(sequence).is expected
310
+ end
311
+ end
312
+
313
+
314
+
315
+ end
316
+
317
+ describe 'digestion with other enzymes' do
318
+
319
+ # This is how to create the enzyme:
320
+ # Ms::InSilico::Digester['Arg-C']
321
+ # Ms::InSilico::Digester::ARG_C
322
+ {
323
+ ['Arg-C', :ARG_C] => {
324
+ "AARC" => ["AAR", "C"],
325
+ "AARP" => ["AARP"]
326
+ },
327
+ ['Asp-N', :ASP_N] => {
328
+ "AABDS" => ["AA", "B", "DS"],
329
+ "ADZBS" => ["A", "DZ", "BS"],
330
+ "B" => %w(B),
331
+ "A" => %w(A),
332
+ "ABD" => %w(A B D),
333
+ },
334
+ ['Asp-N_ambic', :ASP_N_AMBIC] => {
335
+ "AAEDS" => ["AA", "E", "DS"],
336
+ "ADZES" => ["A", "DZ", "ES"],
337
+ "AED" => %w(A E D),
338
+ "GDE" => %w(G D E),
339
+ "AAECCDGG" => %w(AA ECC DGG),
340
+ }
341
+ }.each do |enzyme_names, test_hash|
342
+ it "digests with '#{enzyme_names.first}'" do
343
+ digester = Ms::InSilico::Digester[enzyme_names.first]
344
+ digester.is Ms::InSilico::Digester.const_get(enzyme_names.last)
345
+ test_hash.each do |sequence, expected|
346
+ digester.digest(sequence).is expected
347
+ end
298
348
  end
299
349
  end
300
-
301
350
  end
351
+
352
+
353
+
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper.rb'
1
+ require 'spec_helper.rb'
2
2
 
3
3
  require 'ms/in_silico/fragment'
4
4
 
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
1
+ require 'spec_helper'
2
2
  require 'ms/in_silico/spectrum'
3
3
 
4
4
  # class locate_residues tests
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/spec_helper.rb'
1
+ require 'spec_helper.rb'
2
2
  require 'ms/in_silico/digester'
3
3
  require 'ms/in_silico/spectrum'
4
4
 
@@ -8,21 +8,21 @@ describe 'readme documentation' do
8
8
  trypsin = Ms::InSilico::Digester['Trypsin']
9
9
  peptides = trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
10
10
  expected = [
11
- 'MIVIGR',
12
- 'SIVHPYITNEYEPFAAEK',
13
- 'QQILSIMAG']
11
+ 'MIVIGR',
12
+ 'SIVHPYITNEYEPFAAEK',
13
+ 'QQILSIMAG']
14
14
  peptides.sort.is expected.sort
15
15
 
16
16
  spectrum = Ms::InSilico::Spectrum.new(peptides[0])
17
17
  spectrum.parent_ion_mass.should.be.close 688.417442373391, 10**-12
18
18
 
19
19
  expected = [
20
- 132.047761058391,
21
- 245.131825038791,
22
- 344.200238954991,
23
- 457.284302935391,
24
- 514.305766658991,
25
- 670.406877687091]
20
+ 132.047761058391,
21
+ 245.131825038791,
22
+ 344.200238954991,
23
+ 457.284302935391,
24
+ 514.305766658991,
25
+ 670.406877687091]
26
26
  spectrum.series('b').zip(expected) do |o,e|
27
27
  o.should.be.close e, 10**-12
28
28
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 0
9
- version: 0.4.0
8
+ - 1
9
+ version: 0.4.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Simon Chiang
@@ -14,13 +14,14 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-06-23 00:00:00 -06:00
18
- default_executable:
17
+ date: 2010-11-15 00:00:00 -07:00
18
+ default_executable: digest.rb
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: molecules
22
22
  prerelease: false
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
24
25
  requirements:
25
26
  - - ">="
26
27
  - !ruby/object:Gem::Version
@@ -35,6 +36,7 @@ dependencies:
35
36
  name: tap
36
37
  prerelease: false
37
38
  requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
38
40
  requirements:
39
41
  - - ">="
40
42
  - !ruby/object:Gem::Version
@@ -49,6 +51,7 @@ dependencies:
49
51
  name: tap-test
50
52
  prerelease: false
51
53
  requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
52
55
  requirements:
53
56
  - - ">="
54
57
  - !ruby/object:Gem::Version
@@ -63,6 +66,7 @@ dependencies:
63
66
  name: spec-more
64
67
  prerelease: false
65
68
  requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
66
70
  requirements:
67
71
  - - ">="
68
72
  - !ruby/object:Gem::Version
@@ -73,8 +77,8 @@ dependencies:
73
77
  version_requirements: *id004
74
78
  description: peptide fragmentation and protein digestion
75
79
  email: jtprince@gmail.com
76
- executables: []
77
-
80
+ executables:
81
+ - digest.rb
78
82
  extensions: []
79
83
 
80
84
  extra_rdoc_files:
@@ -87,8 +91,8 @@ files:
87
91
  - README.rdoc
88
92
  - Rakefile
89
93
  - VERSION
94
+ - bin/digest.rb
90
95
  - lib/ms/in_silico.rb
91
- - lib/ms/in_silico/digest.rb
92
96
  - lib/ms/in_silico/digester.rb
93
97
  - lib/ms/in_silico/fragment.rb
94
98
  - lib/ms/in_silico/spectrum.rb
@@ -97,9 +101,6 @@ files:
97
101
  - spec/ms/in_silico/spectrum_spec.rb
98
102
  - spec/readme_spec.rb
99
103
  - spec/spec_helper.rb
100
- - spec/tap_test_helper.rb
101
- - spec/tap_test_suite.rb
102
- - tap.yml
103
104
  - test/ms/in_silico/digest_test.rb
104
105
  - test/ms/in_silico/fragment_test.rb
105
106
  has_rdoc: true
@@ -112,6 +113,7 @@ rdoc_options:
112
113
  require_paths:
113
114
  - lib
114
115
  required_ruby_version: !ruby/object:Gem::Requirement
116
+ none: false
115
117
  requirements:
116
118
  - - ">="
117
119
  - !ruby/object:Gem::Version
@@ -119,6 +121,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
119
121
  - 0
120
122
  version: "0"
121
123
  required_rubygems_version: !ruby/object:Gem::Requirement
124
+ none: false
122
125
  requirements:
123
126
  - - ">="
124
127
  - !ruby/object:Gem::Version
@@ -128,17 +131,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
131
  requirements: []
129
132
 
130
133
  rubyforge_project: mspire
131
- rubygems_version: 1.3.6
134
+ rubygems_version: 1.3.7
132
135
  signing_key:
133
136
  specification_version: 3
134
137
  summary: in-silico calculations for mass spec data
135
138
  test_files:
136
- - spec/tap_test_suite.rb
137
139
  - spec/ms/in_silico/fragment_spec.rb
138
140
  - spec/ms/in_silico/spectrum_spec.rb
139
141
  - spec/ms/in_silico/digester_spec.rb
140
142
  - spec/readme_spec.rb
141
- - spec/tap_test_helper.rb
142
143
  - spec/spec_helper.rb
143
144
  - test/ms/in_silico/digest_test.rb
144
145
  - test/ms/in_silico/fragment_test.rb
@@ -1,52 +0,0 @@
1
- require 'tap/task'
2
- require 'ms/in_silico/digester'
3
-
4
- module Ms
5
- module InSilico
6
- # :startdoc::task digest a protein sequence into peptides
7
- # Digest a protein sequence into an array of peptides.
8
- #
9
- # % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --:i dump
10
- # MIVIGR
11
- # SIVHPYITNEYEPFAAEK
12
- # QQILSIMAG
13
- #
14
- class Digest < Tap::Task
15
-
16
- config :digester, 'Trypsin' # The name of the digester
17
- config :min_length, nil, &c.integer_or_nil # Minimum peptide length
18
- config :max_length, nil, &c.integer_or_nil # Maximum peptide length
19
- config :max_misses, 0, &c.integer # The max # of missed cleavage sites
20
- config :site_digest, false, &c.boolean # Digest to sites (rather than sequences)
21
-
22
- def process(sequence)
23
- unless d = Digester[digester]
24
- raise ArgumentError, "unknown digester: #{digester}"
25
- end
26
-
27
- # extract sequence from FASTA entries
28
- sequence = $1 if sequence =~ /\A>.*?\n(.*)\z/m
29
- sequence.gsub!(/\s/, "")
30
-
31
- peptides = if site_digest
32
- d.site_digest(sequence, max_misses)
33
- else
34
- d.digest(sequence, max_misses)
35
- end
36
-
37
- # filter
38
- peptides.delete_if do |peptide|
39
- peptide.length < min_length
40
- end if min_length
41
-
42
- peptides.delete_if do |peptide|
43
- peptide.length > max_length
44
- end if max_length
45
-
46
- log 'digest', "#{sequence[0..10]}#{sequence.length > 10 ? '...' : ''} to #{peptides.length} peptides"
47
- peptides
48
- end
49
-
50
- end
51
- end
52
- end
@@ -1,2 +0,0 @@
1
- require 'rubygems'
2
- require 'tap/test/unit'
@@ -1,5 +0,0 @@
1
- $:.unshift File.join(File.dirname(__FILE__), '../lib')
2
-
3
- # runs all subsets (see Tap::Test::SubsetMethods)
4
- ENV["ALL"] = "true"
5
- Dir.glob("./**/*_test.rb").each {|test| require test}
data/tap.yml DELETED
File without changes