ms-in_silico 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -3,3 +3,4 @@ pkg/
3
3
  rdoc/
4
4
  backup/
5
5
  .*.swp
6
+ *.gemspec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.4.1
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'ms/in_silico/digester'
5
+
6
+ def print_enzyme_names
7
+ puts "(tab delimited)"
8
+ puts %w(name cuts nocut cterm?).join("\t")
9
+ Ms::InSilico::Digester::ENZYMES.each do |key, enzyme|
10
+ puts [:name, :cleave_str, :cterm_exception, :cterm_cleavage].map {|v| enzyme.send(v) }.join("\t")
11
+ end
12
+ end
13
+
14
+ delimiter_hash = {
15
+ 'space' => ' ',
16
+ 'tab' => "\t",
17
+ 'newline' => "\n",
18
+ }
19
+
20
+ opt = {
21
+ :enzyme => 'Trypsin',
22
+ :missed_cleavages => 0,
23
+ :delimiter => 'space',
24
+ :record_delimiter => 'newline',
25
+ }
26
+ opts = OptionParser.new do |op|
27
+ op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] SOMEPROTEINSEKUENCE ..."
28
+ op.separator "output: SOMEPR OTEINSEK UENCE"
29
+ op.separator "options:"
30
+ op.on("-e", "--enzyme <#{opt[:enzyme]}>", "specify a valid enzyme name") {|v| opt[:enzyme] = v }
31
+ op.on("-m", "--missed-cleavages <#{opt[:missed_cleavages]}>", Integer, "number of missed cleavages") {|v| opt[:missed_cleavages] = v }
32
+ op.on("-d", "--delimiter <#{opt[:delimiter]}>", "delimit the returned peptides",
33
+ "('space','tab','newline' or some other string)") {|v| opt[:delimiter] = v }
34
+ op.on("-r", "--record-delimiter <#{opt[:record_delimiter]}>", "included after each protein output") {|v| opt[:record_delimiter] = v }
35
+ op.separator ""
36
+ op.on("--print-enzymes", "prints table of valid enzyme names and exits") { print_enzyme_names ; exit }
37
+ end
38
+ opts.parse!
39
+
40
+ if ARGV.size == 0
41
+ puts opts
42
+ exit
43
+ end
44
+
45
+ [:delimiter, :record_delimiter].each {|k| opt[k] = (delimiter_hash[opt[k]] || opt[k]) }
46
+
47
+ ARGV.each do |protein|
48
+ print Ms::InSilico::Digester[opt[:enzyme]].digest(protein, opt[:missed_cleavages]).join(opt[:delimiter])
49
+ print opt[:record_delimiter]
50
+ end
@@ -105,10 +105,10 @@ module Ms
105
105
  @scanner = StringScanner.new('')
106
106
  end
107
107
 
108
- # Returns sites of digestion sites in sequence, as determined by
109
- # thecleave_regexp boundaries. The digestion sites correspond
110
- # to the positions where a peptide begins and ends, such that
111
- # [n, (n+1) - n] corresponds to the [index, length] for peptide n.
108
+ # Returns digestion sites in sequence, as determined by the
109
+ # cleave_regexp boundaries. The digestion sites correspond to the
110
+ # positions where a peptide begins and ends, such that [n, (n+1) - n]
111
+ # corresponds to the [index, length] for peptide n.
112
112
  #
113
113
  # d = Digester.new('Trypsin', 'KR', 'P')
114
114
  # seq = "AARGGR"
@@ -128,19 +128,25 @@ module Ms
128
128
  # The digested section of sequence may be specified using offset
129
129
  # and length.
130
130
  def cleavage_sites(seq, offset=0, length=seq.length-offset)
131
+ return [0, 1] if seq.size == 1 # adding exceptions is lame--algorithm should just work
132
+
131
133
  adjustment = cterm_cleavage ? 0 : 1
132
134
  limit = offset + length
133
-
135
+
134
136
  positions = [offset]
135
137
  pos = scan(seq, offset, limit) do |pos|
136
- positions << pos - adjustment
138
+ positions << (pos - adjustment)
137
139
  end
138
140
 
139
141
  # add the final position
140
- if pos < limit || positions.length == 1
142
+ if (pos < limit) || (positions.length == 1)
143
+ positions << limit
144
+ end
145
+ # adding exceptions is lame.. this code probably needs to be
146
+ # refactored (corrected).
147
+ if !cterm_cleavage && pos == limit
141
148
  positions << limit
142
149
  end
143
-
144
150
  positions
145
151
  end
146
152
 
@@ -151,14 +157,14 @@ module Ms
151
157
  #
152
158
  # Each [start_index, end_index] pair is yielded to the block, if given,
153
159
  # and the collected results are returned.
154
- def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset) # :yields: start_index, end_index
160
+ def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset, &block) # :yields: start_index, end_index
155
161
  frag_sites = cleavage_sites(seq, offset, length)
156
162
 
157
163
  overlay(frag_sites.length, max_misses, 1) do |start_index, end_index|
158
164
  start_index = frag_sites[start_index]
159
165
  end_index = frag_sites[end_index]
160
166
 
161
- block_given? ? yield(start_index, end_index) : [start_index, end_index]
167
+ block ? block.call(start_index, end_index) : [start_index, end_index]
162
168
  end
163
169
  end
164
170
 
@@ -167,7 +173,7 @@ module Ms
167
173
  # as in that method, the digested section of sequence may be specified using
168
174
  # offset and length.
169
175
  def digest(seq, max_misses=0, offset=0, length=seq.length-offset)
170
- site_digest(seq, max_misses, offset, length).collect do |s, e|
176
+ site_digest(seq, max_misses, offset, length).map do |s, e|
171
177
  seq[s, e-s]
172
178
  end
173
179
  end
@@ -183,7 +189,7 @@ module Ms
183
189
  # Scans seq between offset and limit for the cleave_regexp, skipping whitespace
184
190
  # and being mindful of exception characters. The positions of the scanner at
185
191
  # each match are yielded to the block.
186
- def scan(seq, offset, limit) # :nodoc:
192
+ def scan(seq, offset, limit, &block) # :nodoc:
187
193
  scanner.string = seq
188
194
  scanner.pos = offset
189
195
 
@@ -197,7 +203,7 @@ module Ms
197
203
  # break if you scanned past the upper limit
198
204
  break if pos > limit
199
205
 
200
- yield pos
206
+ block.call(pos)
201
207
  end
202
208
 
203
209
  scanner.pos
@@ -205,14 +211,14 @@ module Ms
205
211
 
206
212
  # Performs an overlap-collect algorithm providing the start and end
207
213
  # indicies of spans skipping up to max_misses boundaries.
208
- def overlay(n, max_misses, offset) # :nodoc:
214
+ def overlay(n, max_misses, offset, &block) # :nodoc:
209
215
  results = []
210
216
  0.upto(n-1) do |start_index|
211
217
  0.upto(max_misses) do |n_miss|
212
218
  end_index = start_index + offset + n_miss
213
219
  break if end_index == n
214
220
 
215
- results << yield(start_index, end_index)
221
+ results << block.call(start_index, end_index)
216
222
  end
217
223
  end
218
224
  results
@@ -1,9 +1,9 @@
1
- require 'tap/task'
2
1
  require 'ms/in_silico/spectrum'
3
2
 
4
3
  module Ms
5
4
  module InSilico
6
5
 
6
+ =begin
7
7
  # :startdoc::task calculates a theoretical ms/ms spectrum
8
8
  #
9
9
  # Calculates the theoretical ms/ms spectrum for a peptide sequence.
@@ -66,5 +66,6 @@ module Ms
66
66
  end
67
67
 
68
68
  end
69
+ =end
69
70
  end
70
- end
71
+ end
File without changes
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper.rb'
1
+ require 'spec_helper.rb'
2
2
 
3
3
  require 'ms/in_silico/digester'
4
4
  require 'pp'
@@ -28,58 +28,7 @@ describe 'a digester' do
28
28
 
29
29
  str.join('')
30
30
  end
31
-
32
- it 'performs digestion and can specify sites of digestion' do
33
- trypsin = Ms::InSilico::Digester['Trypsin']
34
-
35
- expected = [
36
- 'MIVIGR',
37
- 'SIVHPYITNEYEPFAAEK',
38
- 'QQILSIMAG']
39
- trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
40
-
41
- expected = [
42
- 'MIVIGR',
43
- 'MIVIGRSIVHPYITNEYEPFAAEK',
44
- 'SIVHPYITNEYEPFAAEK',
45
- 'SIVHPYITNEYEPFAAEKQQILSIMAG',
46
- 'QQILSIMAG']
47
- trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
48
-
49
- expected = [
50
- [0,6],
51
- [0,24],
52
- [6,24],
53
- [6,33],
54
- [24,33]]
55
- trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
56
- end
57
-
58
- it 'completely ignores whitespace inside protein sequences' do
59
- expected = [
60
- "\tMIVIGR",
61
- "SIVHP\nYITNEYEPFAAE K",
62
- "QQILSI\rMAG"]
63
- Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
64
- end
65
-
66
- it 'runs cleavage sites documentation' do
67
- d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
68
- seq = "AARGGR"
69
- sites = d.cleavage_sites(seq)
70
- sites.is [0, 3, 6]
71
-
72
- seq[sites[0], sites[0+1] - sites[0]].is "AAR"
73
- seq[sites[1], sites[1+1] - sites[1]].is "GGR"
74
-
75
- seq = "AAR \n GGR"
76
- sites = d.cleavage_sites(seq)
77
- sites.is [0, 8, 11]
78
31
 
79
- seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
80
- seq[sites[1], sites[1+1] - sites[1]].is "GGR"
81
- end
82
-
83
32
  it 'finds cleavage site indices' do
84
33
  {
85
34
  "" => [0,0],
@@ -129,7 +78,9 @@ describe 'a digester' do
129
78
  @digester.cleavage_sites(sequence).is expected
130
79
  end
131
80
  end
132
-
81
+
82
+
83
+
133
84
  it 'finds cleavage sites with offset and limit' do
134
85
  {
135
86
  "RxxR" => [2,4],
@@ -165,7 +116,8 @@ describe 'a digester' do
165
116
  "RRR" => ["R", "R", "R"]
166
117
  }.each do |sequence, expected|
167
118
  # spp(sequence)
168
- @digester.digest(sequence) {|frag, s, e| frag}.is expected
119
+ @digester.digest(sequence).is expected
120
+ #@digester.digest(sequence) {|frag, s, e| frag}.is expected
169
121
  end
170
122
  end
171
123
 
@@ -182,7 +134,8 @@ describe 'a digester' do
182
134
  "RAR" => ["R", "RAR", "AR"],
183
135
  "RRR" => ["R", "RR", "R", "RR", "R"]
184
136
  }.each do |sequence, expected|
185
- @digester.digest(sequence, 1) {|frag, s, e| frag}.is expected
137
+ @digester.digest(sequence, 1).is expected
138
+ #@digester.digest(sequence, 1) {|frag, s, e| frag}.is expected
186
139
  end
187
140
  end
188
141
 
@@ -199,7 +152,8 @@ describe 'a digester' do
199
152
  "RAR" => ["R", "RAR", "AR"],
200
153
  "RRR" => ["R", "RR", "RRR", "R", "RR", "R"]
201
154
  }.each do |sequence, expected|
202
- @digester.digest(sequence, 2) {|frag, s, e| frag}.is expected
155
+ @digester.digest(sequence, 2).is expected
156
+ #@digester.digest(sequence, 2) {|frag, s, e| frag}.is expected
203
157
  end
204
158
  end
205
159
 
@@ -273,9 +227,67 @@ describe 'a digester' do
273
227
  end
274
228
  end
275
229
  end
230
+ end
231
+
232
+
233
+ describe 'performs as documented in readme' do
234
+ it 'runs cleavage sites documentation' do
235
+ d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
236
+ seq = "AARGGR"
237
+ sites = d.cleavage_sites(seq)
238
+ sites.is [0, 3, 6]
239
+
240
+ seq[sites[0], sites[0+1] - sites[0]].is "AAR"
241
+ seq[sites[1], sites[1+1] - sites[1]].is "GGR"
242
+
243
+ seq = "AAR \n GGR"
244
+ sites = d.cleavage_sites(seq)
245
+ sites.is [0, 8, 11]
246
+
247
+ seq[sites[0], sites[0+1] - sites[0]].is "AAR \n "
248
+ seq[sites[1], sites[1+1] - sites[1]].is "GGR"
249
+ end
250
+ end
276
251
 
252
+ describe 'basic trypsin digestion' do
253
+ it 'performs digestion and can specify sites of digestion' do
254
+ trypsin = Ms::InSilico::Digester['Trypsin']
255
+
256
+ expected = [
257
+ 'MIVIGR',
258
+ 'SIVHPYITNEYEPFAAEK',
259
+ 'QQILSIMAG']
260
+ trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
261
+
262
+ expected = [
263
+ 'MIVIGR',
264
+ 'MIVIGRSIVHPYITNEYEPFAAEK',
265
+ 'SIVHPYITNEYEPFAAEK',
266
+ 'SIVHPYITNEYEPFAAEKQQILSIMAG',
267
+ 'QQILSIMAG']
268
+ trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
269
+
270
+ expected = [
271
+ [0,6],
272
+ [0,24],
273
+ [6,24],
274
+ [6,33],
275
+ [24,33]]
276
+ trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
277
+ end
278
+
279
+ it 'completely ignores whitespace inside protein sequences' do
280
+ expected = [
281
+ "\tMIVIGR",
282
+ "SIVHP\nYITNEYEPFAAE K",
283
+ "QQILSI\rMAG"]
284
+ Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
285
+ end
286
+
277
287
  it 'does a trypsin digest' do
278
288
  trypsin = Ms::InSilico::Digester::TRYPSIN
289
+ # alternate ways to specify the enzyme
290
+ Ms::InSilico::Digester::TRYPSIN.is Ms::InSilico::Digester['Trypsin']
279
291
  {
280
292
  "" => [''],
281
293
  "A" => ["A"],
@@ -294,8 +306,48 @@ describe 'a digester' do
294
306
  "ARPARAA" => ["ARPAR", "AA"],
295
307
  "RPRRR" => ["RPR", "R", "R"]
296
308
  }.each do |sequence, expected|
297
- trypsin.digest(sequence) {|frag, s, e| frag}.is expected
309
+ trypsin.digest(sequence).is expected
310
+ end
311
+ end
312
+
313
+
314
+
315
+ end
316
+
317
+ describe 'digestion with other enzymes' do
318
+
319
+ # This is how to create the enzyme:
320
+ # Ms::InSilico::Digester['Arg-C']
321
+ # Ms::InSilico::Digester::ARG_C
322
+ {
323
+ ['Arg-C', :ARG_C] => {
324
+ "AARC" => ["AAR", "C"],
325
+ "AARP" => ["AARP"]
326
+ },
327
+ ['Asp-N', :ASP_N] => {
328
+ "AABDS" => ["AA", "B", "DS"],
329
+ "ADZBS" => ["A", "DZ", "BS"],
330
+ "B" => %w(B),
331
+ "A" => %w(A),
332
+ "ABD" => %w(A B D),
333
+ },
334
+ ['Asp-N_ambic', :ASP_N_AMBIC] => {
335
+ "AAEDS" => ["AA", "E", "DS"],
336
+ "ADZES" => ["A", "DZ", "ES"],
337
+ "AED" => %w(A E D),
338
+ "GDE" => %w(G D E),
339
+ "AAECCDGG" => %w(AA ECC DGG),
340
+ }
341
+ }.each do |enzyme_names, test_hash|
342
+ it "digests with '#{enzyme_names.first}'" do
343
+ digester = Ms::InSilico::Digester[enzyme_names.first]
344
+ digester.is Ms::InSilico::Digester.const_get(enzyme_names.last)
345
+ test_hash.each do |sequence, expected|
346
+ digester.digest(sequence).is expected
347
+ end
298
348
  end
299
349
  end
300
-
301
350
  end
351
+
352
+
353
+
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper.rb'
1
+ require 'spec_helper.rb'
2
2
 
3
3
  require 'ms/in_silico/fragment'
4
4
 
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../spec_helper'
1
+ require 'spec_helper'
2
2
  require 'ms/in_silico/spectrum'
3
3
 
4
4
  # class locate_residues tests
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/spec_helper.rb'
1
+ require '/spec_helper.rb'
2
2
  require 'ms/in_silico/digester'
3
3
  require 'ms/in_silico/spectrum'
4
4
 
@@ -8,21 +8,21 @@ describe 'readme documentation' do
8
8
  trypsin = Ms::InSilico::Digester['Trypsin']
9
9
  peptides = trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
10
10
  expected = [
11
- 'MIVIGR',
12
- 'SIVHPYITNEYEPFAAEK',
13
- 'QQILSIMAG']
11
+ 'MIVIGR',
12
+ 'SIVHPYITNEYEPFAAEK',
13
+ 'QQILSIMAG']
14
14
  peptides.sort.is expected.sort
15
15
 
16
16
  spectrum = Ms::InSilico::Spectrum.new(peptides[0])
17
17
  spectrum.parent_ion_mass.should.be.close 688.417442373391, 10**-12
18
18
 
19
19
  expected = [
20
- 132.047761058391,
21
- 245.131825038791,
22
- 344.200238954991,
23
- 457.284302935391,
24
- 514.305766658991,
25
- 670.406877687091]
20
+ 132.047761058391,
21
+ 245.131825038791,
22
+ 344.200238954991,
23
+ 457.284302935391,
24
+ 514.305766658991,
25
+ 670.406877687091]
26
26
  spectrum.series('b').zip(expected) do |o,e|
27
27
  o.should.be.close e, 10**-12
28
28
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 0
9
- version: 0.4.0
8
+ - 1
9
+ version: 0.4.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Simon Chiang
@@ -14,13 +14,14 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-06-23 00:00:00 -06:00
18
- default_executable:
17
+ date: 2010-11-15 00:00:00 -07:00
18
+ default_executable: digest.rb
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: molecules
22
22
  prerelease: false
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
24
25
  requirements:
25
26
  - - ">="
26
27
  - !ruby/object:Gem::Version
@@ -35,6 +36,7 @@ dependencies:
35
36
  name: tap
36
37
  prerelease: false
37
38
  requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
38
40
  requirements:
39
41
  - - ">="
40
42
  - !ruby/object:Gem::Version
@@ -49,6 +51,7 @@ dependencies:
49
51
  name: tap-test
50
52
  prerelease: false
51
53
  requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
52
55
  requirements:
53
56
  - - ">="
54
57
  - !ruby/object:Gem::Version
@@ -63,6 +66,7 @@ dependencies:
63
66
  name: spec-more
64
67
  prerelease: false
65
68
  requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
66
70
  requirements:
67
71
  - - ">="
68
72
  - !ruby/object:Gem::Version
@@ -73,8 +77,8 @@ dependencies:
73
77
  version_requirements: *id004
74
78
  description: peptide fragmentation and protein digestion
75
79
  email: jtprince@gmail.com
76
- executables: []
77
-
80
+ executables:
81
+ - digest.rb
78
82
  extensions: []
79
83
 
80
84
  extra_rdoc_files:
@@ -87,8 +91,8 @@ files:
87
91
  - README.rdoc
88
92
  - Rakefile
89
93
  - VERSION
94
+ - bin/digest.rb
90
95
  - lib/ms/in_silico.rb
91
- - lib/ms/in_silico/digest.rb
92
96
  - lib/ms/in_silico/digester.rb
93
97
  - lib/ms/in_silico/fragment.rb
94
98
  - lib/ms/in_silico/spectrum.rb
@@ -97,9 +101,6 @@ files:
97
101
  - spec/ms/in_silico/spectrum_spec.rb
98
102
  - spec/readme_spec.rb
99
103
  - spec/spec_helper.rb
100
- - spec/tap_test_helper.rb
101
- - spec/tap_test_suite.rb
102
- - tap.yml
103
104
  - test/ms/in_silico/digest_test.rb
104
105
  - test/ms/in_silico/fragment_test.rb
105
106
  has_rdoc: true
@@ -112,6 +113,7 @@ rdoc_options:
112
113
  require_paths:
113
114
  - lib
114
115
  required_ruby_version: !ruby/object:Gem::Requirement
116
+ none: false
115
117
  requirements:
116
118
  - - ">="
117
119
  - !ruby/object:Gem::Version
@@ -119,6 +121,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
119
121
  - 0
120
122
  version: "0"
121
123
  required_rubygems_version: !ruby/object:Gem::Requirement
124
+ none: false
122
125
  requirements:
123
126
  - - ">="
124
127
  - !ruby/object:Gem::Version
@@ -128,17 +131,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
131
  requirements: []
129
132
 
130
133
  rubyforge_project: mspire
131
- rubygems_version: 1.3.6
134
+ rubygems_version: 1.3.7
132
135
  signing_key:
133
136
  specification_version: 3
134
137
  summary: in-silico calculations for mass spec data
135
138
  test_files:
136
- - spec/tap_test_suite.rb
137
139
  - spec/ms/in_silico/fragment_spec.rb
138
140
  - spec/ms/in_silico/spectrum_spec.rb
139
141
  - spec/ms/in_silico/digester_spec.rb
140
142
  - spec/readme_spec.rb
141
- - spec/tap_test_helper.rb
142
143
  - spec/spec_helper.rb
143
144
  - test/ms/in_silico/digest_test.rb
144
145
  - test/ms/in_silico/fragment_test.rb
@@ -1,52 +0,0 @@
1
- require 'tap/task'
2
- require 'ms/in_silico/digester'
3
-
4
- module Ms
5
- module InSilico
6
- # :startdoc::task digest a protein sequence into peptides
7
- # Digest a protein sequence into an array of peptides.
8
- #
9
- # % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --:i dump
10
- # MIVIGR
11
- # SIVHPYITNEYEPFAAEK
12
- # QQILSIMAG
13
- #
14
- class Digest < Tap::Task
15
-
16
- config :digester, 'Trypsin' # The name of the digester
17
- config :min_length, nil, &c.integer_or_nil # Minimum peptide length
18
- config :max_length, nil, &c.integer_or_nil # Maximum peptide length
19
- config :max_misses, 0, &c.integer # The max # of missed cleavage sites
20
- config :site_digest, false, &c.boolean # Digest to sites (rather than sequences)
21
-
22
- def process(sequence)
23
- unless d = Digester[digester]
24
- raise ArgumentError, "unknown digester: #{digester}"
25
- end
26
-
27
- # extract sequence from FASTA entries
28
- sequence = $1 if sequence =~ /\A>.*?\n(.*)\z/m
29
- sequence.gsub!(/\s/, "")
30
-
31
- peptides = if site_digest
32
- d.site_digest(sequence, max_misses)
33
- else
34
- d.digest(sequence, max_misses)
35
- end
36
-
37
- # filter
38
- peptides.delete_if do |peptide|
39
- peptide.length < min_length
40
- end if min_length
41
-
42
- peptides.delete_if do |peptide|
43
- peptide.length > max_length
44
- end if max_length
45
-
46
- log 'digest', "#{sequence[0..10]}#{sequence.length > 10 ? '...' : ''} to #{peptides.length} peptides"
47
- peptides
48
- end
49
-
50
- end
51
- end
52
- end
@@ -1,2 +0,0 @@
1
- require 'rubygems'
2
- require 'tap/test/unit'
@@ -1,5 +0,0 @@
1
- $:.unshift File.join(File.dirname(__FILE__), '../lib')
2
-
3
- # runs all subsets (see Tap::Test::SubsetMethods)
4
- ENV["ALL"] = "true"
5
- Dir.glob("./**/*_test.rb").each {|test| require test}
data/tap.yml DELETED
File without changes