ms-in_silico 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/VERSION +1 -1
- data/bin/digest.rb +50 -0
- data/lib/ms/in_silico/digester.rb +21 -15
- data/lib/ms/in_silico/fragment.rb +3 -2
- data/lib/ms/in_silico/spectrum.rb +0 -0
- data/spec/ms/in_silico/digester_spec.rb +110 -58
- data/spec/ms/in_silico/fragment_spec.rb +1 -1
- data/spec/ms/in_silico/spectrum_spec.rb +1 -1
- data/spec/readme_spec.rb +10 -10
- metadata +14 -13
- data/lib/ms/in_silico/digest.rb +0 -52
- data/spec/tap_test_helper.rb +0 -2
- data/spec/tap_test_suite.rb +0 -5
- data/tap.yml +0 -0
    
        data/.gitignore
    CHANGED
    
    
    
        data/VERSION
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            0.4.0
         | 
| 1 | 
            +
            0.4.1
         | 
    
        data/bin/digest.rb
    ADDED
    
    | @@ -0,0 +1,50 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'optparse'
         | 
| 4 | 
            +
            require 'ms/in_silico/digester'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            def print_enzyme_names
         | 
| 7 | 
            +
              puts "(tab delimited)"
         | 
| 8 | 
            +
              puts %w(name cuts nocut cterm?).join("\t")
         | 
| 9 | 
            +
              Ms::InSilico::Digester::ENZYMES.each do |key, enzyme|
         | 
| 10 | 
            +
                puts [:name, :cleave_str, :cterm_exception, :cterm_cleavage].map {|v| enzyme.send(v) }.join("\t")
         | 
| 11 | 
            +
              end
         | 
| 12 | 
            +
            end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            delimiter_hash = {
         | 
| 15 | 
            +
              'space' => ' ',
         | 
| 16 | 
            +
              'tab' => "\t",
         | 
| 17 | 
            +
              'newline' => "\n",
         | 
| 18 | 
            +
            }
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            opt = {
         | 
| 21 | 
            +
              :enzyme => 'Trypsin',
         | 
| 22 | 
            +
              :missed_cleavages => 0,
         | 
| 23 | 
            +
              :delimiter => 'space',
         | 
| 24 | 
            +
              :record_delimiter => 'newline',
         | 
| 25 | 
            +
            }
         | 
| 26 | 
            +
            opts = OptionParser.new do |op|
         | 
| 27 | 
            +
              op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] SOMEPROTEINSEKUENCE ..."
         | 
| 28 | 
            +
              op.separator "output: SOMEPR OTEINSEK UENCE"
         | 
| 29 | 
            +
              op.separator "options:"
         | 
| 30 | 
            +
              op.on("-e", "--enzyme <#{opt[:enzyme]}>", "specify a valid enzyme name") {|v| opt[:enzyme] = v }
         | 
| 31 | 
            +
              op.on("-m", "--missed-cleavages <#{opt[:missed_cleavages]}>", Integer, "number of missed cleavages") {|v| opt[:missed_cleavages] = v }
         | 
| 32 | 
            +
              op.on("-d", "--delimiter <#{opt[:delimiter]}>", "delimit the returned peptides",
         | 
| 33 | 
            +
                    "('space','tab','newline' or some other string)") {|v| opt[:delimiter] = v }
         | 
| 34 | 
            +
              op.on("-r", "--record-delimiter <#{opt[:record_delimiter]}>", "included after each protein output") {|v| opt[:record_delimiter] = v }
         | 
| 35 | 
            +
              op.separator ""
         | 
| 36 | 
            +
              op.on("--print-enzymes", "prints table of valid enzyme names and exits") { print_enzyme_names ; exit }
         | 
| 37 | 
            +
            end
         | 
| 38 | 
            +
            opts.parse!
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            if ARGV.size == 0
         | 
| 41 | 
            +
              puts opts
         | 
| 42 | 
            +
              exit
         | 
| 43 | 
            +
            end
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            [:delimiter, :record_delimiter].each {|k| opt[k] = (delimiter_hash[opt[k]] || opt[k]) }
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            ARGV.each do |protein|
         | 
| 48 | 
            +
              print Ms::InSilico::Digester[opt[:enzyme]].digest(protein, opt[:missed_cleavages]).join(opt[:delimiter])
         | 
| 49 | 
            +
              print opt[:record_delimiter]
         | 
| 50 | 
            +
            end
         | 
| @@ -105,10 +105,10 @@ module Ms | |
| 105 105 | 
             
                    @scanner = StringScanner.new('')
         | 
| 106 106 | 
             
                  end
         | 
| 107 107 |  | 
| 108 | 
            -
                  # Returns  | 
| 109 | 
            -
                  #  | 
| 110 | 
            -
                  #  | 
| 111 | 
            -
                  #  | 
| 108 | 
            +
                  # Returns digestion sites in sequence, as determined by the
         | 
| 109 | 
            +
                  # cleave_regexp boundaries.  The digestion sites correspond to the
         | 
| 110 | 
            +
                  # positions where a peptide begins and ends, such that [n, (n+1) - n]
         | 
| 111 | 
            +
                  # corresponds to the [index, length] for peptide n.
         | 
| 112 112 | 
             
                  #
         | 
| 113 113 | 
             
                  #   d = Digester.new('Trypsin', 'KR', 'P')
         | 
| 114 114 | 
             
                  #   seq = "AARGGR"
         | 
| @@ -128,19 +128,25 @@ module Ms | |
| 128 128 | 
             
                  # The digested section of sequence may be specified using offset 
         | 
| 129 129 | 
             
                  # and length.
         | 
| 130 130 | 
             
                  def cleavage_sites(seq, offset=0, length=seq.length-offset)
         | 
| 131 | 
            +
                    return [0, 1] if seq.size == 1  # adding exceptions is lame--algorithm should just work
         | 
| 132 | 
            +
             | 
| 131 133 | 
             
                    adjustment = cterm_cleavage ? 0 : 1
         | 
| 132 134 | 
             
                    limit = offset + length
         | 
| 133 | 
            -
             | 
| 135 | 
            +
             | 
| 134 136 | 
             
                    positions = [offset]
         | 
| 135 137 | 
             
                    pos = scan(seq, offset, limit) do |pos|
         | 
| 136 | 
            -
                      positions << pos - adjustment
         | 
| 138 | 
            +
                      positions << (pos - adjustment)
         | 
| 137 139 | 
             
                    end
         | 
| 138 140 |  | 
| 139 141 | 
             
                    # add the final position
         | 
| 140 | 
            -
                    if pos < limit || positions.length == 1
         | 
| 142 | 
            +
                    if (pos < limit) || (positions.length == 1)
         | 
| 143 | 
            +
                      positions << limit
         | 
| 144 | 
            +
                    end
         | 
| 145 | 
            +
                    # adding exceptions is lame.. this code probably needs to be
         | 
| 146 | 
            +
                    # refactored (corrected).
         | 
| 147 | 
            +
                    if !cterm_cleavage && pos == limit
         | 
| 141 148 | 
             
                      positions << limit
         | 
| 142 149 | 
             
                    end
         | 
| 143 | 
            -
             | 
| 144 150 | 
             
                    positions
         | 
| 145 151 | 
             
                  end
         | 
| 146 152 |  | 
| @@ -151,14 +157,14 @@ module Ms | |
| 151 157 | 
             
                  # 
         | 
| 152 158 | 
             
                  # Each [start_index, end_index] pair is yielded to the block, if given,
         | 
| 153 159 | 
             
                  # and the collected results are returned.
         | 
| 154 | 
            -
                  def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset) # :yields: start_index, end_index
         | 
| 160 | 
            +
                  def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset, &block) # :yields: start_index, end_index
         | 
| 155 161 | 
             
                    frag_sites = cleavage_sites(seq, offset, length)
         | 
| 156 162 |  | 
| 157 163 | 
             
                    overlay(frag_sites.length, max_misses, 1) do |start_index, end_index|
         | 
| 158 164 | 
             
                      start_index = frag_sites[start_index]
         | 
| 159 165 | 
             
                      end_index = frag_sites[end_index]
         | 
| 160 166 |  | 
| 161 | 
            -
                       | 
| 167 | 
            +
                      block ? block.call(start_index, end_index) : [start_index, end_index]
         | 
| 162 168 | 
             
                    end  
         | 
| 163 169 | 
             
                  end
         | 
| 164 170 |  | 
| @@ -167,7 +173,7 @@ module Ms | |
| 167 173 | 
             
                  # as in that method, the digested section of sequence may be specified using 
         | 
| 168 174 | 
             
                  # offset and length.
         | 
| 169 175 | 
             
                  def digest(seq, max_misses=0, offset=0, length=seq.length-offset)
         | 
| 170 | 
            -
                    site_digest(seq, max_misses, offset, length). | 
| 176 | 
            +
                    site_digest(seq, max_misses, offset, length).map do |s, e|
         | 
| 171 177 | 
             
                      seq[s, e-s]
         | 
| 172 178 | 
             
                    end
         | 
| 173 179 | 
             
                  end
         | 
| @@ -183,7 +189,7 @@ module Ms | |
| 183 189 | 
             
                  # Scans seq between offset and limit for the cleave_regexp, skipping whitespace
         | 
| 184 190 | 
             
                  # and being mindful of exception characters. The positions of the scanner at
         | 
| 185 191 | 
             
                  # each match are yielded to the block.      
         | 
| 186 | 
            -
                  def scan(seq, offset, limit) # :nodoc:
         | 
| 192 | 
            +
                  def scan(seq, offset, limit, &block) # :nodoc:
         | 
| 187 193 | 
             
                    scanner.string = seq
         | 
| 188 194 | 
             
                    scanner.pos = offset
         | 
| 189 195 |  | 
| @@ -197,7 +203,7 @@ module Ms | |
| 197 203 | 
             
                      # break if you scanned past the upper limit
         | 
| 198 204 | 
             
                      break if pos > limit
         | 
| 199 205 |  | 
| 200 | 
            -
                       | 
| 206 | 
            +
                      block.call(pos)
         | 
| 201 207 | 
             
                    end
         | 
| 202 208 |  | 
| 203 209 | 
             
                    scanner.pos
         | 
| @@ -205,14 +211,14 @@ module Ms | |
| 205 211 |  | 
| 206 212 | 
             
                  # Performs an overlap-collect algorithm providing the start and end 
         | 
| 207 213 | 
             
                  # indicies of spans skipping up to max_misses boundaries.
         | 
| 208 | 
            -
                  def overlay(n, max_misses, offset) # :nodoc:
         | 
| 214 | 
            +
                  def overlay(n, max_misses, offset, &block) # :nodoc:
         | 
| 209 215 | 
             
                    results = []
         | 
| 210 216 | 
             
                    0.upto(n-1) do |start_index|
         | 
| 211 217 | 
             
                      0.upto(max_misses) do |n_miss|
         | 
| 212 218 | 
             
                        end_index = start_index + offset + n_miss
         | 
| 213 219 | 
             
                        break if end_index == n
         | 
| 214 220 |  | 
| 215 | 
            -
                        results <<  | 
| 221 | 
            +
                        results << block.call(start_index, end_index)
         | 
| 216 222 | 
             
                      end
         | 
| 217 223 | 
             
                    end
         | 
| 218 224 | 
             
                    results
         | 
| @@ -1,9 +1,9 @@ | |
| 1 | 
            -
            require 'tap/task'
         | 
| 2 1 | 
             
            require 'ms/in_silico/spectrum'
         | 
| 3 2 |  | 
| 4 3 | 
             
            module Ms
         | 
| 5 4 | 
             
              module InSilico
         | 
| 6 5 |  | 
| 6 | 
            +
            =begin
         | 
| 7 7 | 
             
                # :startdoc::task calculates a theoretical ms/ms spectrum
         | 
| 8 8 | 
             
                #
         | 
| 9 9 | 
             
                # Calculates the theoretical ms/ms spectrum for a peptide sequence.
         | 
| @@ -66,5 +66,6 @@ module Ms | |
| 66 66 | 
             
                  end
         | 
| 67 67 |  | 
| 68 68 | 
             
                end 
         | 
| 69 | 
            +
            =end
         | 
| 69 70 | 
             
              end
         | 
| 70 | 
            -
            end
         | 
| 71 | 
            +
            end
         | 
| 
            File without changes
         | 
| @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            require  | 
| 1 | 
            +
            require 'spec_helper.rb'
         | 
| 2 2 |  | 
| 3 3 | 
             
            require 'ms/in_silico/digester'
         | 
| 4 4 | 
             
            require 'pp'
         | 
| @@ -28,58 +28,7 @@ describe 'a digester' do | |
| 28 28 |  | 
| 29 29 | 
             
                str.join('')
         | 
| 30 30 | 
             
              end
         | 
| 31 | 
            -
              
         | 
| 32 | 
            -
              it 'performs digestion and can specify sites of digestion' do
         | 
| 33 | 
            -
                trypsin = Ms::InSilico::Digester['Trypsin']
         | 
| 34 | 
            -
                
         | 
| 35 | 
            -
                expected = [
         | 
| 36 | 
            -
                'MIVIGR',
         | 
| 37 | 
            -
                'SIVHPYITNEYEPFAAEK',
         | 
| 38 | 
            -
                'QQILSIMAG']
         | 
| 39 | 
            -
                trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
         | 
| 40 | 
            -
                
         | 
| 41 | 
            -
                expected =  [
         | 
| 42 | 
            -
                'MIVIGR',
         | 
| 43 | 
            -
                'MIVIGRSIVHPYITNEYEPFAAEK',
         | 
| 44 | 
            -
                'SIVHPYITNEYEPFAAEK',
         | 
| 45 | 
            -
                'SIVHPYITNEYEPFAAEKQQILSIMAG',
         | 
| 46 | 
            -
                'QQILSIMAG']
         | 
| 47 | 
            -
                trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
         | 
| 48 | 
            -
                
         | 
| 49 | 
            -
                expected = [
         | 
| 50 | 
            -
                [0,6],
         | 
| 51 | 
            -
                [0,24],
         | 
| 52 | 
            -
                [6,24],
         | 
| 53 | 
            -
                [6,33],
         | 
| 54 | 
            -
                [24,33]]
         | 
| 55 | 
            -
                trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
         | 
| 56 | 
            -
              end
         | 
| 57 | 
            -
              
         | 
| 58 | 
            -
              it 'completely ignores whitespace inside protein sequences' do
         | 
| 59 | 
            -
                expected = [
         | 
| 60 | 
            -
                "\tMIVIGR",
         | 
| 61 | 
            -
                "SIVHP\nYITNEYEPFAAE K",
         | 
| 62 | 
            -
                "QQILSI\rMAG"]
         | 
| 63 | 
            -
                Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
         | 
| 64 | 
            -
              end
         | 
| 65 | 
            -
              
         | 
| 66 | 
            -
              it 'runs cleavage sites documentation' do
         | 
| 67 | 
            -
                d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
         | 
| 68 | 
            -
                seq = "AARGGR"
         | 
| 69 | 
            -
                sites = d.cleavage_sites(seq)
         | 
| 70 | 
            -
                sites.is [0, 3, 6]
         | 
| 71 | 
            -
                
         | 
| 72 | 
            -
                seq[sites[0], sites[0+1] - sites[0]].is "AAR"
         | 
| 73 | 
            -
                seq[sites[1], sites[1+1] - sites[1]].is "GGR"
         | 
| 74 | 
            -
                
         | 
| 75 | 
            -
                seq = "AAR  \n  GGR"
         | 
| 76 | 
            -
                sites = d.cleavage_sites(seq)
         | 
| 77 | 
            -
                sites.is [0, 8, 11]
         | 
| 78 31 |  | 
| 79 | 
            -
                seq[sites[0], sites[0+1] - sites[0]].is "AAR  \n  "
         | 
| 80 | 
            -
                seq[sites[1], sites[1+1] - sites[1]].is "GGR"
         | 
| 81 | 
            -
              end
         | 
| 82 | 
            -
              
         | 
| 83 32 | 
             
              it 'finds cleavage site indices' do
         | 
| 84 33 | 
             
                {
         | 
| 85 34 | 
             
                  "" => [0,0],
         | 
| @@ -129,7 +78,9 @@ describe 'a digester' do | |
| 129 78 | 
             
                   @digester.cleavage_sites(sequence).is expected
         | 
| 130 79 | 
             
                end
         | 
| 131 80 | 
             
              end
         | 
| 132 | 
            -
             | 
| 81 | 
            +
             | 
| 82 | 
            +
                
         | 
| 83 | 
            +
               
         | 
| 133 84 | 
             
              it 'finds cleavage sites with offset and limit' do
         | 
| 134 85 | 
             
                {
         | 
| 135 86 | 
             
                  "RxxR" => [2,4],
         | 
| @@ -165,7 +116,8 @@ describe 'a digester' do | |
| 165 116 | 
             
                  "RRR" => ["R", "R", "R"]
         | 
| 166 117 | 
             
                }.each do |sequence, expected|
         | 
| 167 118 | 
             
                  # spp(sequence)
         | 
| 168 | 
            -
                   @digester.digest(sequence) | 
| 119 | 
            +
                   @digester.digest(sequence).is expected
         | 
| 120 | 
            +
                   #@digester.digest(sequence) {|frag, s, e| frag}.is expected
         | 
| 169 121 | 
             
                end
         | 
| 170 122 | 
             
              end
         | 
| 171 123 |  | 
| @@ -182,7 +134,8 @@ describe 'a digester' do | |
| 182 134 | 
             
                  "RAR" => ["R", "RAR", "AR"],
         | 
| 183 135 | 
             
                  "RRR" => ["R", "RR", "R", "RR", "R"]
         | 
| 184 136 | 
             
                }.each do |sequence, expected|
         | 
| 185 | 
            -
                   @digester.digest(sequence, 1) | 
| 137 | 
            +
                   @digester.digest(sequence, 1).is expected
         | 
| 138 | 
            +
                   #@digester.digest(sequence, 1) {|frag, s, e| frag}.is expected
         | 
| 186 139 | 
             
                end
         | 
| 187 140 | 
             
              end
         | 
| 188 141 |  | 
| @@ -199,7 +152,8 @@ describe 'a digester' do | |
| 199 152 | 
             
                  "RAR" => ["R", "RAR", "AR"],
         | 
| 200 153 | 
             
                  "RRR" => ["R", "RR", "RRR", "R", "RR", "R"]
         | 
| 201 154 | 
             
                }.each do |sequence, expected|
         | 
| 202 | 
            -
                   @digester.digest(sequence, 2) | 
| 155 | 
            +
                   @digester.digest(sequence, 2).is expected
         | 
| 156 | 
            +
                   #@digester.digest(sequence, 2) {|frag, s, e| frag}.is expected
         | 
| 203 157 | 
             
                end
         | 
| 204 158 | 
             
              end
         | 
| 205 159 |  | 
| @@ -273,9 +227,67 @@ describe 'a digester' do | |
| 273 227 | 
             
                  end
         | 
| 274 228 | 
             
                end
         | 
| 275 229 | 
             
              end
         | 
| 230 | 
            +
            end
         | 
| 231 | 
            +
             | 
| 232 | 
            +
             | 
| 233 | 
            +
            describe 'performs as documented in readme' do
         | 
| 234 | 
            +
             it 'runs cleavage sites documentation' do
         | 
| 235 | 
            +
                d = Ms::InSilico::Digester.new('Trypsin', 'KR', 'P')
         | 
| 236 | 
            +
                seq = "AARGGR"
         | 
| 237 | 
            +
                sites = d.cleavage_sites(seq)
         | 
| 238 | 
            +
                sites.is [0, 3, 6]
         | 
| 239 | 
            +
                
         | 
| 240 | 
            +
                seq[sites[0], sites[0+1] - sites[0]].is "AAR"
         | 
| 241 | 
            +
                seq[sites[1], sites[1+1] - sites[1]].is "GGR"
         | 
| 242 | 
            +
                
         | 
| 243 | 
            +
                seq = "AAR  \n  GGR"
         | 
| 244 | 
            +
                sites = d.cleavage_sites(seq)
         | 
| 245 | 
            +
                sites.is [0, 8, 11]
         | 
| 246 | 
            +
             | 
| 247 | 
            +
                seq[sites[0], sites[0+1] - sites[0]].is "AAR  \n  "
         | 
| 248 | 
            +
                seq[sites[1], sites[1+1] - sites[1]].is "GGR"
         | 
| 249 | 
            +
              end
         | 
| 250 | 
            +
            end
         | 
| 276 251 |  | 
| 252 | 
            +
            describe 'basic trypsin digestion' do
         | 
| 253 | 
            +
              it 'performs digestion and can specify sites of digestion' do
         | 
| 254 | 
            +
                trypsin = Ms::InSilico::Digester['Trypsin']
         | 
| 255 | 
            +
                
         | 
| 256 | 
            +
                expected = [
         | 
| 257 | 
            +
                'MIVIGR',
         | 
| 258 | 
            +
                'SIVHPYITNEYEPFAAEK',
         | 
| 259 | 
            +
                'QQILSIMAG']
         | 
| 260 | 
            +
                trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG').is expected
         | 
| 261 | 
            +
                
         | 
| 262 | 
            +
                expected =  [
         | 
| 263 | 
            +
                'MIVIGR',
         | 
| 264 | 
            +
                'MIVIGRSIVHPYITNEYEPFAAEK',
         | 
| 265 | 
            +
                'SIVHPYITNEYEPFAAEK',
         | 
| 266 | 
            +
                'SIVHPYITNEYEPFAAEKQQILSIMAG',
         | 
| 267 | 
            +
                'QQILSIMAG']
         | 
| 268 | 
            +
                trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
         | 
| 269 | 
            +
                
         | 
| 270 | 
            +
                expected = [
         | 
| 271 | 
            +
                [0,6],
         | 
| 272 | 
            +
                [0,24],
         | 
| 273 | 
            +
                [6,24],
         | 
| 274 | 
            +
                [6,33],
         | 
| 275 | 
            +
                [24,33]]
         | 
| 276 | 
            +
                trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1).is expected
         | 
| 277 | 
            +
              end
         | 
| 278 | 
            +
             | 
| 279 | 
            +
              it 'completely ignores whitespace inside protein sequences' do
         | 
| 280 | 
            +
                expected = [
         | 
| 281 | 
            +
                "\tMIVIGR",
         | 
| 282 | 
            +
                "SIVHP\nYITNEYEPFAAE K",
         | 
| 283 | 
            +
                "QQILSI\rMAG"]
         | 
| 284 | 
            +
                Ms::InSilico::Digester['Trypsin'].digest("\tMIVIGRSIVHP\nYITNEYEPFAAE KQQILSI\rMAG").is expected
         | 
| 285 | 
            +
              end
         | 
| 286 | 
            +
             | 
| 277 287 | 
             
              it 'does a trypsin digest' do
         | 
| 278 288 | 
             
                trypsin = Ms::InSilico::Digester::TRYPSIN
         | 
| 289 | 
            +
                # alternate ways to specify the enzyme
         | 
| 290 | 
            +
                Ms::InSilico::Digester::TRYPSIN.is Ms::InSilico::Digester['Trypsin']
         | 
| 279 291 | 
             
                {
         | 
| 280 292 | 
             
                  "" => [''],
         | 
| 281 293 | 
             
                  "A" => ["A"],
         | 
| @@ -294,8 +306,48 @@ describe 'a digester' do | |
| 294 306 | 
             
                  "ARPARAA" => ["ARPAR", "AA"],
         | 
| 295 307 | 
             
                  "RPRRR" => ["RPR", "R", "R"]
         | 
| 296 308 | 
             
                }.each do |sequence, expected|
         | 
| 297 | 
            -
                   trypsin.digest(sequence) | 
| 309 | 
            +
                   trypsin.digest(sequence).is expected
         | 
| 310 | 
            +
                end
         | 
| 311 | 
            +
              end
         | 
| 312 | 
            +
             | 
| 313 | 
            +
             | 
| 314 | 
            +
             | 
| 315 | 
            +
            end
         | 
| 316 | 
            +
             | 
| 317 | 
            +
            describe 'digestion with other enzymes' do
         | 
| 318 | 
            +
             | 
| 319 | 
            +
              # This is how to create the enzyme:
         | 
| 320 | 
            +
              # Ms::InSilico::Digester['Arg-C']
         | 
| 321 | 
            +
              # Ms::InSilico::Digester::ARG_C
         | 
| 322 | 
            +
              {
         | 
| 323 | 
            +
                  ['Arg-C', :ARG_C] => { 
         | 
| 324 | 
            +
                  "AARC" => ["AAR", "C"], 
         | 
| 325 | 
            +
                  "AARP" => ["AARP"] 
         | 
| 326 | 
            +
                },
         | 
| 327 | 
            +
                  ['Asp-N', :ASP_N] => {
         | 
| 328 | 
            +
                  "AABDS" => ["AA", "B", "DS"],
         | 
| 329 | 
            +
                  "ADZBS" => ["A", "DZ", "BS"],
         | 
| 330 | 
            +
                  "B" => %w(B),
         | 
| 331 | 
            +
                  "A" => %w(A),
         | 
| 332 | 
            +
                  "ABD" => %w(A B D),
         | 
| 333 | 
            +
                },
         | 
| 334 | 
            +
                ['Asp-N_ambic', :ASP_N_AMBIC] => {
         | 
| 335 | 
            +
                  "AAEDS" => ["AA", "E", "DS"],
         | 
| 336 | 
            +
                  "ADZES" => ["A", "DZ", "ES"],
         | 
| 337 | 
            +
                  "AED" => %w(A E D),
         | 
| 338 | 
            +
                  "GDE" => %w(G D E),
         | 
| 339 | 
            +
                  "AAECCDGG" => %w(AA ECC DGG),
         | 
| 340 | 
            +
                }
         | 
| 341 | 
            +
              }.each do |enzyme_names, test_hash|
         | 
| 342 | 
            +
                it "digests with '#{enzyme_names.first}'" do
         | 
| 343 | 
            +
                  digester = Ms::InSilico::Digester[enzyme_names.first]
         | 
| 344 | 
            +
                  digester.is Ms::InSilico::Digester.const_get(enzyme_names.last)
         | 
| 345 | 
            +
                  test_hash.each do |sequence, expected|
         | 
| 346 | 
            +
                    digester.digest(sequence).is expected
         | 
| 347 | 
            +
                  end
         | 
| 298 348 | 
             
                end
         | 
| 299 349 | 
             
              end
         | 
| 300 | 
            -
             
         | 
| 301 350 | 
             
            end
         | 
| 351 | 
            +
             | 
| 352 | 
            +
             | 
| 353 | 
            +
             | 
    
        data/spec/readme_spec.rb
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            require  | 
| 1 | 
            +
            require '/spec_helper.rb'
         | 
| 2 2 | 
             
            require 'ms/in_silico/digester'
         | 
| 3 3 | 
             
            require 'ms/in_silico/spectrum'
         | 
| 4 4 |  | 
| @@ -8,21 +8,21 @@ describe 'readme documentation' do | |
| 8 8 | 
             
                trypsin = Ms::InSilico::Digester['Trypsin']
         | 
| 9 9 | 
             
                peptides = trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
         | 
| 10 10 | 
             
                expected = [
         | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 11 | 
            +
                  'MIVIGR',
         | 
| 12 | 
            +
                  'SIVHPYITNEYEPFAAEK',
         | 
| 13 | 
            +
                  'QQILSIMAG']
         | 
| 14 14 | 
             
                peptides.sort.is expected.sort
         | 
| 15 15 |  | 
| 16 16 | 
             
                spectrum = Ms::InSilico::Spectrum.new(peptides[0])
         | 
| 17 17 | 
             
                spectrum.parent_ion_mass.should.be.close 688.417442373391, 10**-12
         | 
| 18 18 |  | 
| 19 19 | 
             
                expected = [
         | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 20 | 
            +
                  132.047761058391,
         | 
| 21 | 
            +
                  245.131825038791,
         | 
| 22 | 
            +
                  344.200238954991,
         | 
| 23 | 
            +
                  457.284302935391,
         | 
| 24 | 
            +
                  514.305766658991,
         | 
| 25 | 
            +
                  670.406877687091]
         | 
| 26 26 | 
             
                spectrum.series('b').zip(expected) do |o,e|
         | 
| 27 27 | 
             
                  o.should.be.close e, 10**-12
         | 
| 28 28 | 
             
                end
         | 
    
        metadata
    CHANGED
    
    | @@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version | |
| 5 5 | 
             
              segments: 
         | 
| 6 6 | 
             
              - 0
         | 
| 7 7 | 
             
              - 4
         | 
| 8 | 
            -
              - 0
         | 
| 9 | 
            -
              version: 0.4.0
         | 
| 8 | 
            +
              - 1
         | 
| 9 | 
            +
              version: 0.4.1
         | 
| 10 10 | 
             
            platform: ruby
         | 
| 11 11 | 
             
            authors: 
         | 
| 12 12 | 
             
            - Simon Chiang
         | 
| @@ -14,13 +14,14 @@ autorequire: | |
| 14 14 | 
             
            bindir: bin
         | 
| 15 15 | 
             
            cert_chain: []
         | 
| 16 16 |  | 
| 17 | 
            -
            date: 2010- | 
| 18 | 
            -
            default_executable: 
         | 
| 17 | 
            +
            date: 2010-11-15 00:00:00 -07:00
         | 
| 18 | 
            +
            default_executable: digest.rb
         | 
| 19 19 | 
             
            dependencies: 
         | 
| 20 20 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| 21 21 | 
             
              name: molecules
         | 
| 22 22 | 
             
              prerelease: false
         | 
| 23 23 | 
             
              requirement: &id001 !ruby/object:Gem::Requirement 
         | 
| 24 | 
            +
                none: false
         | 
| 24 25 | 
             
                requirements: 
         | 
| 25 26 | 
             
                - - ">="
         | 
| 26 27 | 
             
                  - !ruby/object:Gem::Version 
         | 
| @@ -35,6 +36,7 @@ dependencies: | |
| 35 36 | 
             
              name: tap
         | 
| 36 37 | 
             
              prerelease: false
         | 
| 37 38 | 
             
              requirement: &id002 !ruby/object:Gem::Requirement 
         | 
| 39 | 
            +
                none: false
         | 
| 38 40 | 
             
                requirements: 
         | 
| 39 41 | 
             
                - - ">="
         | 
| 40 42 | 
             
                  - !ruby/object:Gem::Version 
         | 
| @@ -49,6 +51,7 @@ dependencies: | |
| 49 51 | 
             
              name: tap-test
         | 
| 50 52 | 
             
              prerelease: false
         | 
| 51 53 | 
             
              requirement: &id003 !ruby/object:Gem::Requirement 
         | 
| 54 | 
            +
                none: false
         | 
| 52 55 | 
             
                requirements: 
         | 
| 53 56 | 
             
                - - ">="
         | 
| 54 57 | 
             
                  - !ruby/object:Gem::Version 
         | 
| @@ -63,6 +66,7 @@ dependencies: | |
| 63 66 | 
             
              name: spec-more
         | 
| 64 67 | 
             
              prerelease: false
         | 
| 65 68 | 
             
              requirement: &id004 !ruby/object:Gem::Requirement 
         | 
| 69 | 
            +
                none: false
         | 
| 66 70 | 
             
                requirements: 
         | 
| 67 71 | 
             
                - - ">="
         | 
| 68 72 | 
             
                  - !ruby/object:Gem::Version 
         | 
| @@ -73,8 +77,8 @@ dependencies: | |
| 73 77 | 
             
              version_requirements: *id004
         | 
| 74 78 | 
             
            description: peptide fragmentation and protein digestion
         | 
| 75 79 | 
             
            email: jtprince@gmail.com
         | 
| 76 | 
            -
            executables:  | 
| 77 | 
            -
             | 
| 80 | 
            +
            executables: 
         | 
| 81 | 
            +
            - digest.rb
         | 
| 78 82 | 
             
            extensions: []
         | 
| 79 83 |  | 
| 80 84 | 
             
            extra_rdoc_files: 
         | 
| @@ -87,8 +91,8 @@ files: | |
| 87 91 | 
             
            - README.rdoc
         | 
| 88 92 | 
             
            - Rakefile
         | 
| 89 93 | 
             
            - VERSION
         | 
| 94 | 
            +
            - bin/digest.rb
         | 
| 90 95 | 
             
            - lib/ms/in_silico.rb
         | 
| 91 | 
            -
            - lib/ms/in_silico/digest.rb
         | 
| 92 96 | 
             
            - lib/ms/in_silico/digester.rb
         | 
| 93 97 | 
             
            - lib/ms/in_silico/fragment.rb
         | 
| 94 98 | 
             
            - lib/ms/in_silico/spectrum.rb
         | 
| @@ -97,9 +101,6 @@ files: | |
| 97 101 | 
             
            - spec/ms/in_silico/spectrum_spec.rb
         | 
| 98 102 | 
             
            - spec/readme_spec.rb
         | 
| 99 103 | 
             
            - spec/spec_helper.rb
         | 
| 100 | 
            -
            - spec/tap_test_helper.rb
         | 
| 101 | 
            -
            - spec/tap_test_suite.rb
         | 
| 102 | 
            -
            - tap.yml
         | 
| 103 104 | 
             
            - test/ms/in_silico/digest_test.rb
         | 
| 104 105 | 
             
            - test/ms/in_silico/fragment_test.rb
         | 
| 105 106 | 
             
            has_rdoc: true
         | 
| @@ -112,6 +113,7 @@ rdoc_options: | |
| 112 113 | 
             
            require_paths: 
         | 
| 113 114 | 
             
            - lib
         | 
| 114 115 | 
             
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 116 | 
            +
              none: false
         | 
| 115 117 | 
             
              requirements: 
         | 
| 116 118 | 
             
              - - ">="
         | 
| 117 119 | 
             
                - !ruby/object:Gem::Version 
         | 
| @@ -119,6 +121,7 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 119 121 | 
             
                  - 0
         | 
| 120 122 | 
             
                  version: "0"
         | 
| 121 123 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 124 | 
            +
              none: false
         | 
| 122 125 | 
             
              requirements: 
         | 
| 123 126 | 
             
              - - ">="
         | 
| 124 127 | 
             
                - !ruby/object:Gem::Version 
         | 
| @@ -128,17 +131,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 128 131 | 
             
            requirements: []
         | 
| 129 132 |  | 
| 130 133 | 
             
            rubyforge_project: mspire
         | 
| 131 | 
            -
            rubygems_version: 1.3. | 
| 134 | 
            +
            rubygems_version: 1.3.7
         | 
| 132 135 | 
             
            signing_key: 
         | 
| 133 136 | 
             
            specification_version: 3
         | 
| 134 137 | 
             
            summary: in-silico calculations for mass spec data
         | 
| 135 138 | 
             
            test_files: 
         | 
| 136 | 
            -
            - spec/tap_test_suite.rb
         | 
| 137 139 | 
             
            - spec/ms/in_silico/fragment_spec.rb
         | 
| 138 140 | 
             
            - spec/ms/in_silico/spectrum_spec.rb
         | 
| 139 141 | 
             
            - spec/ms/in_silico/digester_spec.rb
         | 
| 140 142 | 
             
            - spec/readme_spec.rb
         | 
| 141 | 
            -
            - spec/tap_test_helper.rb
         | 
| 142 143 | 
             
            - spec/spec_helper.rb
         | 
| 143 144 | 
             
            - test/ms/in_silico/digest_test.rb
         | 
| 144 145 | 
             
            - test/ms/in_silico/fragment_test.rb
         | 
    
        data/lib/ms/in_silico/digest.rb
    DELETED
    
    | @@ -1,52 +0,0 @@ | |
| 1 | 
            -
            require 'tap/task'
         | 
| 2 | 
            -
            require 'ms/in_silico/digester'
         | 
| 3 | 
            -
             | 
| 4 | 
            -
            module Ms
         | 
| 5 | 
            -
              module InSilico
         | 
| 6 | 
            -
                # :startdoc::task digest a protein sequence into peptides
         | 
| 7 | 
            -
                # Digest a protein sequence into an array of peptides.
         | 
| 8 | 
            -
                #
         | 
| 9 | 
            -
                #   % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --:i dump
         | 
| 10 | 
            -
                #   MIVIGR
         | 
| 11 | 
            -
                #   SIVHPYITNEYEPFAAEK
         | 
| 12 | 
            -
                #   QQILSIMAG
         | 
| 13 | 
            -
                #
         | 
| 14 | 
            -
                class Digest < Tap::Task
         | 
| 15 | 
            -
                
         | 
| 16 | 
            -
                  config :digester, 'Trypsin'                # The name of the digester
         | 
| 17 | 
            -
                  config :min_length, nil, &c.integer_or_nil # Minimum peptide length
         | 
| 18 | 
            -
                  config :max_length, nil, &c.integer_or_nil # Maximum peptide length
         | 
| 19 | 
            -
                  config :max_misses, 0, &c.integer          # The max # of missed cleavage sites
         | 
| 20 | 
            -
                  config :site_digest, false, &c.boolean     # Digest to sites (rather than sequences)
         | 
| 21 | 
            -
             | 
| 22 | 
            -
                  def process(sequence)
         | 
| 23 | 
            -
                    unless d = Digester[digester]
         | 
| 24 | 
            -
                      raise ArgumentError, "unknown digester: #{digester}" 
         | 
| 25 | 
            -
                    end
         | 
| 26 | 
            -
                    
         | 
| 27 | 
            -
                    # extract sequence from FASTA entries
         | 
| 28 | 
            -
                    sequence = $1 if sequence =~ /\A>.*?\n(.*)\z/m
         | 
| 29 | 
            -
                    sequence.gsub!(/\s/, "")
         | 
| 30 | 
            -
                    
         | 
| 31 | 
            -
                    peptides = if site_digest 
         | 
| 32 | 
            -
                      d.site_digest(sequence, max_misses)
         | 
| 33 | 
            -
                    else
         | 
| 34 | 
            -
                      d.digest(sequence, max_misses)
         | 
| 35 | 
            -
                    end
         | 
| 36 | 
            -
                    
         | 
| 37 | 
            -
                    # filter
         | 
| 38 | 
            -
                    peptides.delete_if do |peptide|
         | 
| 39 | 
            -
                      peptide.length < min_length
         | 
| 40 | 
            -
                    end if min_length
         | 
| 41 | 
            -
                    
         | 
| 42 | 
            -
                    peptides.delete_if do |peptide|
         | 
| 43 | 
            -
                      peptide.length > max_length
         | 
| 44 | 
            -
                    end if max_length
         | 
| 45 | 
            -
                    
         | 
| 46 | 
            -
                    log 'digest', "#{sequence[0..10]}#{sequence.length > 10 ? '...' : ''} to #{peptides.length} peptides"
         | 
| 47 | 
            -
                    peptides
         | 
| 48 | 
            -
                  end
         | 
| 49 | 
            -
                  
         | 
| 50 | 
            -
                end 
         | 
| 51 | 
            -
              end
         | 
| 52 | 
            -
            end
         | 
    
        data/spec/tap_test_helper.rb
    DELETED
    
    
    
        data/spec/tap_test_suite.rb
    DELETED
    
    
    
        data/tap.yml
    DELETED
    
    | 
            File without changes
         |