orf_finder 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/orf.rb +306 -0
- data/lib/orf_common.rb +106 -0
- data/lib/orf_finder.rb +50 -0
- metadata +114 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 192ce308aeacff33641fd4f5aa62c7566787a47e
         | 
| 4 | 
            +
              data.tar.gz: 6a55712c9764c424989fce7f46f68b76e6b7d259
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: 46d2a795e81a037e61079ca4c0168fd57a3be4f84f090cda83da114412903cc8a2187787dfa4798f74ce00b570382e7f416078f11f722b401a67c9ab01b25012
         | 
| 7 | 
            +
              data.tar.gz: c99c76e64c9dd9bcff8c4b69361ffd7215c5a2fc4b63442904c0e9dab6d5e2d0a652549c343ac2e787e5a41183cbaf89f4932223b34cd887ca59a1c507e8506b
         | 
    
        data/lib/orf.rb
    ADDED
    
    | @@ -0,0 +1,306 @@ | |
| 1 | 
            +
            require 'bio'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative 'orf_common'
         | 
| 4 | 
            +
            require_relative 'orf_finder'
         | 
| 5 | 
            +
            #
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            class ORF
         | 
| 9 | 
            +
              #
         | 
| 10 | 
            +
              include ORF::ORFCommon
         | 
| 11 | 
            +
              #
         | 
| 12 | 
            +
              #
         | 
| 13 | 
            +
              attr_reader :logger, :options, :seq, :sequence
         | 
| 14 | 
            +
              attr_writer :options
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              # class initializer that normalizes sequence to Bio::Sequence,
         | 
| 17 | 
            +
              #  merges given options and creates logger
         | 
| 18 | 
            +
              # Prepares an ORF search over +sequence+ and runs it right away.
              #
              # sequence    - a plain String is wrapped in Bio::Sequence::NA; any
              #               other object (including nil) is stored as given
              # options     - Hash merged over ORFFinder::DEFAULT_OPTIONS; nil is
              #               treated as an empty Hash
              # logger_file - logger to clone for this instance; when nil a new
              #               Logger writing to STDOUT is created
              def initialize(sequence, options = {}, logger_file = nil)
                # normalize first: the :debug lookup below used to raise on a nil
                #  options argument even though the merge further down accepted it
                options ||= {}
                # logger for instance
                @logger = logger_file.nil? ? Logger.new(STDOUT) : logger_file.clone
                logger.progname = 'ORFCommon'
                logger.level    = (options[:debug] ? Logger::INFO : Logger::ERROR)
                # exact-class check on purpose: only a bare String is wrapped,
                #  objects of any other class are kept untouched
                sequence = Bio::Sequence::NA.new(sequence) if sequence.instance_of?(String)
                @sequence = sequence
                # plain string form used by the index based helpers
                @seq = @sequence.to_s
                #
                self.options = ORFFinder::DEFAULT_OPTIONS.merge(options)

                logger.info 'ORF has been initialized'
                find
              end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
              #
         | 
| 39 | 
            +
              # For a given sequence, find longest ORF
         | 
| 40 | 
            +
              #
         | 
| 41 | 
            +
              # Convenience entry point: builds an ORF for +sequence+ with the
              #  given +options+ and returns the outcome of its #find call.
              # The outcome is also kept in the class level @result variable.
              def self.find(sequence, options = {})
                @result = ORF.new(sequence, options).find
              end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
              #
         | 
| 49 | 
            +
              # return aminoacid sequence
         | 
| 50 | 
            +
              # Returns the cached amino acid result when there is one.
              # Otherwise runs #longest; if that call filled the cache the
              #  cached value is returned, else whatever #longest returned.
              def aa
                if @res_aa.nil?
                  from_search = longest
                  @res_aa.nil? ? from_search : @res_aa
                else
                  @res_aa
                end
              end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
              #
         | 
| 60 | 
            +
              # return nucleotide sequence
         | 
| 61 | 
            +
              # Returns the cached nucleotide result, or the value of #longest
              #  when nothing has been cached yet.
              def nt
                @res_nt.nil? ? longest : @res_nt
              end
         | 
| 65 | 
            +
             | 
| 66 | 
            +
              #
         | 
| 67 | 
            +
              #
         | 
| 68 | 
            +
              # finds all possible orfs in sequence
         | 
| 69 | 
            +
              # Searches the three reading frames of the sequence.
              #
              # Returns the sequence itself (nil or empty) when there is nothing
              #  to search; otherwise stores and returns a Hash keyed by
              #  :frame1, :frame2 and :frame3, each entry filled in by
              #  #find_longest.
              def find
                return sequence if sequence.nil? || sequence.size.zero?

                starts    = all_codons_indices(:start)
                stops     = all_codons_indices(:stop)
                per_frame = all_sequences(starts, stops, seq.size, [0, 1, 2])

                logger.info "start codons idx: #{starts}"
                logger.info "stop codons idx: #{stops}"
                logger.info per_frame

                # one slot per reading frame, filled by find_longest
                found = { frame1: {}, frame2: {}, frame3: {} }
                per_frame.each_with_index do |ranges, position|
                  find_longest(ranges, position, found)
                end

                # print ranges if debug is activated
                if options[:debug]
                  found.each do |name, data|
                    data[:orfs].each { |range| print_range(name, range) }
                  end
                end

                @orf = found
              end
         | 
| 98 | 
            +
             | 
| 99 | 
            +
              private
         | 
| 100 | 
            +
             | 
| 101 | 
            +
              #
         | 
| 102 | 
            +
              # iterate over all ranges in frame and find the longest
         | 
| 103 | 
            +
              # Fills the slot of +orf+ that belongs to frame number +index+.
              #
              # frame - Array of range Hashes (:start, :stop, :fallback)
              # index - zero based frame number, mapped through #frame_sym
              # orf   - Hash of per-frame slots, updated in place
              #
              # :orfs receives the non-fallback ranges, or the fallback ones when
              #  there are no others; :longest receives the first range with the
              #  greatest length (nil when :orfs is empty), which is also the
              #  return value.
              def find_longest(frame, index, orf)
                fallbacks, regular = frame.partition { |range| range[:fallback] }
                slot = orf[frame_sym(index)]
                slot[:orfs] = regular.empty? ? fallbacks : regular

                best     = nil
                best_len = nil
                slot[:orfs].each do |range|
                  len = range[:stop] - range[:start] + 1
                  # strict comparison keeps the earliest range on ties
                  next unless best.nil? || len > best_len

                  best     = range
                  best_len = len
                end
                slot[:longest] = best
              end
         | 
| 127 | 
            +
             | 
| 128 | 
            +
              #
         | 
| 129 | 
            +
              # get the longest sequence in each frame and translate
         | 
| 130 | 
            +
              #  to aminoacid
         | 
| 131 | 
            +
              # Collects the longest range of every frame as a sequence and its
              #  translation.
              #
              # Runs #find first when no search result is stored. Returns a Hash
              #  of frame name => nucleotide sequence; when there is still no
              #  search result the Hash holds an empty String per frame and
              #  nothing is cached. Otherwise @res_nt and @res_aa are stored.
              def longest
                # run find method if search has not been done
                find if @orf.nil?

                nucleotides = { frame1: '', frame2: '', frame3: '' }
                # if @orf is empty then no point in continuing
                return nucleotides if @orf.nil? || @orf.empty?

                @orf.each_pair do |name, data|
                  nucleotides[name] = get_range(data[:longest])
                end
                @res_nt = nucleotides

                # translate to aa sequence
                amino = { frame1: '', frame2: '', frame3: '' }
                nucleotides.each_pair { |name, na| amino[name] = na.translate }
                @res_aa = amino

                # return the nucleotide sequence as default
                nucleotides
              end
         | 
| 154 | 
            +
             | 
| 155 | 
            +
              #
         | 
| 156 | 
            +
              # Find all indexes for valid codons
         | 
| 157 | 
            +
              #  (either for :start or :stop)
         | 
| 158 | 
            +
              # Lists every position at which one of the codons configured under
              #  +option_name+ (:start or :stop) occurs in the sequence.
              #
              # Each hit goes through #index_normalization; the result is one
              #  sorted Array. An unset or empty codon list gives [].
              def all_codons_indices(option_name)
                kind   = option_name.to_sym
                codons = options[kind]
                return [] if codons.nil? || codons.empty?

                hits = []
                codons.each do |codon|
                  at = seq.index(codon, 0)
                  while at
                    hits << index_normalization(kind, at)
                    # resume one character later so overlapping hits are found
                    at = seq.index(codon, at + 1)
                  end
                end
                hits.sort
              end
         | 
| 180 | 
            +
             | 
| 181 | 
            +
              #
         | 
| 182 | 
            +
              # get indexes only from a given frame
         | 
| 183 | 
            +
              # because of a bug the start flag must be given
         | 
| 184 | 
            +
              #  indicating if it is looking for start or stop
         | 
| 185 | 
            +
              #  codons in frame
         | 
| 186 | 
            +
              # Keeps the indexes of +idxs+ that line up with reading frame
              #  +frame+.
              #
              # start - truthy: an index i is kept when (i - frame) is a
              #         multiple of three; falsy: when (i + 1 - frame) is
              def filter_codons_by_frame(idxs, frame, start = true)
                shift = start ? 0 : 1
                idxs.select { |i| ((i + shift - frame) % 3).zero? }
              end
         | 
| 195 | 
            +
             | 
| 196 | 
            +
              #
         | 
| 197 | 
            +
              # from the combination of start and stop indexes, find
         | 
| 198 | 
            +
              #  the longest one
         | 
| 199 | 
            +
              # Builds the candidate ranges of one reading frame.
              #
              # start_idxs / stop_idxs - codon indexes already filtered for
              #                          +frame+
              # frame                  - frame offset
              # seq_size               - sequence length; reduced here by
              #                          (seq_size - frame) % 3
              #
              # When a list is empty the frame offset (start) or the last
              #  position of the reduced length (stop) stands in for it and the
              #  resulting ranges are flagged as fallback. Returns the ranges
              #  #sequences_in_frame reported as valid, or, when there are none,
              #  its unique fallback ranges except those covering the whole
              #  frame.
              def valid_sequences_by_frame(start_idxs, stop_idxs, frame, seq_size)
                usable  = seq_size - (seq_size - frame) % 3
                starts  = start_idxs.clone
                stops   = stop_idxs.clone
                starts << frame if start_idxs.empty?
                stops << usable - 1 if stop_idxs.empty?
                # true when at least one side had to be made up above
                made_up = start_idxs.empty? || stop_idxs.empty?

                if options[:debug]
                  logger.info "frame: #{frame}"
                  logger.info "  start: #{starts} | stop :#{stops}"
                  logger.info "  seq size: #{usable}"
                  logger.info "  #{seq[frame..usable]}"
                end

                valid    = []
                fallback = []
                sequences_in_frame({ start: starts, stop: stops },
                                   { valid: valid, fallback: fallback },
                                   usable, frame, made_up)
                return valid unless valid.empty?

                whole_frame = size_of_frame(frame)
                partial = fallback.uniq.reject do |r|
                  get_range_str(r[:start], r[:stop], false).size == whole_frame
                end
                logger.info 'no ORF with start and stop codons,' \
                  ' defaulting to fallback'
                partial
              end
         | 
| 236 | 
            +
             | 
| 237 | 
            +
              #
         | 
| 238 | 
            +
              # given start and stop codon indexes, decide which are the valid
         | 
| 239 | 
            +
              #  sequence for an orf
         | 
| 240 | 
            +
              # TODO: reject sequences that have a stop codon in them
         | 
| 241 | 
            +
              # Pairs every start index with every stop index of one frame and
              #  sorts the resulting ranges into +arrays+.
              #
              # idxs      - { start: [...], stop: [...] }; the stop list is
              #             binary searched, so it is expected to be sorted
              # arrays    - { valid: [...], fallback: [...] }, appended to
              # seq_size  - frame length, already cut to whole codons by caller
              # frame     - frame offset
              # added_pos - fallback flag given to start/stop pairs
              #
              # Ranges shorter than options[:min] are dropped. Returns the
              #  complete list of ranges in creation order.
              def sequences_in_frame(idxs, arrays, seq_size, frame, added_pos)
                starts   = idxs[:start]
                stops    = idxs[:stop]
                last_pos = seq_size - 1
                min_len  = options[:min]
                # first stop index at or after (pos - 1), nil when there is none
                first_stop_from = lambda do |pos|
                  stops.bsearch { |el| el >= (pos - 1) }
                end

                found = []
                starts.each do |from|
                  stops.each do |to|
                    # fallback that begins at the frame offset: long enough
                    #  and not beyond the first stop of the frame
                    if (to + 1 - frame) >= min_len &&
                       to <= first_stop_from.call(frame)
                      found << { start: frame, stop: to, fallback: true }
                    end
                    # stop must lie after the start
                    next if from >= to
                    # another stop sits between this start and this stop
                    next if to > first_stop_from.call(from)
                    # shorter than the minimum
                    next if (to + 1 - from) < min_len

                    found << { start: from, stop: to, fallback: added_pos }
                  end

                  # fallback running from this start to the end of the frame,
                  #  only when long enough and no stop cuts it short
                  next unless (last_pos - from) >= min_len

                  blocker = first_stop_from.call(from)
                  next if !blocker.nil? && last_pos > blocker

                  found << { start: from, stop: last_pos, fallback: true }
                end

                found.each do |candidate|
                  bucket = candidate[:fallback] ? :fallback : :valid
                  arrays[bucket] << candidate
                end
              end
         | 
| 287 | 
            +
             | 
| 288 | 
            +
              #
         | 
| 289 | 
            +
              #
         | 
| 290 | 
            +
              #
         | 
| 291 | 
            +
              # Runs #valid_sequences_by_frame once per entry of +read_frame+.
              #
              # start_idx / stop_idx - codon indexes of the whole sequence
              # seq_size             - sequence length
              # read_frame           - frame offsets to process
              #
              # Returns one Array of ranges per processed frame, in the order of
              #  +read_frame+.
              def all_sequences(start_idx, stop_idx, seq_size, read_frame = [0, 1, 2])
                read_frame.map do |frame|
                  frame_starts = filter_codons_by_frame(start_idx, frame, true)
                  frame_stops  = filter_codons_by_frame(stop_idx, frame, false)
                  valid_sequences_by_frame(frame_starts, frame_stops, frame, seq_size)
                end
              end
         | 
| 306 | 
            +
            end
         | 
    
        data/lib/orf_common.rb
    ADDED
    
    | @@ -0,0 +1,106 @@ | |
| 1 | 
            +
            require 'logger'
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            class ORF
         | 
| 6 | 
            +
              module ORFCommon
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                # Public wrapper around #print_range: +str+ is used as the label
                #  and +range+ as the range Hash. The outcome of #print_range is
                #  handed back unchanged.
                def range_to_s(range, str = '')
                  print_range(str, range)
                end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                private
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                #
         | 
| 16 | 
            +
                # transform range to sequence
         | 
| 17 | 
            +
                # Turns a range into a Bio::Sequence::NA.
                #
                # arg1 - range Hash with :start and :stop, or the start index
                #        when +arg2+ is given; nil gives an empty sequence
                # arg2 - stop index (optional)
                def get_range(arg1, arg2 = nil)
                  return Bio::Sequence::NA.new('') if arg1.nil?

                  first, last = arg2.nil? ? [arg1[:start], arg1[:stop]] : [arg1, arg2]
                  Bio::Sequence::NA.new(get_range_str(first, last))
                end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                #
         | 
| 30 | 
            +
                # transform range to string
         | 
| 31 | 
            +
                def get_range_str(start, stop, include_codons = true)
         | 
| 32 | 
            +
                  # check if there is a start codon before start
         | 
| 33 | 
            +
                  #  and an end codon after stop, if there is, show it!
         | 
| 34 | 
            +
                  start_codon = ''
         | 
| 35 | 
            +
                  stop_codon = ''
         | 
| 36 | 
            +
                  if include_codons
         | 
| 37 | 
            +
                    if start - 3 >= 0 &&
         | 
| 38 | 
            +
                       options[:start].include?(seq[(start - 3)..(start - 1)])
         | 
| 39 | 
            +
                      start_codon = seq[(start - 3)..(start - 1)]
         | 
| 40 | 
            +
                    end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                    if stop + 3 <= seq.size - 1 &&
         | 
| 43 | 
            +
                       options[:stop].include?(seq[(stop + 1)..(stop + 3)])
         | 
| 44 | 
            +
                      stop_codon = seq[(stop + 1)..(stop + 3)]
         | 
| 45 | 
            +
                    end
         | 
| 46 | 
            +
                  end
         | 
| 47 | 
            +
                  "#{start_codon}#{seq[start..stop]}#{stop_codon}"
         | 
| 48 | 
            +
                end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                #
         | 
| 51 | 
            +
                # auxiliary method that prints range
         | 
| 52 | 
            +
                # Writes one line describing +range+ to standard output.
                #
                # key   - label printed in front of the line
                # range - Hash with :start, :stop and :fallback, or nil
                #
                # A nil range prints "<key> : (empty)". Otherwise the line shows
                #  the text before the range, the range (via #get_range_str) and
                #  the text after it, separated by '|', the letters a/t/g/c
                #  spaced out, followed by the size and a fallback marker.
                def print_range(key, range)
                  # simple proc to add spaces, works as auxiliary
                  #  method to print range
                  add_spaces = proc do |str|
                    str.gsub(/([atgc]{1})/, '\1 ').strip
                  end
                  if range.nil?
                    str = "#{key} : (empty)"
                  else
                    orf = add_spaces.call(get_range_str(range[:start], range[:stop]))
                    pre = if range[:start] == 0
                            ''
                          else
                            add_spaces.call(get_range_str(0, range[:start] - 1))
                          end
                    # ranges carry :stop (there is no :end key), so compare
                    #  that to decide whether anything follows the range
                    suf = if range[:stop] == seq.size - 1
                            ''
                          else
                            add_spaces.call(get_range_str(range[:stop] + 1, seq.size - 1))
                          end
                    #
                    sep = '|'
                    str = "#{key}: #{pre}#{sep}#{orf}#{sep}#{suf}"
                    str += ' : ' \
                      "size=#{seq[range[:start]..range[:stop]].size}"
                    str += ' (fallback)' if range[:fallback]
                  end
                  puts str
                end
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                #
         | 
| 83 | 
            +
                # necessary normalization for index to start after
         | 
| 84 | 
            +
                #  start codon and end just before stop codon
         | 
| 85 | 
            +
                # example: aaa atg aaa aaa taa aaa
         | 
| 86 | 
            +
                #  the search results in codon 2 and 5, while the
         | 
| 87 | 
            +
                #  resulting orf is codons 3 and 4
         | 
| 88 | 
            +
                # Shifts a raw codon position onto the range boundary:
                #  three positions forward for :start, one back for :stop.
                # Any other +option_name+ gives nil.
                def index_normalization(option_name, idx)
                  case option_name
                  when :start then idx + 3
                  when :stop  then idx - 1
                  end
                end
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                #
         | 
| 97 | 
            +
                # create hash symbol from index
         | 
| 98 | 
            +
                # Maps a zero based frame number to its Hash key,
                #  e.g. 0 => :frame1.
                def frame_sym(index)
                  :"frame#{index + 1}"
                end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                # Number of letters from offset +frame+ to the end of the
                #  sequence, rounded down to a multiple of three.
                def size_of_frame(frame)
                  remaining = seq.size - frame
                  remaining - remaining % 3
                end
         | 
| 105 | 
            +
              end
         | 
| 106 | 
            +
            end
         | 
    
        data/lib/orf_finder.rb
    ADDED
    
    | @@ -0,0 +1,50 @@ | |
| 1 | 
            +
            require_relative 'orf'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            # Wrapper class that processes the direct and reverse sequences
         | 
| 6 | 
            +
            class ORFFinder
              # Options used for every key the caller does not override.
              # The hash is frozen so the shared defaults cannot be altered by
              # accident; #initialize merges it into a fresh hash per instance.
              DEFAULT_OPTIONS = { start: %w(atg),
                                  stop:  %w(tag taa tga),
                                  reverse: true,
                                  direct: true,
                                  min: 6,
                                  default_to_seq: false,
                                  debug: false }.freeze

              # Builds one ORF object per requested strand.
              #
              # @param sequence [String, Object] a plain String is wrapped in
              #   Bio::Sequence::NA; any other object is used as given and must
              #   respond to #complement when the :reverse option is enabled
              # @param options [Hash, nil] overrides for DEFAULT_OPTIONS
              # @param logger [Object, nil] handed to ORF unchanged
              def initialize(sequence, options = {}, logger = nil)
                # The exact-class check is deliberate: Bio::Sequence::NA itself
                # inherits from String (per the BioRuby docs), so is_a?(String)
                # would needlessly re-wrap sequences that are already NA objects.
                sequence = Bio::Sequence::NA.new(sequence) if sequence.instance_of?(String)
                options = DEFAULT_OPTIONS.merge(options || {})

                @output = {}
                @output[:direct] = ORF.new(sequence, options, logger) if options[:direct]
                if options[:reverse]
                  @output[:reverse] = ORF.new(sequence.complement, options, logger)
                end
              end

              # @return [Hash] the result of ORF#nt for each processed strand,
              #   keyed by :direct and/or :reverse
              def nt
                collect_from_strands(:nt)
              end

              # @return [Hash] the result of ORF#aa for each processed strand,
              #   keyed by :direct and/or :reverse
              def aa
                collect_from_strands(:aa)
              end

              # @return [ORF, nil] the ORF built from the sequence as given, or
              #   nil when the :direct option was disabled
              def direct
                @output[:direct]
              end

              # @return [ORF, nil] the ORF built from sequence.complement, or
              #   nil when the :reverse option was disabled
              def reverse
                @output[:reverse]
              end

              private

              # Calls +reader+ on every stored ORF and keeps the strand keys.
              def collect_from_strands(reader)
                @output.each_with_object({}) do |(strand, orf), res|
                  res[strand] = orf.public_send(reader)
                end
              end
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,114 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: orf_finder
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.0.1
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - André Veríssimo
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2016-01-11 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: bio
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - "~>"
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '1.5'
         | 
| 20 | 
            +
                - - ">="
         | 
| 21 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 22 | 
            +
                    version: 1.5.0
         | 
| 23 | 
            +
              type: :runtime
         | 
| 24 | 
            +
              prerelease: false
         | 
| 25 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 26 | 
            +
                requirements:
         | 
| 27 | 
            +
                - - "~>"
         | 
| 28 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 29 | 
            +
                    version: '1.5'
         | 
| 30 | 
            +
                - - ">="
         | 
| 31 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 32 | 
            +
                    version: 1.5.0
         | 
| 33 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 34 | 
            +
              name: byebug
         | 
| 35 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 36 | 
            +
                requirements:
         | 
| 37 | 
            +
                - - "~>"
         | 
| 38 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 39 | 
            +
                    version: '8.2'
         | 
| 40 | 
            +
                - - ">="
         | 
| 41 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 42 | 
            +
                    version: 8.2.1
         | 
| 43 | 
            +
              type: :development
         | 
| 44 | 
            +
              prerelease: false
         | 
| 45 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 46 | 
            +
                requirements:
         | 
| 47 | 
            +
                - - "~>"
         | 
| 48 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 49 | 
            +
                    version: '8.2'
         | 
| 50 | 
            +
                - - ">="
         | 
| 51 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 52 | 
            +
                    version: 8.2.1
         | 
| 53 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 54 | 
            +
              name: rspec
         | 
| 55 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 56 | 
            +
                requirements:
         | 
| 57 | 
            +
                - - "~>"
         | 
| 58 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 59 | 
            +
                    version: '3.4'
         | 
| 60 | 
            +
                - - ">="
         | 
| 61 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 62 | 
            +
                    version: 3.4.0
         | 
| 63 | 
            +
              type: :development
         | 
| 64 | 
            +
              prerelease: false
         | 
| 65 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 66 | 
            +
                requirements:
         | 
| 67 | 
            +
                - - "~>"
         | 
| 68 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 69 | 
            +
                    version: '3.4'
         | 
| 70 | 
            +
                - - ">="
         | 
| 71 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 72 | 
            +
                    version: 3.4.0
         | 
| 73 | 
            +
            description: |2
         | 
| 74 | 
            +
                  ORF Finder is a library that, given a sequence of nucleotides,
         | 
| 75 | 
            +
                  finds all the possible ORFs in the sequence.
         | 
| 76 | 
            +
                  It will look for a sequence that starts with a start codon and
         | 
| 77 | 
            +
                  ends with a stop codon.
         | 
| 78 | 
            +
                  It will default to the beginning of the sequence if it cannot
         | 
| 79 | 
            +
                  find an ORF long enough with the start codons. It will also
         | 
| 80 | 
            +
                  use the end of the sequence if no stop codons are present in the
         | 
| 81 | 
            +
                  sequence reading frame.
         | 
| 82 | 
            +
            email: andre.verissimo@tecnico.ulisboa.pt
         | 
| 83 | 
            +
            executables: []
         | 
| 84 | 
            +
            extensions: []
         | 
| 85 | 
            +
            extra_rdoc_files: []
         | 
| 86 | 
            +
            files:
         | 
| 87 | 
            +
            - lib/orf.rb
         | 
| 88 | 
            +
            - lib/orf_common.rb
         | 
| 89 | 
            +
            - lib/orf_finder.rb
         | 
| 90 | 
            +
            homepage: http://rubygems.org/gems/hola
         | 
| 91 | 
            +
            licenses:
         | 
| 92 | 
            +
            - GPL v3
         | 
| 93 | 
            +
            metadata: {}
         | 
| 94 | 
            +
            post_install_message: 
         | 
| 95 | 
            +
            rdoc_options: []
         | 
| 96 | 
            +
            require_paths:
         | 
| 97 | 
            +
            - lib
         | 
| 98 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 99 | 
            +
              requirements:
         | 
| 100 | 
            +
              - - ">="
         | 
| 101 | 
            +
                - !ruby/object:Gem::Version
         | 
| 102 | 
            +
                  version: '0'
         | 
| 103 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 104 | 
            +
              requirements:
         | 
| 105 | 
            +
              - - ">="
         | 
| 106 | 
            +
                - !ruby/object:Gem::Version
         | 
| 107 | 
            +
                  version: '0'
         | 
| 108 | 
            +
            requirements: []
         | 
| 109 | 
            +
            rubyforge_project: 
         | 
| 110 | 
            +
            rubygems_version: 2.4.8
         | 
| 111 | 
            +
            signing_key: 
         | 
| 112 | 
            +
            specification_version: 4
         | 
| 113 | 
            +
            summary: Finds the longest orfs in a nucleotide sequence.
         | 
| 114 | 
            +
            test_files: []
         |