sequence_logo 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +21 -21
- data/README.md +65 -65
- data/Rakefile +5 -5
- data/TODO.txt +7 -7
- data/bin/glue_logos +2 -2
- data/bin/sequence_logo +2 -2
- data/lib/sequence_logo/cli.rb +36 -36
- data/lib/sequence_logo/exec/glue_logos.rb +97 -66
- data/lib/sequence_logo/exec/sequence_logo.rb +51 -51
- data/lib/sequence_logo/pmflogo_lib.rb +113 -113
- data/lib/sequence_logo/version.rb +3 -3
- data/lib/sequence_logo/ytilib/addon.rb +246 -246
- data/lib/sequence_logo/ytilib/bismark.rb +70 -70
- data/lib/sequence_logo/ytilib/hack1.rb +75 -75
- data/lib/sequence_logo/ytilib/infocod.rb +108 -108
- data/lib/sequence_logo/ytilib/iupac.rb +92 -92
- data/lib/sequence_logo/ytilib/pm.rb +562 -562
- data/lib/sequence_logo/ytilib/pmsd.rb +98 -98
- data/lib/sequence_logo/ytilib/ppm_support.rb +85 -85
- data/lib/sequence_logo/ytilib/randoom.rb +131 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +146 -146
- data/lib/sequence_logo/ytilib.rb +9 -9
- data/lib/sequence_logo.rb +7 -7
- data/sequence_logo.gemspec +21 -21
- data/test/data/pcm/AHR_si.pcm +10 -10
- data/test/data/pcm/AIRE_f2.pcm +19 -19
- metadata +3 -4
| @@ -1,99 +1,99 @@ | |
| 1 | 
            -
            #!/usr/bin/ruby
         | 
| 2 | 
            -
            module Ytilib
         | 
| 3 | 
            -
              class PM
         | 
| 4 | 
            -
                def score_sigma(trycount = 4**10, approx = false, bg = nil)
         | 
| 5 | 
            -
                  
         | 
| 6 | 
            -
                  scores = []
         | 
| 7 | 
            -
                  if @size <= 10 && !approx
         | 
| 8 | 
            -
                    (0...4**@size).each { |i| 
         | 
| 9 | 
            -
                      word = i.to_s(4).rjust(@size, "0").tr("0123", "ACGT")
         | 
| 10 | 
            -
                      scores << score(word)
         | 
| 11 | 
            -
                    }
         | 
| 12 | 
            -
                  else
         | 
| 13 | 
            -
                    trycount.times {
         | 
| 14 | 
            -
                      word = bg ? Randoom.rand_seq(@size, bg) : Randoom.rand_seq(@size)
         | 
| 15 | 
            -
                      scores << score(word)
         | 
| 16 | 
            -
                    }
         | 
| 17 | 
            -
                  end
         | 
| 18 | 
            -
                  sum1 = scores.inject(0) { |sum,s| sum += s }
         | 
| 19 | 
            -
                  mean = sum1 / scores.size
         | 
| 20 | 
            -
                  
         | 
| 21 | 
            -
                  sum2, sumc = 0, 0
         | 
| 22 | 
            -
                  scores.each { |score|
         | 
| 23 | 
            -
                    sum2 += (score-mean)**2
         | 
| 24 | 
            -
                    sumc += (score-mean)
         | 
| 25 | 
            -
                  }
         | 
| 26 | 
            -
                  variance = (sum2 - sumc**2 / scores.size) / (scores.size-1)
         | 
| 27 | 
            -
                  
         | 
| 28 | 
            -
                  sigma = Math.sqrt(variance)
         | 
| 29 | 
            -
                  if block_given?
         | 
| 30 | 
            -
                    yield(sigma, mean)
         | 
| 31 | 
            -
                  end
         | 
| 32 | 
            -
                  
         | 
| 33 | 
            -
                  return sigma
         | 
| 34 | 
            -
                end
         | 
| 35 | 
            -
                
         | 
| 36 | 
            -
                def fast_score_sigma
         | 
| 37 | 
            -
                  n, mean, m2 = 0, 0, 0
         | 
| 38 | 
            -
                  
         | 
| 39 | 
            -
                  recursive_walk([matrix['A'],matrix['C'],matrix['G'],matrix['T']], 0, 0) { |x|
         | 
| 40 | 
            -
                    n = n + 1
         | 
| 41 | 
            -
                    delta = x - mean
         | 
| 42 | 
            -
                    mean = mean + delta/n
         | 
| 43 | 
            -
                    m2 = m2 + delta*(x-mean)
         | 
| 44 | 
            -
                  }
         | 
| 45 | 
            -
                  
         | 
| 46 | 
            -
                  variance = m2/(n - 1)
         | 
| 47 | 
            -
                  
         | 
| 48 | 
            -
                  if block_given?
         | 
| 49 | 
            -
                    yield(sigma = Math.sqrt(variance), mean)
         | 
| 50 | 
            -
                  end
         | 
| 51 | 
            -
                  
         | 
| 52 | 
            -
                  return sigma
         | 
| 53 | 
            -
                end
         | 
| 54 | 
            -
                
         | 
| 55 | 
            -
                def fast_score_sigma_precise
         | 
| 56 | 
            -
                  n, mean = 0, 0
         | 
| 57 | 
            -
                  
         | 
| 58 | 
            -
                  recursive_walk([matrix['A'],matrix['C'],matrix['G'],matrix['T']], 0, 0) { |x|
         | 
| 59 | 
            -
                    n += 1
         | 
| 60 | 
            -
                    delta = x - mean
         | 
| 61 | 
            -
                    mean = mean + delta/n
         | 
| 62 | 
            -
                  }
         | 
| 63 | 
            -
                  
         | 
| 64 | 
            -
                  n, m2 = 0, 0
         | 
| 65 | 
            -
                  recursive_walk([matrix['A'],matrix['C'],matrix['G'],matrix['T']], 0, 0) { |x|
         | 
| 66 | 
            -
                    n = n + 1
         | 
| 67 | 
            -
                    delta = x - mean
         | 
| 68 | 
            -
                    m2 = m2 + delta*(x-mean)
         | 
| 69 | 
            -
                  }
         | 
| 70 | 
            -
                  
         | 
| 71 | 
            -
                  variance = m2/(n - 1)
         | 
| 72 | 
            -
                  
         | 
| 73 | 
            -
                  if block_given?
         | 
| 74 | 
            -
                    yield(sigma = Math.sqrt(variance), mean)
         | 
| 75 | 
            -
                  end
         | 
| 76 | 
            -
                  
         | 
| 77 | 
            -
                  return sigma
         | 
| 78 | 
            -
                end
         | 
| 79 | 
            -
                
         | 
| 80 | 
            -
              private
         | 
| 81 | 
            -
                def recursive_walk(matrix, score, i)
         | 
| 82 | 
            -
                  if i < @size
         | 
| 83 | 
            -
                    
         | 
| 84 | 
            -
                    recursive_walk(matrix, score + matrix[0][i], i+1) { |x| yield x }
         | 
| 85 | 
            -
                    recursive_walk(matrix, score + matrix[1][i], i+1) { |x| yield x }
         | 
| 86 | 
            -
                    recursive_walk(matrix, score + matrix[2][i], i+1) { |x| yield x }
         | 
| 87 | 
            -
                    recursive_walk(matrix, score + matrix[3][i], i+1) { |x| yield x }
         | 
| 88 | 
            -
                    
         | 
| 89 | 
            -
                  else
         | 
| 90 | 
            -
                    if block_given?
         | 
| 91 | 
            -
                      yield(score)
         | 
| 92 | 
            -
                    else
         | 
| 93 | 
            -
                      raise "no block for recursive walk"
         | 
| 94 | 
            -
                    end
         | 
| 95 | 
            -
                  end
         | 
| 96 | 
            -
                end
         | 
| 97 | 
            -
              
         | 
| 98 | 
            -
              end
         | 
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
            module Ytilib
         | 
| 3 | 
            +
              class PM
         | 
| 4 | 
            +
                def score_sigma(trycount = 4**10, approx = false, bg = nil)
         | 
| 5 | 
            +
                  
         | 
| 6 | 
            +
                  scores = []
         | 
| 7 | 
            +
                  if @size <= 10 && !approx
         | 
| 8 | 
            +
                    (0...4**@size).each { |i| 
         | 
| 9 | 
            +
                      word = i.to_s(4).rjust(@size, "0").tr("0123", "ACGT")
         | 
| 10 | 
            +
                      scores << score(word)
         | 
| 11 | 
            +
                    }
         | 
| 12 | 
            +
                  else
         | 
| 13 | 
            +
                    trycount.times {
         | 
| 14 | 
            +
                      word = bg ? Randoom.rand_seq(@size, bg) : Randoom.rand_seq(@size)
         | 
| 15 | 
            +
                      scores << score(word)
         | 
| 16 | 
            +
                    }
         | 
| 17 | 
            +
                  end
         | 
| 18 | 
            +
                  sum1 = scores.inject(0) { |sum,s| sum += s }
         | 
| 19 | 
            +
                  mean = sum1 / scores.size
         | 
| 20 | 
            +
                  
         | 
| 21 | 
            +
                  sum2, sumc = 0, 0
         | 
| 22 | 
            +
                  scores.each { |score|
         | 
| 23 | 
            +
                    sum2 += (score-mean)**2
         | 
| 24 | 
            +
                    sumc += (score-mean)
         | 
| 25 | 
            +
                  }
         | 
| 26 | 
            +
                  variance = (sum2 - sumc**2 / scores.size) / (scores.size-1)
         | 
| 27 | 
            +
                  
         | 
| 28 | 
            +
                  sigma = Math.sqrt(variance)
         | 
| 29 | 
            +
                  if block_given?
         | 
| 30 | 
            +
                    yield(sigma, mean)
         | 
| 31 | 
            +
                  end
         | 
| 32 | 
            +
                  
         | 
| 33 | 
            +
                  return sigma
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
                
         | 
| 36 | 
            +
                def fast_score_sigma
         | 
| 37 | 
            +
                  n, mean, m2 = 0, 0, 0
         | 
| 38 | 
            +
                  
         | 
| 39 | 
            +
                  recursive_walk([matrix['A'],matrix['C'],matrix['G'],matrix['T']], 0, 0) { |x|
         | 
| 40 | 
            +
                    n = n + 1
         | 
| 41 | 
            +
                    delta = x - mean
         | 
| 42 | 
            +
                    mean = mean + delta/n
         | 
| 43 | 
            +
                    m2 = m2 + delta*(x-mean)
         | 
| 44 | 
            +
                  }
         | 
| 45 | 
            +
                  
         | 
| 46 | 
            +
                  variance = m2/(n - 1)
         | 
| 47 | 
            +
                  
         | 
| 48 | 
            +
                  if block_given?
         | 
| 49 | 
            +
                    yield(sigma = Math.sqrt(variance), mean)
         | 
| 50 | 
            +
                  end
         | 
| 51 | 
            +
                  
         | 
| 52 | 
            +
                  return sigma
         | 
| 53 | 
            +
                end
         | 
| 54 | 
            +
                
         | 
| 55 | 
            +
                def fast_score_sigma_precise
         | 
| 56 | 
            +
                  n, mean = 0, 0
         | 
| 57 | 
            +
                  
         | 
| 58 | 
            +
                  recursive_walk([matrix['A'],matrix['C'],matrix['G'],matrix['T']], 0, 0) { |x|
         | 
| 59 | 
            +
                    n += 1
         | 
| 60 | 
            +
                    delta = x - mean
         | 
| 61 | 
            +
                    mean = mean + delta/n
         | 
| 62 | 
            +
                  }
         | 
| 63 | 
            +
                  
         | 
| 64 | 
            +
                  n, m2 = 0, 0
         | 
| 65 | 
            +
                  recursive_walk([matrix['A'],matrix['C'],matrix['G'],matrix['T']], 0, 0) { |x|
         | 
| 66 | 
            +
                    n = n + 1
         | 
| 67 | 
            +
                    delta = x - mean
         | 
| 68 | 
            +
                    m2 = m2 + delta*(x-mean)
         | 
| 69 | 
            +
                  }
         | 
| 70 | 
            +
                  
         | 
| 71 | 
            +
                  variance = m2/(n - 1)
         | 
| 72 | 
            +
                  
         | 
| 73 | 
            +
                  if block_given?
         | 
| 74 | 
            +
                    yield(sigma = Math.sqrt(variance), mean)
         | 
| 75 | 
            +
                  end
         | 
| 76 | 
            +
                  
         | 
| 77 | 
            +
                  return sigma
         | 
| 78 | 
            +
                end
         | 
| 79 | 
            +
                
         | 
| 80 | 
            +
              private
         | 
| 81 | 
            +
                def recursive_walk(matrix, score, i)
         | 
| 82 | 
            +
                  if i < @size
         | 
| 83 | 
            +
                    
         | 
| 84 | 
            +
                    recursive_walk(matrix, score + matrix[0][i], i+1) { |x| yield x }
         | 
| 85 | 
            +
                    recursive_walk(matrix, score + matrix[1][i], i+1) { |x| yield x }
         | 
| 86 | 
            +
                    recursive_walk(matrix, score + matrix[2][i], i+1) { |x| yield x }
         | 
| 87 | 
            +
                    recursive_walk(matrix, score + matrix[3][i], i+1) { |x| yield x }
         | 
| 88 | 
            +
                    
         | 
| 89 | 
            +
                  else
         | 
| 90 | 
            +
                    if block_given?
         | 
| 91 | 
            +
                      yield(score)
         | 
| 92 | 
            +
                    else
         | 
| 93 | 
            +
                      raise "no block for recursive walk"
         | 
| 94 | 
            +
                    end
         | 
| 95 | 
            +
                  end
         | 
| 96 | 
            +
                end
         | 
| 97 | 
            +
              
         | 
| 98 | 
            +
              end
         | 
| 99 99 | 
             
            end
         | 
| @@ -1,85 +1,85 @@ | |
| 1 | 
            -
            class Object
         | 
| 2 | 
            -
              def deep_dup
         | 
| 3 | 
            -
                Marshal.load(Marshal.dump(self))
         | 
| 4 | 
            -
              end
         | 
| 5 | 
            -
            end
         | 
| 6 | 
            -
             | 
| 7 | 
            -
            def get_ppm_from_file(in_file_name)
         | 
| 8 | 
            -
              case File.ext_wo_name(in_file_name)
         | 
| 9 | 
            -
              when 'pat', 'pcm'
         | 
| 10 | 
            -
                pm = PM.load(in_file_name)
         | 
| 11 | 
            -
                pm.fixwc  if pm.words_count
         | 
| 12 | 
            -
              when 'mfa', 'fasta', 'plain'
         | 
| 13 | 
            -
                pm = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
         | 
| 14 | 
            -
              when 'xml'
         | 
| 15 | 
            -
                pm = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
         | 
| 16 | 
            -
              when in_file_name
         | 
| 17 | 
            -
                pm = PPM.from_IUPAC(in_file_name.upcase)
         | 
| 18 | 
            -
              end
         | 
| 19 | 
            -
              pm.get_ppm
         | 
| 20 | 
            -
            rescue
         | 
| 21 | 
            -
              nil
         | 
| 22 | 
            -
            end
         | 
| 23 | 
            -
             | 
| 24 | 
            -
            class PPM
         | 
| 25 | 
            -
              attr_accessor :name
         | 
| 26 | 
            -
              
         | 
| 27 | 
            -
              def get_ppm
         | 
| 28 | 
            -
                self
         | 
| 29 | 
            -
              end
         | 
| 30 | 
            -
              
         | 
| 31 | 
            -
              def get_line(v)
         | 
| 32 | 
            -
                ( (v - icd4of4) / icd4of4 ).abs
         | 
| 33 | 
            -
              end
         | 
| 34 | 
            -
             | 
| 35 | 
            -
              def get_logo(icd_mode)
         | 
| 36 | 
            -
                case icd_mode.to_s
         | 
| 37 | 
            -
                when 'weblogo'
         | 
| 38 | 
            -
                  get_logo_weblogo
         | 
| 39 | 
            -
                when 'discrete'
         | 
| 40 | 
            -
                  get_logo_discrete
         | 
| 41 | 
            -
                end
         | 
| 42 | 
            -
              end
         | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
              def get_logo_weblogo
         | 
| 46 | 
            -
                rseq = []
         | 
| 47 | 
            -
                @matrix['A'].each_index { |i|
         | 
| 48 | 
            -
                  rseq << 2 + ['A','C','G','T'].inject(0) { |sum, l|
         | 
| 49 | 
            -
                    pn = @matrix[l][i]
         | 
| 50 | 
            -
                    sum += (pn == 0) ? 0 : pn * Math.log(pn) / Math.log(2)
         | 
| 51 | 
            -
                  }
         | 
| 52 | 
            -
                }
         | 
| 53 | 
            -
                
         | 
| 54 | 
            -
                mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
         | 
| 55 | 
            -
                @matrix['A'].each_index { |i|
         | 
| 56 | 
            -
                  ['A','C','G','T'].each { |l|
         | 
| 57 | 
            -
                    mat[l][i]= @matrix[l][i] * rseq[i] / 2 # so we can handle a '2 bit' scale here
         | 
| 58 | 
            -
                  }
         | 
| 59 | 
            -
                }
         | 
| 60 | 
            -
                
         | 
| 61 | 
            -
                mat
         | 
| 62 | 
            -
              end
         | 
| 63 | 
            -
             | 
| 64 | 
            -
              def get_logo_discrete
         | 
| 65 | 
            -
                checkerr("words count is undefined") { !words_count }
         | 
| 66 | 
            -
                
         | 
| 67 | 
            -
                rseq = []
         | 
| 68 | 
            -
                @matrix['A'].each_index { |i|
         | 
| 69 | 
            -
                  rseq << (icd4of4 == 0 ? 1.0 : ( (infocod(i) - icd4of4) / icd4of4 ).abs)
         | 
| 70 | 
            -
                }
         | 
| 71 | 
            -
                
         | 
| 72 | 
            -
                mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
         | 
| 73 | 
            -
                @matrix['A'].each_index { |i|
         | 
| 74 | 
            -
                  ['A','C','G','T'].each { |l| 
         | 
| 75 | 
            -
                    mat[l][i] = @matrix[l][i] * rseq[i]
         | 
| 76 | 
            -
                  }
         | 
| 77 | 
            -
                }
         | 
| 78 | 
            -
                
         | 
| 79 | 
            -
                mat
         | 
| 80 | 
            -
              end
         | 
| 81 | 
            -
              
         | 
| 82 | 
            -
              def revcomp
         | 
| 83 | 
            -
                deep_dup.revcomp!
         | 
| 84 | 
            -
              end
         | 
| 85 | 
            -
            end
         | 
| 1 | 
            +
            class Object
         | 
| 2 | 
            +
              def deep_dup
         | 
| 3 | 
            +
                Marshal.load(Marshal.dump(self))
         | 
| 4 | 
            +
              end
         | 
| 5 | 
            +
            end
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            def get_ppm_from_file(in_file_name)
         | 
| 8 | 
            +
              case File.ext_wo_name(in_file_name)
         | 
| 9 | 
            +
              when 'pat', 'pcm'
         | 
| 10 | 
            +
                pm = PM.load(in_file_name)
         | 
| 11 | 
            +
                pm.fixwc  if pm.words_count
         | 
| 12 | 
            +
              when 'mfa', 'fasta', 'plain'
         | 
| 13 | 
            +
                pm = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
         | 
| 14 | 
            +
              when 'xml'
         | 
| 15 | 
            +
                pm = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
         | 
| 16 | 
            +
              when in_file_name
         | 
| 17 | 
            +
                pm = PPM.from_IUPAC(in_file_name.upcase)
         | 
| 18 | 
            +
              end
         | 
| 19 | 
            +
              pm.get_ppm
         | 
| 20 | 
            +
            rescue
         | 
| 21 | 
            +
              nil
         | 
| 22 | 
            +
            end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            class PPM
         | 
| 25 | 
            +
              attr_accessor :name
         | 
| 26 | 
            +
              
         | 
| 27 | 
            +
              def get_ppm
         | 
| 28 | 
            +
                self
         | 
| 29 | 
            +
              end
         | 
| 30 | 
            +
              
         | 
| 31 | 
            +
              def get_line(v)
         | 
| 32 | 
            +
                ( (v - icd4of4) / icd4of4 ).abs
         | 
| 33 | 
            +
              end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
              def get_logo(icd_mode)
         | 
| 36 | 
            +
                case icd_mode.to_s
         | 
| 37 | 
            +
                when 'weblogo'
         | 
| 38 | 
            +
                  get_logo_weblogo
         | 
| 39 | 
            +
                when 'discrete'
         | 
| 40 | 
            +
                  get_logo_discrete
         | 
| 41 | 
            +
                end
         | 
| 42 | 
            +
              end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
             | 
| 45 | 
            +
              def get_logo_weblogo
         | 
| 46 | 
            +
                rseq = []
         | 
| 47 | 
            +
                @matrix['A'].each_index { |i|
         | 
| 48 | 
            +
                  rseq << 2 + ['A','C','G','T'].inject(0) { |sum, l|
         | 
| 49 | 
            +
                    pn = @matrix[l][i]
         | 
| 50 | 
            +
                    sum += (pn == 0) ? 0 : pn * Math.log(pn) / Math.log(2)
         | 
| 51 | 
            +
                  }
         | 
| 52 | 
            +
                }
         | 
| 53 | 
            +
                
         | 
| 54 | 
            +
                mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
         | 
| 55 | 
            +
                @matrix['A'].each_index { |i|
         | 
| 56 | 
            +
                  ['A','C','G','T'].each { |l|
         | 
| 57 | 
            +
                    mat[l][i]= @matrix[l][i] * rseq[i] / 2 # so we can handle a '2 bit' scale here
         | 
| 58 | 
            +
                  }
         | 
| 59 | 
            +
                }
         | 
| 60 | 
            +
                
         | 
| 61 | 
            +
                mat
         | 
| 62 | 
            +
              end
         | 
| 63 | 
            +
             | 
| 64 | 
            +
              def get_logo_discrete
         | 
| 65 | 
            +
                checkerr("words count is undefined") { !words_count }
         | 
| 66 | 
            +
                
         | 
| 67 | 
            +
                rseq = []
         | 
| 68 | 
            +
                @matrix['A'].each_index { |i|
         | 
| 69 | 
            +
                  rseq << (icd4of4 == 0 ? 1.0 : ( (infocod(i) - icd4of4) / icd4of4 ).abs)
         | 
| 70 | 
            +
                }
         | 
| 71 | 
            +
                
         | 
| 72 | 
            +
                mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
         | 
| 73 | 
            +
                @matrix['A'].each_index { |i|
         | 
| 74 | 
            +
                  ['A','C','G','T'].each { |l| 
         | 
| 75 | 
            +
                    mat[l][i] = @matrix[l][i] * rseq[i]
         | 
| 76 | 
            +
                  }
         | 
| 77 | 
            +
                }
         | 
| 78 | 
            +
                
         | 
| 79 | 
            +
                mat
         | 
| 80 | 
            +
              end
         | 
| 81 | 
            +
              
         | 
| 82 | 
            +
              def revcomp
         | 
| 83 | 
            +
                deep_dup.revcomp!
         | 
| 84 | 
            +
              end
         | 
| 85 | 
            +
            end
         | 
| @@ -1,131 +1,131 @@ | |
| 1 | 
            -
            #!/usr/bin/ruby
         | 
| 2 | 
            -
            module Ytilib
         | 
| 3 | 
            -
             | 
| 4 | 
            -
            srand
         | 
| 5 | 
            -
             | 
| 6 | 
            -
            module Randoom
         | 
| 7 | 
            -
              
         | 
| 8 | 
            -
              private
         | 
| 9 | 
            -
             | 
| 10 | 
            -
              def Randoom.new_counts
         | 
| 11 | 
            -
                { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
         | 
| 12 | 
            -
              end
         | 
| 13 | 
            -
              
         | 
| 14 | 
            -
              def Randoom.random_letter(probs)
         | 
| 15 | 
            -
                random = rand()
         | 
| 16 | 
            -
                return 'A' if random < probs['A']
         | 
| 17 | 
            -
                return 'C' if random < probs['A'] + probs['C']
         | 
| 18 | 
            -
                return 'G' if random < probs['A'] + probs['C'] + probs['G']
         | 
| 19 | 
            -
                return 'T'
         | 
| 20 | 
            -
              end
         | 
| 21 | 
            -
             | 
| 22 | 
            -
              public
         | 
| 23 | 
            -
              
         | 
| 24 | 
            -
              def Randoom.calc_probs(input)
         | 
| 25 | 
            -
                counts = new_counts
         | 
| 26 | 
            -
                counts.default = 0
         | 
| 27 | 
            -
                (0...input.length).each { |i|
         | 
| 28 | 
            -
                  counts[input[i,1].upcase] += 1
         | 
| 29 | 
            -
                }
         | 
| 30 | 
            -
                return make_probs!(counts)
         | 
| 31 | 
            -
              end
         | 
| 32 | 
            -
                
         | 
| 33 | 
            -
              def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
         | 
| 34 | 
            -
                randoom = ''
         | 
| 35 | 
            -
                if (probs_m == nil)
         | 
| 36 | 
            -
                  req_len.times { randoom << random_letter(probs) }
         | 
| 37 | 
            -
                  return randoom  
         | 
| 38 | 
            -
                end
         | 
| 39 | 
            -
                random_l = random_letter(probs)
         | 
| 40 | 
            -
                randoom = random_l
         | 
| 41 | 
            -
                (req_len-1).times {
         | 
| 42 | 
            -
                  cur_probs = probs_m[random_l]
         | 
| 43 | 
            -
                  random_l = random_letter(cur_probs)
         | 
| 44 | 
            -
                  randoom << random_l
         | 
| 45 | 
            -
                }
         | 
| 46 | 
            -
                return randoom
         | 
| 47 | 
            -
              end
         | 
| 48 | 
            -
              
         | 
| 49 | 
            -
              def Randoom.calc_probs_m(input)
         | 
| 50 | 
            -
                probs_m = { 'A' => {}, 'C' => {}, 'G' => {}, 'T' => {} }
         | 
| 51 | 
            -
                counts = { 'A' => new_counts, 'C' => new_counts, 'G' => new_counts, 'T' => new_counts, 'N' => new_counts }
         | 
| 52 | 
            -
                (0...input.length-1).each { |i|
         | 
| 53 | 
            -
                  pair = input[i, 2].upcase
         | 
| 54 | 
            -
                  counts[pair[0,1]][pair[1,1]] += 1
         | 
| 55 | 
            -
                }
         | 
| 56 | 
            -
                probs_m['A'] = make_probs!(counts['A'])
         | 
| 57 | 
            -
                probs_m['C'] = make_probs!(counts['C'])
         | 
| 58 | 
            -
                probs_m['G'] = make_probs!(counts['G'])
         | 
| 59 | 
            -
                probs_m['T'] = make_probs!(counts['T'])
         | 
| 60 | 
            -
                return probs_m
         | 
| 61 | 
            -
              end
         | 
| 62 | 
            -
              
         | 
| 63 | 
            -
              # Turns a table of dinucleotide counts into a first-order transition
              # table, modifying +counts+ in place.
              #
              # counts - Hash of rows keyed 'A', 'C', 'G', 'T', 'N'; each row is a
              #          Hash of counts keyed by the same five letters
              #          (counts[first][second]).
              #
              # A quarter of every cell in the 'N' row is added to the same column of
              # each of the A/C/G/T rows, then each of those four rows is normalised
              # with make_probs!.
              #
              # Returns a Hash with keys 'A', 'C', 'G', 'T' mapping to the row hashes
              # produced by make_probs!.
              def Randoom.make_probs_m!(counts)
                bases = ['A', 'C', 'G', 'T']
                unknown_row = counts['N']

                (bases + ['N']).each do |second|
                  share = unknown_row[second] / 4.0
                  bases.each { |first| counts[first][second] += share }
                end

                probs_m = {}
                bases.each { |first| probs_m[first] = make_probs!(counts[first]) }
                probs_m
              end
         | 
| 78 | 
            -
              
         | 
| 79 | 
            -
              # Converts nucleotide counts into probabilities, modifying +counts+ in
              # place.
              #
              # counts - Hash with numeric entries for 'A', 'C', 'G', 'T' and,
              #          optionally, 'N'. A missing 'N' entry is treated as 0.
              # length - total used as the denominator; when nil it is the sum of the
              #          A, C, G, T and N counts.
              #
              # A quarter of the 'N' count is added to each of the four nucleotide
              # counts (this happens even when the method returns early below).
              #
              # Returns a Hash with keys 'A', 'C', 'G', 'T'. All values are 0 when the
              # length is zero; otherwise 'T' is computed as the remainder
              # 1 - A - C - G rather than from its own count.
              def Randoom.make_probs!(counts, length = nil)
                probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
                # 'N' is an optional bucket: fall back to 0 instead of failing on nil.
                unknown = counts['N'] || 0
                length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + unknown if length.nil?
                length = length.to_f
                ['A', 'C', 'G', 'T'].each { |letter| counts[letter] += unknown / 4.0 }
                return probs if length == 0

                ['A', 'C', 'G'].each { |letter| probs[letter] = counts[letter] / length }
                probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
                probs
              end
         | 
| 91 | 
            -
              
         | 
| 92 | 
            -
              # Makes a probability hash symmetric under complementation, in place:
              # 'A' and 'T' both become the mean of the two, and so do 'C' and 'G'.
              #
              # probs - Hash with numeric entries for 'A', 'C', 'G', 'T'.
              #
              # Returns the same (modified) hash.
              def Randoom.equalize!(probs)
                # Divide by 2.0 so Integer entries are averaged, not floor-divided.
                probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2.0
                probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2.0
                probs
              end
         | 
| 97 | 
            -
              
         | 
| 98 | 
            -
              # Alias-style wrapper: delegates to equalize! and returns its result
              # (the same hash, modified in place).
              def Randoom.twostrand!(probs)
                equalize!(probs)
              end
         | 
| 101 | 
            -
              
         | 
| 102 | 
            -
              # Uniform background, passed through PPM.probs2IUPAC! (defined outside
              # this file).
              DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })

              # Precomputed background frequencies. Original note: "probabilities
              # counted without _random.fa files for human genome".
              # NOTE(review): the DMEL40 / HG17 / HG18 / MM9 prefixes look like genome
              # assembly names, so the note probably applies to more than human - confirm.
              #
              # *_PROBS1   - per-letter frequencies
              # *_PROBS2   - same, with A == T and C == G
              # *_PROBS1_M - nested hashes in the row/column layout that calc_probs_m
              #              returns (presumably first letter => next letter; verify)
              DMEL40_PROBS1 = { "A" => 0.287729562173578, "C" => 0.21236364146414, "G" => 0.212259972960341, "T" => 0.287646823401942 }
              DMEL40_PROBS2 = { "A" => 0.28768819278776, "C" => 0.21231180721224, "G" => 0.21231180721224, "T" => 0.28768819278776 }

              DMEL40_PROBS1_M = {
                "A" => { "A" => 0.350403075314602, "C" => 0.181194374386404, "G" => 0.188361404205017, "T" => 0.280041146093977 },
                "C" => { "A" => 0.325366772443085, "C" => 0.222264645612127, "G" => 0.197213801868993, "T" => 0.255154780075794 },
                "G" => { "A" => 0.260710563672393, "C" => 0.27150575901391, "G" => 0.222294234776053, "T" => 0.245489442537644 },
                "T" => { "A" => 0.217189093089999, "C" => 0.192590127484359, "G" => 0.239869076706963, "T" => 0.350351702718679 }
              }

              HG17_PROBS1 = { "A" => 0.295309361730334, "C" => 0.204413561169847, "G" => 0.204519414193999, "T" => 0.295757662905821 }
              HG17_PROBS2 = { "A" => 0.295533512318077, "C" => 0.204466487681923, "G" => 0.204466487681923, "T" => 0.295533512318077 }

              HG17_PROBS1_M = {
                "A" => { "A" => 0.331091206257755, "C" => 0.170458424092748, "G" => 0.236770972081246, "T" => 0.261679397568252 },
                "C" => { "A" => 0.354813019140533, "C" => 0.254741288394943, "G" => 0.0481667110625576, "T" => 0.342278981401966 },
                "G" => { "A" => 0.290057117684408, "C" => 0.208514091370804, "G" => 0.254732297362797, "T" => 0.246696493581991 },
                "T" => { "A" => 0.222087715262152, "C" => 0.200697606508443, "G" => 0.245657322003887, "T" => 0.331557356225517 }
              }

              HG18_PROBS1 = { "A" => 0.291900580635872, "C" => 0.207855064518284, "G" => 0.207968587245859, "T" => 0.292275767599985 }
              HG18_PROBS2 = { "A" => 0.292088174117929, "C" => 0.207911825882071, "G" => 0.207911825882071, "T" => 0.292088174117929 }

              MM9_PROBS1 = { "A" => 0.289755259854654, "C" => 0.210085673636132, "G" => 0.210143929198141, "T" => 0.290015137311074 }
              MM9_PROBS2 = { "A" => 0.289885198582864, "C" => 0.210114801417136, "G" => 0.210114801417136, "T" => 0.289885198582864 }

              MM9_PROBS1_M = {
                "A" => { "A" => 0.310389104265713, "C" => 0.184962574392377, "G" => 0.251904718465914, "T" => 0.252743602875996 },
                "C" => { "A" => 0.352189584318682, "C" => 0.250794045222924, "G" => 0.0494404816637487, "T" => 0.347575888794645 },
                "G" => { "A" => 0.295931117515178, "C" => 0.197870954111653, "G" => 0.250756985626016, "T" => 0.255440942747154 },
                "T" => { "A" => 0.219437756702452, "C" => 0.214548041970626, "G" => 0.255405334730743, "T" => 0.310608866596179 }
              }
         | 
| 128 | 
            -
             | 
| 129 | 
            -
            end
         | 
| 130 | 
            -
             | 
| 131 | 
            -
            end
         | 
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
            module Ytilib

            # NOTE(review): this reseeds Ruby's global random number generator as soon
            # as the file is loaded, replacing any seed the host program set earlier.
            # Kept because it is existing load-time behaviour.
            srand

            # Nucleotide background statistics and random sequence generation.
            #
            # Every method is a singleton method (def Randoom.x). Bare private/public
            # keywords do not affect singleton methods, so all methods below are
            # publicly callable; new_counts and random_letter are internal helpers by
            # convention only.
            module Randoom

              # Internal helper. Returns a fresh, zeroed counter hash for the four
              # nucleotides plus 'N'.
              def Randoom.new_counts
                { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
              end

              # Internal helper. Draws one letter according to +probs+.
              #
              # probs - Hash with numeric entries for 'A', 'C' and 'G'; 'T' is returned
              #         whenever the draw is not below A + C + G, so its entry is not
              #         read.
              #
              # Returns 'A', 'C', 'G' or 'T'.
              def Randoom.random_letter(probs)
                draw = rand
                threshold = probs['A']
                return 'A' if draw < threshold
                threshold += probs['C']
                return 'C' if draw < threshold
                threshold += probs['G']
                return 'G' if draw < threshold
                'T'
              end

              # Estimates single-letter probabilities from a sequence.
              #
              # input - String; letters are upcased before counting.
              #
              # Letters other than A/C/G/T/N are tallied under their own keys and
              # therefore do not contribute to the result. See make_probs! for how
              # 'N' is distributed and for the shape of the returned hash.
              def Randoom.calc_probs(input)
                counts = new_counts
                counts.default = 0
                input.upcase.each_char { |letter| counts[letter] += 1 }
                make_probs!(counts)
              end

              # Generates a random nucleotide string.
              #
              # req_len - number of letters to generate; values <= 0 yield ''.
              # probs   - probability hash used for every letter, or only for the
              #           first letter when +probs_m+ is given.
              # probs_m - optional transition table; probs_m[previous_letter] is the
              #           probability hash used to draw the next letter.
              #
              # Returns a String of length req_len (or '' when req_len <= 0).
              def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
                sequence = ''
                # Previously the probs_m branch always emitted one letter, even for
                # a requested length of zero.
                return sequence if req_len <= 0

                previous = nil
                req_len.times do
                  current_probs = (probs_m && previous) ? probs_m[previous] : probs
                  previous = random_letter(current_probs)
                  sequence << previous
                end
                sequence
              end

              # Estimates first-order transition probabilities from a sequence.
              #
              # input - String; letters are upcased before counting.
              #
              # Adjacent pairs containing a letter other than A/C/G/T/N are skipped
              # (they used to raise NoMethodError), matching calc_probs, which also
              # ignores such letters. Each of the A/C/G/T rows is normalised with
              # make_probs!; pairs whose first letter is 'N' are counted but not used.
              #
              # Returns a Hash with keys 'A', 'C', 'G', 'T' mapping to probability
              # hashes for the following letter.
              def Randoom.calc_probs_m(input)
                counts = {}
                ['A', 'C', 'G', 'T', 'N'].each { |letter| counts[letter] = new_counts }

                input.upcase.each_char.each_cons(2) do |first, second|
                  row = counts[first]
                  next unless row && row.key?(second)
                  row[second] += 1
                end

                probs_m = {}
                ['A', 'C', 'G', 'T'].each { |letter| probs_m[letter] = make_probs!(counts[letter]) }
                probs_m
              end

              # Turns a table of dinucleotide counts into a transition table,
              # modifying +counts+ in place.
              #
              # counts - Hash of rows keyed 'A', 'C', 'G', 'T', 'N'; each row is a
              #          Hash of counts keyed by the same five letters.
              #
              # A quarter of every cell in the 'N' row is added to the same column of
              # each A/C/G/T row, then those four rows are normalised with make_probs!.
              #
              # Returns a Hash with keys 'A', 'C', 'G', 'T'.
              def Randoom.make_probs_m!(counts)
                bases = ['A', 'C', 'G', 'T']
                unknown_row = counts['N']

                (bases + ['N']).each do |second|
                  share = unknown_row[second] / 4.0
                  bases.each { |first| counts[first][second] += share }
                end

                probs_m = {}
                bases.each { |first| probs_m[first] = make_probs!(counts[first]) }
                probs_m
              end

              # Converts nucleotide counts into probabilities, modifying +counts+ in
              # place.
              #
              # counts - Hash with numeric entries for 'A', 'C', 'G', 'T' and,
              #          optionally, 'N'. A missing 'N' entry is treated as 0.
              # length - total used as the denominator; when nil it is the sum of the
              #          A, C, G, T and N counts.
              #
              # A quarter of the 'N' count is added to each nucleotide count (also
              # when the method returns early below).
              #
              # Returns a Hash with keys 'A', 'C', 'G', 'T'. All values are 0 when the
              # length is zero; otherwise 'T' is the remainder 1 - A - C - G.
              def Randoom.make_probs!(counts, length = nil)
                probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
                # 'N' is an optional bucket: fall back to 0 instead of failing on nil.
                unknown = counts['N'] || 0
                length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + unknown if length.nil?
                length = length.to_f
                ['A', 'C', 'G', 'T'].each { |letter| counts[letter] += unknown / 4.0 }
                return probs if length == 0

                ['A', 'C', 'G'].each { |letter| probs[letter] = counts[letter] / length }
                probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
                probs
              end

              # Makes a probability hash symmetric under complementation, in place:
              # 'A' and 'T' both become their mean, and so do 'C' and 'G'.
              #
              # Returns the same (modified) hash.
              def Randoom.equalize!(probs)
                # Divide by 2.0 so Integer entries are averaged, not floor-divided.
                probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2.0
                probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2.0
                probs
              end

              # Alias-style wrapper around equalize!.
              def Randoom.twostrand!(probs)
                equalize!(probs)
              end

              # Uniform background, passed through PPM.probs2IUPAC! (defined outside
              # this file).
              DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })

              # Precomputed background frequencies. Original note: "probabilities
              # counted without _random.fa files for human genome".
              # NOTE(review): the DMEL40 / HG17 / HG18 / MM9 prefixes look like genome
              # assembly names, so the note probably applies to more than human - confirm.
              #
              # *_PROBS1   - per-letter frequencies
              # *_PROBS2   - same, with A == T and C == G
              # *_PROBS1_M - nested hashes in the row/column layout that calc_probs_m
              #              returns
              DMEL40_PROBS1 = { "A" => 0.287729562173578, "C" => 0.21236364146414, "G" => 0.212259972960341, "T" => 0.287646823401942 }
              DMEL40_PROBS2 = { "A" => 0.28768819278776, "C" => 0.21231180721224, "G" => 0.21231180721224, "T" => 0.28768819278776 }

              DMEL40_PROBS1_M = {
                "A" => { "A" => 0.350403075314602, "C" => 0.181194374386404, "G" => 0.188361404205017, "T" => 0.280041146093977 },
                "C" => { "A" => 0.325366772443085, "C" => 0.222264645612127, "G" => 0.197213801868993, "T" => 0.255154780075794 },
                "G" => { "A" => 0.260710563672393, "C" => 0.27150575901391, "G" => 0.222294234776053, "T" => 0.245489442537644 },
                "T" => { "A" => 0.217189093089999, "C" => 0.192590127484359, "G" => 0.239869076706963, "T" => 0.350351702718679 }
              }

              HG17_PROBS1 = { "A" => 0.295309361730334, "C" => 0.204413561169847, "G" => 0.204519414193999, "T" => 0.295757662905821 }
              HG17_PROBS2 = { "A" => 0.295533512318077, "C" => 0.204466487681923, "G" => 0.204466487681923, "T" => 0.295533512318077 }

              HG17_PROBS1_M = {
                "A" => { "A" => 0.331091206257755, "C" => 0.170458424092748, "G" => 0.236770972081246, "T" => 0.261679397568252 },
                "C" => { "A" => 0.354813019140533, "C" => 0.254741288394943, "G" => 0.0481667110625576, "T" => 0.342278981401966 },
                "G" => { "A" => 0.290057117684408, "C" => 0.208514091370804, "G" => 0.254732297362797, "T" => 0.246696493581991 },
                "T" => { "A" => 0.222087715262152, "C" => 0.200697606508443, "G" => 0.245657322003887, "T" => 0.331557356225517 }
              }

              HG18_PROBS1 = { "A" => 0.291900580635872, "C" => 0.207855064518284, "G" => 0.207968587245859, "T" => 0.292275767599985 }
              HG18_PROBS2 = { "A" => 0.292088174117929, "C" => 0.207911825882071, "G" => 0.207911825882071, "T" => 0.292088174117929 }

              MM9_PROBS1 = { "A" => 0.289755259854654, "C" => 0.210085673636132, "G" => 0.210143929198141, "T" => 0.290015137311074 }
              MM9_PROBS2 = { "A" => 0.289885198582864, "C" => 0.210114801417136, "G" => 0.210114801417136, "T" => 0.289885198582864 }

              MM9_PROBS1_M = {
                "A" => { "A" => 0.310389104265713, "C" => 0.184962574392377, "G" => 0.251904718465914, "T" => 0.252743602875996 },
                "C" => { "A" => 0.352189584318682, "C" => 0.250794045222924, "G" => 0.0494404816637487, "T" => 0.347575888794645 },
                "G" => { "A" => 0.295931117515178, "C" => 0.197870954111653, "G" => 0.250756985626016, "T" => 0.255440942747154 },
                "T" => { "A" => 0.219437756702452, "C" => 0.214548041970626, "G" => 0.255405334730743, "T" => 0.310608866596179 }
              }

            end

            end
         |