RubyGems - sequence_logo - Versions diffs - 1.0.2 - Mend

sequence_logo 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

data/.gitignore +17 -0
data/Gemfile +4 -0
data/LICENSE +22 -0
data/README.md +61 -0
data/Rakefile +5 -0
data/bin/create_all_logos +3 -0
data/bin/generate_logo +3 -0
data/bin/pmflogo +3 -0
data/lib/sequence_logo.rb +7 -0
data/lib/sequence_logo/assets/nucl_simpa/a.png +0 -0
data/lib/sequence_logo/assets/nucl_simpa/c.png +0 -0
data/lib/sequence_logo/assets/nucl_simpa/g.png +0 -0
data/lib/sequence_logo/assets/nucl_simpa/t.png +0 -0
data/lib/sequence_logo/exec/create_all_logos.rb +25 -0
data/lib/sequence_logo/exec/generate_logo.rb +18 -0
data/lib/sequence_logo/exec/pmflogo.rb +26 -0
data/lib/sequence_logo/pmflogo_lib.rb +193 -0
data/lib/sequence_logo/version.rb +3 -0
data/lib/sequence_logo/ytilib.rb +9 -0
data/lib/sequence_logo/ytilib/addon.rb +247 -0
data/lib/sequence_logo/ytilib/bismark.rb +71 -0
data/lib/sequence_logo/ytilib/hack1.rb +75 -0
data/lib/sequence_logo/ytilib/infocod.rb +108 -0
data/lib/sequence_logo/ytilib/iupac.rb +92 -0
data/lib/sequence_logo/ytilib/pm.rb +562 -0
data/lib/sequence_logo/ytilib/pmsd.rb +99 -0
data/lib/sequence_logo/ytilib/randoom.rb +131 -0
data/lib/sequence_logo/ytilib/ytilib.rb +147 -0
data/sequence_logo.gemspec +21 -0
metadata +103 -0

data/lib/sequence_logo/ytilib/bismark.rb ADDED Viewed

@@ -0,0 +1,71 @@
+#!/usr/bin/ruby
+module Ytilib
+require "rexml/document"
+include REXML
+class Bismark < Document
+  def initialize(source = nil, add_dtd = false)
+    dtd = add_dtd ? "<!DOCTYPE smallbismark SYSTEM 'smallbismark.dtd'>#{$/}" : ""
+    source == nil ? super("<?xml version='1.0' encoding='UTF-8'?>#{$/}#{dtd}") : super(source)
+    super(IO.read(source)) if source != nil && root == nil
+    if source == nil
+      self.add_element("smallbismark")
+      # xmlns breaks XPath for a REXML library under Linux, strange, indeed
+      # self.add_element("smallbismark", {"xmlns" => "http://bioinform.imb.ac.ru/smallBiSMark/smallbismark.dtd"})
+      self.root.add_element("comment", {"name" => "WARNING"}).add_text("This is a draft version of small-BiSMark. Specification is the subject to change!")
+    end
+  end
+  def getXML
+    beautify
+    s = ""; write(s, 1, true)
+    s.rstrip!
+    return s
+  end
+  alias get_xml getXML
+  def get_pm(xpath)
+    pwmnode = self.elements[xpath]
+    pm = PM.new_pm(pwmnode.attribute("length").value.to_i)
+    toi = pwmnode.name == "PCM"
+    pwmnode.elements.each("pm-column") { |c|
+      position = c.attribute("position").value.to_i - 1
+      weights = [c.elements["a"].get_text.value.strip.to_f,
+                c.elements["c"].get_text.value.strip.to_f,
+                c.elements["g"].get_text.value.strip.to_f,
+                c.elements["t"].get_text.value.strip.to_f]
+      weights.collect { |w| w.to_i } if toi
+      pm['A'][position], pm['C'][position], pm['G'][position], pm['T'][position] = weights[0], weights[1], weights[2], weights[3]
+    }
+    return pm
+  end
+private
+  CONTAIN_NO_TEXT = {
+                      "segment" => :vasya_shmyak,
+                      "group" => :vasya_shmyak,
+                      "smallbismark" => :vasya_shmyak,
+                      "motif" => :vasya_shmyak,
+                      "PWM" => :vasya_shmyak,
+                      "PCM" => :vasya_shmyak,
+                      "PPM" => :vasya_shmyak,
+                      "source" => :vasya_shmyak,
+                      "factor"  => :vasya_shmyak,
+                      "pm-column" => :vasya_shmyak,
+                      "word-list" => :vasya_shmyak}
+  def beautify(node = self)
+    if node == self
+      self.delete_if { |e| e.is_a?(Text) }
+      self.each { |e| beautify(e) }
+    else
+      node.delete_if { |e| e.is_a?(Text) } if node.respond_to?(:delete_if) && Bismark::CONTAIN_NO_TEXT.has_key?(node.name)
+      node.each { |e| beautify(e) } if node.respond_to?(:each)
+    end
+  end
+end
+end

data/lib/sequence_logo/ytilib/hack1.rb ADDED Viewed

@@ -0,0 +1,75 @@
+require 'rexml/formatters/pretty'
+module REXML
+  module Formatters
+    # The Transitive formatter writes an XML document that parses to an
+    # identical document as the source document.  This means that no extra
+    # whitespace nodes are inserted, and whitespace within text nodes is
+    # preserved.  Within these constraints, the document is pretty-printed,
+    # with whitespace inserted into the metadata to introduce formatting.
+    #
+    # Note that this is only useful if the original XML is not already
+    # formatted.  Since this formatter does not alter whitespace nodes, the
+    # results of formatting already formatted XML will be odd.
+    class Transitive < Default
+      def initialize( indentation=2 )
+        @indentation = indentation
+        @level = 0
+      end
+      protected
+      def write_element( node, output )
+        output << "\n" << ' '*@level
+        output << "<#{node.expanded_name}"
+        node.attributes.each_attribute do |attr|
+          output << " "
+          attr.write( output )
+        end unless node.attributes.empty?
+        if node.children.empty?
+          output << "/>"
+        else
+          output << ">"
+          # If compact and all children are text, and if the formatted output
+          # is less than the specified width, then try to print everything on
+          # one line
+          skip = false
+          @level += @indentation
+          only_text = true
+          node.children.each { |child|
+          	only_text = child.is_a?(REXML::Text) && only_text
+            write( child, output )
+          }
+          @level -= @indentation
+          output << "#{only_text ? "" : "\n" + ' '*@level}" << "</#{node.expanded_name}>"
+        end
+      end
+      def write_text( node, output )
+        output << node.to_s()
+      end
+    end
+  end
+  class Document
+    def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
+      if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+        output = Output.new( output, xml_decl.encoding )
+      end
+      formatter = if indent > -1
+          if trans
+            REXML::Formatters::Transitive.new( indent )
+          else
+            REXML::Formatters::Pretty.new( indent, ie_hack )
+          end
+        else
+          REXML::Formatters::Default.new( ie_hack )
+        end
+      formatter.write( self, output )
+    end
+  end
+end

data/lib/sequence_logo/ytilib/infocod.rb ADDED Viewed

@@ -0,0 +1,108 @@
+#!/usr/bin/ruby
+class Float
+  # Using Stieltjes formula from http://www.luschny.de/math/factorial/approx/SimpleCases.html
+  def log_fact
+    return 0.0 if self <= 1
+    a0 = 1.0/12
+    a1 = 1.0/30
+    a2 = 53.0/210
+    a3 = 195.0/371
+    a4 = 22999.0/22737
+    a5 = 29944523.0/19733142
+    a6 = 109535241009.0/48264275462
+    z_big = self+1;
+    (1.0/2)*Math.log(2*Math::PI)+(z_big-1.0/2)*Math.log(z_big)-z_big + a0/(z_big+a1/(z_big+a2/(z_big+a3/(z_big+a4/(z_big+a5/(z_big+a6/z_big))))))
+  end
+end
+class Integer
+  def log_fact
+    self.to_f.log_fact
+  end
+end
+# Naive version
+=begin
+class Integer
+  @@fact_hash = {}
+  def log_fact
+    return 0.0 if self == 0
+    return nil if self < 0
+    if self <= 170
+      @@fact_hash[self] = Math.log( lambda { |k| return k if self.times { |i| k *= i.next } }.call(1) )
+    else
+      return self.to_f.log_fact
+    end unless @@fact_hash.has_key?(self)
+    return @@fact_hash[self]
+  end
+end
+=end
+module Ytilib
+  class PM
+    def infocod(position = nil)
+      return infocod_private(position) if position
+      (0...@size).collect { |i| infocod_private(i) }
+    end
+    alias icd infocod
+    def icd2of4(floor = false)
+      i2o4 = @words_count / 2.0
+      i2o4 = i2o4.floor if floor
+      ([i2o4, i2o4, 0, 0].inject(0.0) { |sum, k_i| sum += k_i.log_fact  } - @words_count.log_fact ) / @words_count
+      # 0 is equal to @words_count % 2, because 0! = 1!
+    end
+    def icd3of4(floor = false)
+      i3o4 = @words_count / 3.0
+      i3o4 = i3o4.floor if floor
+      addon = floor ? @words_count % 3 : 0
+      ([i3o4, i3o4, i3o4, addon].inject(0.0) { |sum, k_i| sum += k_i.log_fact  } - @words_count.log_fact ) / @words_count
+    end
+    def icdThc
+      icd3of4
+    end
+    def icdTlc
+      io = @words_count / 6.0
+      ([2*io, 2*io, io, io].inject(0.0) { |sum, k_i| sum += k_i.log_fact  } - @words_count.log_fact ) / @words_count
+    end
+    def icd4of4(floor = false)
+      i4o4 = @words_count / 4.0
+      i4o4 = i4o4.floor if floor
+      ([i4o4, i4o4, i4o4, i4o4].inject(0.0) { |sum, k_i| sum += k_i.log_fact  } - @words_count.log_fact ) / @words_count
+    end
+  protected
+    def infocod_private(position)
+      k_i = ['A','C','G','T'].collect { |letter| @matrix[letter][position] }
+      ( k_i.inject(0.0) { |sum, k_i| sum += k_i.log_fact  } - @words_count.log_fact ) / @words_count
+    end
+  end
+  class PPM
+    def to_pcm(words_count = nil)
+      @words_count = words_count if words_count
+      checkerr("words count is not specified") { !@words_count }
+      counts = PM.new_matrix(@size)
+      (0...size).each { |i|
+        ['A', 'C', 'G', 'T'].each { |l|
+          counts[l][i] = @matrix[l][i] * @words_count
+        }
+      }
+      return PM.new(size, counts)
+    end
+    alias to_pcm get_pcm
+    def infocod(position = nil)
+      return to_pcm.infocod(position)
+    end
+    def icd(position = nil)
+      return to_pcm.infocod(position)
+    end
+  end
+end

data/lib/sequence_logo/ytilib/iupac.rb ADDED Viewed

@@ -0,0 +1,92 @@
+class IUPAC < String
+  CODE = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
+          "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
+          "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
+  REVCODE = CODE.invert
+  def dup
+    IUPAC.new(self)
+  end
+  def initialize(words)
+    if words.is_a?(Array)
+      iupac = (0...words[0].size).collect { |i|
+        (0...words.size).collect { |j| words[j][i,1] }.uniq.sort.inject("") { |cola, letter| cola += letter }
+      }.inject("") { |iup, cola|
+        checkerr("bad letter set #{cola}") { !CODE.has_key?(cola) }
+        iup += CODE[cola]
+      }
+      super(iupac)
+    elsif words.is_a?(IUPAC)
+      super(words)
+    elsif words.is_a?(String)
+      checkerr("word #{words} has strange characters") { words.tr('ACGTURYKMSWBDHVN', '').size > 0 }
+      super(words)
+    end
+  end
+  def ==(iupac)
+    return false if self.size != iupac.size
+    (0...self.size).inject(true) { |result, i| result &= IUPACOM[self[i,1]][iupac[i,1]] }
+  end
+  def merge(iupac)
+    return nil if self.size != iupac.size
+    res = (0...self.size).inject("") { |res, i|
+      merges = REVCODE[self[i,1]].split(//).concat(REVCODE[iupac[i,1]].split(//)).uniq.sort.inject("") { |s, c| s += c}
+      res << CODE[merges]
+    }
+    return IUPAC.new(res)
+  end
+  def include?(iupac)
+    return false if self.size < iupac.size || !iupac.is_a?(IUPAC)
+    (0..self.size-iupac.size).each { |i|
+      return i if IUPAC.new(self[i,iupac.size]) == iupac
+    }
+    return false
+  end
+  def compl
+    return self.tr("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
+  end
+  def compl!
+    self.tr!("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
+    return self
+  end
+  alias reverse_string reverse
+  def reverse
+    return IUPAC.new(reverse_string)
+  end
+  alias comp! compl!
+  alias complement! compl!
+  alias comp compl
+  alias complement compl
+private
+  IUPACOM = { "A" => {"A" => :llib, "R" => :llib, "M" => :llib, "W" => :llib, "D" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
+                "C" => {"C" => :llib, "Y" => :llib, "M" => :llib, "S" => :llib, "B" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
+                "G" => {"G" => :llib, "R" => :llib, "K" => :llib, "S" => :llib, "B" => :llib, "D" => :llib, "V" => :llib, "N" => :llib},
+                "T" => {"T" => :llib, "Y" => :llib, "K" => :llib, "W" => :llib, "B" => :llib, "D" => :llib, "H" => :llib, "N" => :llib}
+  }
+  IUPACOM["R"] = IUPACOM["G"].merge(IUPACOM["A"])
+  IUPACOM["Y"] = IUPACOM["T"].merge(IUPACOM["C"])
+  IUPACOM["K"] = IUPACOM["G"].merge(IUPACOM["T"])
+  IUPACOM["M"] = IUPACOM["A"].merge(IUPACOM["C"])
+  IUPACOM["S"] = IUPACOM["G"].merge(IUPACOM["C"])
+  IUPACOM["W"] = IUPACOM["A"].merge(IUPACOM["T"])
+  IUPACOM["B"] = IUPACOM["G"].merge(IUPACOM["T"].merge(IUPACOM["C"]))
+  IUPACOM["D"] = IUPACOM["G"].merge(IUPACOM["A"].merge(IUPACOM["T"]))
+  IUPACOM["H"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["T"]))
+  IUPACOM["V"] = IUPACOM["G"].merge(IUPACOM["C"].merge(IUPACOM["A"]))
+  IUPACOM["N"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["G"].merge(IUPACOM["T"])))
+#  IUPACMERGE = CODE.merge({
+#    "AA" => "A", "CC" => "C", "GG" => "G", "TT" => "T",
+#
+#  })
+end

data/lib/sequence_logo/ytilib/pm.rb ADDED Viewed

@@ -0,0 +1,562 @@
+module Ytilib
+  class PM
+    attr_reader :matrix, :size
+    attr_accessor :words_count
+    alias length size
+    def score_mean(bckgr = Randoom::DEF_PROBS)
+      (0...@size).inject(0.0) { |mean, i| mean += ['A','C','G','T'].inject(0.0) { |sum,l| sum += @matrix[l][i] * bckgr[l] } }
+    end
+    def score_variance(bckgr = Randoom::DEF_PROBS)
+      (0...@size).inject(0.0) { |m2, i|
+        deltai = ['A','C','G','T'].inject(0.0) { |sum,l| sum += @matrix[l][i]**2 * bckgr[l] } - ['A','C','G','T'].inject(0.0) { |sum,l| sum += matrix[l][i] * bckgr[l] }**2
+        m2 += deltai
+      }
+    end
+    def p_value(threshold, mean = nil, variance = nil)
+      mean = mean ? mean : score_mean
+      variance = variance ? variance : score_variance
+      n_ = (threshold - mean) / Math.sqrt(variance)
+      p_value = (1 - Math.erf2(n_/Math.sqrt(2))) / 2.0
+    end
+    def best_word
+      return (0...size).inject("") { |word, i|
+        max = ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.max
+        maxlets = ['A', 'C', 'G', 'T'].select { |l| @matrix[l][i] == max }
+        word << (maxlets.size == 1 ? maxlets.first : "N")
+      }
+    end
+    def strict_consensus
+      return IUPAC.new((0...size).inject("") { |word, i|
+        max = ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.max
+        maxlets = ['A', 'C', 'G', 'T'].inject("") { |lets, l| lets += @matrix[l][i] == max ? l : ""}
+        word += IUPAC::CODE[maxlets]
+      })
+    end
+    def consensus_string(beautiful = false)
+      checkerr("words count is undefined") { !@words_count }
+      i2o4, thc, tlc = icd2of4, icdThc, icdTlc
+      icd = infocod
+      return String.new((0...size).inject("") { |word, i|
+        scores = ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.uniq.sort.reverse
+        if icd[i] > i2o4
+          scores = [scores.first]
+        elsif icd[i] > thc
+          scores = scores[0..1]
+        elsif icd[i] > tlc
+          scores = scores[0..2]
+        end
+        lets = ['A', 'C', 'G', 'T'].inject("") { |lets, l| lets += scores.include?(@matrix[l][i]) ? l : ""}
+        reslet = IUPAC::CODE[lets]
+        reslet = reslet.downcase if beautiful && lets.size > 2
+        word += reslet
+      })
+    end
+    def consensus
+      checkerr("words count is undefined") { !@words_count }
+      i2o4, thc, tlc = icd2of4, icdThc, icdTlc
+      icd = infocod
+      return IUPAC.new((0...size).inject("") { |word, i|
+        scores = ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.uniq.sort.reverse
+        if icd[i] > i2o4
+          scores = [scores.first]
+        elsif icd[i] > thc
+          scores = scores[0..1]
+        elsif icd[i] > tlc
+          scores = scores[0..2]
+        end
+        lets = ['A', 'C', 'G', 'T'].inject("") { |lets, l| lets += scores.include?(@matrix[l][i]) ? l : ""}
+        word += IUPAC::CODE[lets]
+      })
+    end
+    def find_hit(s, score_g, use2strands = true)
+      (0..(s.size - @size)).each { |i|
+        seq, seq_rc = s[i, @size], s[i, @size].revcomp!
+        score_p, score_rc = score(seq), score(seq_rc)
+        r = use2strands ? [score_p,score_rc].max : score_p
+        return i if r >= score_g
+      }
+      return nil
+    end
+    def find_hits(s, score_g, use2strands = true)
+      (0..(s.size - @size)).select { |i|
+        seq, seq_rc = s[i, @size], s[i, @size].revcomp!
+        score_p, score_rc = score(seq), score(seq_rc)
+        r = use2strands ? [score_p,score_rc].max : score_p
+        r >= score_g ? i : nil
+      }.compact
+    end
+    def collect_hits(s, score_g, use2strands = true)
+      result = []
+      (0..(s.size - @size)).each { |i|
+        seq, seq_rc = s[i, @size], s[i, @size].revcomp!
+        score_p, score_rc = score(seq.upcase), score(seq_rc.upcase)
+        result << [score_p, seq, false, i] if score_p >= score_g
+        result << [score_rc, seq_rc, true, i] if score_rc >= score_g
+      }
+      result
+    end
+    def best_hit(s, use2strands = true)
+      checkerr("too short sequence") { s.size < @size }
+      return (0..(s.size - @size)).inject(-Float::MAX) { |r, i|
+        seq, seq_rc = s[i, @size], s[i, @size].revcomp!
+        score_p, score_rc = score(seq), score(seq_rc)
+        r = use2strands ? [r,score_p,score_rc].max : [r,score_p].max
+      }
+    end
+    def eql?(pm)
+      return ['A','C','G','T'].inject(true) { |equal, letter|
+        equal = equal && @matrix[letter].eql?(pm.matrix[letter])
+      }
+    end
+    def flexeql?(pm)
+      checkerr("for what?") { true }
+      return ['A','C','G','T'].inject(true) { |equal, letter|
+        # report "letter=#{letter}"
+        equal = equal && (0...@size).inject(true) { |deepequal, position|
+          # report "position=#{position}, delta=#{@matrix[letter][position] - pm.matrix[letter][position]}"
+          deepequal = deepequal && (@matrix[letter][position] - pm.matrix[letter][position]).abs < 10**-11
+        }
+      }
+    end
+    def initialize(size, matrix = nil, words_count = nil)
+      checkerr("matrix['A'].size != size, #{matrix['A'].size} != #{size}") { matrix != nil && size != matrix['A'].size }
+      @size = size
+      @matrix = matrix == nil ? PM.new_matrix(size) : matrix
+      if !words_count || words_count <= 0
+        words_count = col_sum(0)
+        @words_count = words_count.round >= 2 ? words_count.round : nil
+      else
+        @words_count = words_count
+      end
+    end
+    def col_sum(index = 0, letset = ['A','C','G','T'])
+      return letset.inject(0) { |sum, l| sum += @matrix[l][index] }
+    end
+    def PM.col_sum(matrix, index = 0)
+      return matrix['A'][index] + matrix['C'][index] + matrix['G'][index] + matrix['T'][index]
+    end
+    def to_pwm!(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
+      @words_count = words_count if words_count && words_count > 0
+      @matrix.each_key do |letter|
+        (0...@size).each { |pos|
+          #p "pcm"
+          #p @matrix[letter][pos]
+          #p @matrix[letter][pos] + (probs[letter] * pseudocount)
+          #p ( (@words_count + pseudocount) * probs[letter])
+          #exit
+          @matrix[letter][pos] = Math::log( (@matrix[letter][pos] + (probs[letter] * pseudocount)) / ( (@words_count + pseudocount) * probs[letter]) )
+        }
+      end
+      return self
+    end
+    def get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
+      return self.dup.to_pwm!(words_count, probs, pseudocount)
+    end
+    alias to_pwm get_pwm
+    def get_ppm(words_count = nil)
+      words_count = @words_count unless words_count
+      checkerr("undefined words count") { !words_count || words_count <= 0 }
+      ppm = @matrix['N'] ?  PM.new_matrix_iupac(@size) : PM.new_matrix(@size)
+      @matrix.each_key { |letter|
+        (0...@size).each { |i|
+          ppm[letter][i] = @matrix[letter][i].to_f / words_count
+        }
+      }
+      return PPM.new(@size, ppm, words_count)
+    end
+    alias to_ppm get_ppm
+    def score(word)
+      checkerr("word size != pwm.size") { @size != word.size }
+      checkerr("word #{word} has strange characters") {
+        @matrix.keys.include?('N') ? word.tr('ACGTRYKMSWBDHVN', '').size > 0 : word.tr('ACGT', '').size > 0
+      }
+      return (0...@size).inject(0) { |sum, i|
+        sum += @matrix[word[i,1]][i]
+      }
+    end
+    def best_score
+      return (0...size).inject(0) { |sum, i|
+        sum += ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.max
+      }
+    end
+    def worst_score
+      return (0...size).inject(0) { |sum, i|
+        sum += ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.min
+      }
+    end
+    def dup
+      new_matrix = {}
+      @matrix.each_key { |letter| new_matrix[letter] = @matrix[letter].dup }
+      return PM.new(@size, new_matrix, @words_count)
+    end
+    def PM.new_pcm(words, iupacomp = false)
+      size = words[0].size
+      counts = PM.new_matrix(size)
+      counts.each_value { |arr| arr.fill(0) }
+      words.each { |word|
+        0.upto(size-1) { |i|
+          letter = word[i,1].upcase
+          checkerr("unknown letter #{letter}") { !['A', 'C', 'G', 'T', 'N'].include?(letter) }
+          if letter != 'N'
+            counts[letter][i] += 1
+          else
+            ['A', 'C', 'G', 'T'].each { |l| counts[l][i] += 0.25 }
+          end
+        }
+      }
+      newpcm = PM.new(size, counts, words.size)
+      newpcm.iupacomp! if iupacomp
+      return newpcm
+    end
+    def PM.new_pwm(words)
+      pcm = PM.new_pcm(words)
+      pcm.to_pwm!
+      return pcm
+    end
+    def PM.load(filename)
+      # supporting pat & pwm formats (letter-column and letter-row format)
+      input = IO.read(filename)
+      tm = []
+      input.each_line { |line|
+        l_a = line.split
+        begin
+          l_a = l_a.collect { |a_i| Float(a_i) }
+        rescue
+          next
+        end
+        tm << l_a
+      }
+      tm = tm.transpose if tm.size == 4
+      matrix = PM.new_matrix(tm.size)
+      tm.each_index { |i| ['A', 'C', 'G', 'T'].each_with_index { |l, j| matrix[l][i] = tm[i][j] }  }
+      ppm_mode = (0...tm.size).inject(true) { |ppm_ya, i| ppm_ya &= col_sum(matrix, i).round == 1 }
+      return ppm_mode ? PPM.new(tm.size, matrix) : PM.new(tm.size, matrix)
+    end
+    def save(filename)
+      File.open(filename, "w") { |out_f|
+        case File.ext_wo_name(filename)
+        when "pwm"
+          ['A', 'C', 'G', 'T'].each { |letter|
+            @matrix[letter].each { |e|
+              out_f << "#{e} "
+            }
+            out_f << $/
+          }
+        when "pat"
+          out_f.puts File.name_wo_ext(filename)
+          (0...@size).each { |i|
+            ['A', 'C', 'G', 'T'].each { |letter|
+              out_f << "#{@matrix[letter][i]} "
+            }
+            out_f << $/
+          }
+        when "xml"
+          checkerr("small-BiSMark is not supported at this moment")
+        else
+          checkerr("unknown motif file format specified")
+        end
+      }
+    end
+    def positiv!
+      min = @matrix.values.collect { |v| v.min }.min.abs
+      @matrix.each_value { |v| (0...v.size).each { |i| v[i] += min } }
+      return self
+    end
+    def revcomp!
+      @matrix['A'], @matrix['T'] = @matrix['T'], @matrix['A']
+      @matrix['C'], @matrix['G'] = @matrix['G'], @matrix['C']
+      @matrix.each_value { |v| v.reverse! }
+      self
+    end
+    def to_bismark(b)
+      pwm = @matrix['A'][0].is_a?(Float)
+      attributes = {"length" => @size}
+      attributes["words-count"] = @words_count if @words_count && @words_count > 0
+      pe = b.add_element( pwm ? "PWM" : "PCM", attributes )
+      (0...@matrix['A'].size).each { |i|
+        pm_c = pe.add_element("pm-column", {"position" => i+1})
+        ['A', 'C', 'G', 'T'].each { |l|
+          pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
+        }
+      }
+    end
+    def PM.from_bismark(b, iupacomp = false)
+      checkerr("empty small-BiSMark file?") { !b }
+      float_m = (b.name == "PPM" || b.name == "PWM" || b.name == "WPCM")
+      words_count = b.attributes["words-count"] ? b.attributes["words-count"].to_f : nil
+      matrix = {"A" => [], "C" => [], "G" => [], "T" => []}
+      b.elements.each("pm-column") { |pmc|
+        position = pmc.attributes["position"].to_i
+        ['A', 'C', 'G', 'T'].each { |l| matrix[l][position-1] = float_m ? pmc.elements[l.downcase].get_text.to_s.to_f : pmc.elements[l.downcase].get_text.to_s.to_i }
+      }
+      if b.name == "PPM"
+        newppm = PPM.new(matrix['A'].size, matrix, words_count)
+        newppm.iupacomp! if iupacomp
+        return newppm
+      end
+      if b.name == "PCM"
+        @words_count = col_sum(matrix)
+        newpcm = PM.new(matrix['A'].size, matrix, words_count)
+        newpcm.iupacomp! if iupacomp
+        return newpcm
+      end
+      if b.name == "PWM" && iupacomp
+        raise "cannot force IUPAC compatible PWM"
+      end
+      return PM.new(matrix['A'].size, matrix, words_count)
+    end
+    IUPAC_LS = (IUPAC::CODE.keys - ['A','C','G','T']).collect { |iul| [IUPAC::CODE[iul], iul.split(//)] }
+    def iupacomp!
+      @words_count = (0...@size).collect { |i| col_sum(i) }.max unless @words_count # for unbalanced matrices (Genomatix has some)
+      # @words_count = @words_count.round < 2.0 ? nil : @words_count.round
+      IUPAC_LS.each { |iul_ls|
+        @matrix[iul_ls[0]] = (0...@size).collect { |i| col_sum(i, iul_ls[1]) / iul_ls[1].size }
+      }
+      return self
+    end
+    def m3sd(bckgr = Randoom::DEF_PROBS)
+      mean = (0...@size).inject(0.0) { |mean, i| mean += ['A','C','G','T'].inject(0.0) { |sum,l| sum += @matrix[l][i] * bckgr[l] } }
+      dev = (0...@size).inject(0.0) { |m2, i|
+        deltai = ['A','C','G','T'].inject(0.0) { |sum,l| sum += @matrix[l][i]**2 * bckgr[l] } - ['A','C','G','T'].inject(0.0) { |sum,l| sum += matrix[l][i] * bckgr[l] }**2
+        m2 += deltai
+      }
+      sigma = Math.sqrt(dev)
+      mean+3*sigma
+    end
+    def fixwc
+      return unless @words_count
+      @words_count = (0...@size).collect { |i| col_sum(i) }.max
+    end
+    protected
+    def PM.new_matrix(size)
+      return {
+        'A' => Array.new(size),
+        'C' => Array.new(size),
+        'G' => Array.new(size),
+        'T' => Array.new(size) }
+    end
+    def PM.new_matrix_iupac(size)
+      return {
+        'A' => Array.new(size),
+        'C' => Array.new(size),
+        'G' => Array.new(size),
+        'T' => Array.new(size),
+        'R' => Array.new(size),
+        'Y' => Array.new(size),
+        'K' => Array.new(size),
+        'M' => Array.new(size),
+        'S' => Array.new(size),
+        'W' => Array.new(size),
+        'B' => Array.new(size),
+        'D' => Array.new(size),
+        'H' => Array.new(size),
+        'V' => Array.new(size),
+        'N' => Array.new(size)
+        }
+    end
+  end
+  class PPM < PM
+    #DEPRECATED, use iupacomp! instead
+    #def make_N_comp!
+    #  @matrix['N'] = (0...size).collect { 0.25 }
+    #  return self
+    #end
+    def initialize(size, matrix = nil, words_count = nil)
+      checkerr("matrix['A'].size != size") { matrix != nil && size != matrix['A'].size }
+      @size = size
+      @matrix = matrix == nil ? PM.new_matrix(size) : matrix
+      @words_count = words_count
+    end
+    def iupacomp!
+      @words_count = 4.0 unless @words_count
+      IUPAC_LS.each { |iul_ls|
+        @matrix[iul_ls[0]] = (0...@size).collect { |i| col_sum(i, iul_ls[1]) / iul_ls[1].size }
+      }
+      return self
+    end
+    def score(word)
+      checkerr("word size != ppm.size") { @size != word.size }
+      checkerr("word #{word} has strange characters") {
+        @matrix.keys.include?('N') ? word.tr('ACGTRYKMSWBDHVN', '').size > 0 : word.tr('ACGT', '').size > 0
+      }
+      return (0...@size).inject(1) { |mul, i|
+        mul *= @matrix[word[i,1]][i]
+      }
+    end
+    def best_score
+      return (0...size).inject(1) { |mul, i|
+        mul *= ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.max
+      }
+    end
+    def worst_score
+      return (0...size).inject(0) { |mul, i|
+        mul *= ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.min
+      }
+    end
+    def to_bismark(b)
+      attributes = {"length" => @size}
+      attributes["words-count"] = @words_count if @words_count
+      pe = b.add_element("PPM", attributes)
+      (0...@matrix['A'].size).each { |i|
+        pm_c = pe.add_element("pm-column", {"position" => i+1})
+        ['A', 'C', 'G', 'T'].each { |l|
+          pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
+        }
+      }
+    end
+    def PPM.probs2IUPAC!(probs)
+      IUPAC_LS.each { |iul_ls|
+        probs[iul_ls[0]] = iul_ls[1].inject(0) { |sum, l| sum += probs[l] } / iul_ls[1].size
+      }
+      return probs
+    end
+    def get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1.0)
+      probs = PPM.probs2IUPAC!(probs.dup)
+      words_count = @words_count if !words_count || words_count == 0
+      checkerr("undefined words count") { !words_count }
+      pwm = @matrix['N'] ? PM.new_matrix_iupac(@size) : PM.new_matrix(@size)
+      @matrix.each_key do |letter|
+        (0...@size).each { |pos|
+          pwm[letter][pos] = Math::log( (@matrix[letter][pos] * words_count + (probs[letter] * pseudocount) ) / ( (words_count + pseudocount) * probs[letter]) )
+        }
+      end
+      return PM.new(@size, pwm, words_count)
+      #pcm = get_pcm(words_count)
+      #pcm.iupacomp! if @matrix['N']
+      #return pcm.to_pwm!(words_count, probs, pseudocount)
+    end
+    alias to_pwm get_pwm
+    def get_pwm0pc(probs = Randoom::DEF_PROBS)
+      new_matrix = {}
+      @matrix.each_key { |letter| new_matrix[letter] = @matrix[letter].dup }
+      newpm = PM.new(@size, new_matrix, nil)
+      new_matrix.each_key do |letter|
+        (0...@size).each { |pos|
+          new_matrix[letter][pos] = Math::log(@matrix[letter][pos] / probs[letter])
+        }
+      end
+      return newpm
+    end
+    def to_pwm!
+      raise "cannot force PPM class to PWM, use to_pwm instead"
+    end
+    def get_pcm(words_count = nil)
+      words_count = @words_count unless words_count
+      checkerr("undefined words count") { !words_count }
+      counts = PM.new_matrix(@size)
+      (0...size).each { |i|
+        ['A', 'C', 'G', 'T'].each { |l|
+          counts[l][i] = @matrix[l][i] * words_count
+        }
+      }
+      newpcm = PM.new(size, counts, words_count).iupacomp!
+      return newpcm
+    end
+    alias to_pcm get_pcm
+    def PPM.from_IUPAC(iupac)
+      matrix = {"A" => [], "C" => [], "G" => [], "T" => []}
+      (0...iupac.size).each { |i|
+        matrix.each_key { |k| matrix[k] << 0.0 }
+        letters = IUPAC::REVCODE[iupac[i]]
+        (0...letters.size).each { |j|
+          matrix[letters[j]][-1] = 1.0/letters.size
+        }
+      }
+      newppm = PPM.new(iupac.size, matrix, 4.0)
+      newppm.iupacomp!
+      newppm
+    end
+  end
+end