RubyGems - trace_visualization - Versions diffs - 0.0.1 - Mend

trace_visualization 0.0.1

Files changed (38) hide show

checksums.yaml +7 -0
data/.gitignore +17 -0
data/Gemfile +6 -0
data/LICENSE +339 -0
data/LICENSE.txt +22 -0
data/README.md +4 -0
data/Rakefile +1 -0
data/lib/trace_visualization/bwt.rb +32 -0
data/lib/trace_visualization/bwt.rbold +32 -0
data/lib/trace_visualization/data/irepetition.rb +49 -0
data/lib/trace_visualization/data/repetition.rb +101 -0
data/lib/trace_visualization/generators.rb +53 -0
data/lib/trace_visualization/longest_common_prefix.rb +34 -0
data/lib/trace_visualization/mapping.rb +120 -0
data/lib/trace_visualization/reorder.rb +25 -0
data/lib/trace_visualization/repetitions.rb +66 -0
data/lib/trace_visualization/repetitions_concatenation.rb +134 -0
data/lib/trace_visualization/repetitions_context.rb +18 -0
data/lib/trace_visualization/repetitions_incrementation.rb +81 -0
data/lib/trace_visualization/repetitions_psy.rb +83 -0
data/lib/trace_visualization/suffix_array.rb +203 -0
data/lib/trace_visualization/utils.rb +47 -0
data/lib/trace_visualization/version.rb +3 -0
data/lib/trace_visualization/visualization/console_color_print.rb +32 -0
data/lib/trace_visualization.rb +10 -0
data/spec/bwt_spec.rb +47 -0
data/spec/generators_spec.rb +30 -0
data/spec/longest_common_prefix_spec.rb +29 -0
data/spec/mapping_spec.rb +67 -0
data/spec/reorder_spec.rb +42 -0
data/spec/repetitions_concatenation_spec.rb +58 -0
data/spec/repetitions_incrementation_spec.rb +88 -0
data/spec/repetitions_psy_spec.rb +39 -0
data/spec/repetitions_spec.rb +18 -0
data/spec/spec_helper.rb +19 -0
data/spec/suffix_array_spec.rb +68 -0
data/trace_visualization.gemspec +35 -0
metadata +204 -0

data/lib/trace_visualization/suffix_array.rb ADDED Viewed

@@ -0,0 +1,203 @@
module TraceVisualization
  # Suffix array construction for strings and for arrays whose elements
  # respond to +ord+.
  #
  # A suffix array lists the start positions of all suffixes of the input in
  # lexicographic order of those suffixes.
  module SuffixArray
    # Builds the suffix array by sorting the suffixes themselves.
    # Simple reference implementation; it materializes every suffix.
    #
    # str:: a String or an Array (anything supporting +length+ and
    #       +[range]+ whose slices are comparable with +<=>+)
    #
    # Returns an Array of start positions.
    def self.naive(str)
      (0...str.length).sort_by { |start| str[start..-1] }
    end

    # Builds the suffix array with the linear-work skew algorithm
    # (see +effective_linear+).
    #
    # str:: a String, or an Array of items responding to +ord+
    #
    # Returns an Array of start positions.
    # Raises ArgumentError when +str+ is neither a String nor an Array.
    #
    # NOTE(review): the algorithm pads with 0 as a sentinel, so it presumably
    # expects every +ord+ to be greater than 0 - confirm against callers.
    def self.effective(str)
      codes =
        if str.is_a?(String)
          str.each_char.map(&:ord)
        elsif str.is_a?(Array)
          str.map(&:ord)
        else
          raise ArgumentError, "expected a String or an Array, got #{str.class}"
        end

      n = codes.length
      # The skew algorithm below is only valid for n >= 2; the answers for
      # shorter inputs are trivial ([] and [0]).
      return (0...n).to_a if n < 2

      # Three trailing zeros let triples be read past the end of the input.
      3.times { codes << 0 }
      suffix_array = Array.new(n + 3, 0)
      effective_linear(codes, suffix_array, n, codes.max + 1)
      suffix_array[0...-3]
    end

    # Find the suffix array SA.
    # Used approach from article "Linear Work Suffix Array Construction"
    # by Juha Karkkainen, Peter Sanders and Stefan Burkhardt
    #
    # s::             integer codes followed by three 0 entries
    # suffix_array::  output array, filled in place for indices 0 ... n
    # n::             number of real entries in +s+ (callers pass n >= 2)
    # alphabet_size:: upper bound for the keys, forwarded to +radix_pass+
    #
    # Returns nil; the result is written into +suffix_array+.
    def self.effective_linear(s, suffix_array, n, alphabet_size)
      n0 = (n + 2) / 3
      n1 = (n + 1) / 3
      n2 = n / 3
      n02 = n0 + n2

      s12  = Array.new(n02 + 3, 0)
      sa12 = Array.new(n02 + 3, 0)
      s0   = Array.new(n0, 0)
      sa0  = Array.new(n0, 0)

      # Generate positions of mod 1 and mod 2 suffixes
      # the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
      j = 0
      (0...(n + (n0 - n1))).each do |i|
        next if (i % 3).zero?

        s12[j] = i
        j += 1
      end

      # LSB radix sort the mod 1 and mod 2 triples
      radix_pass(s12, sa12, s[2...s.length], n02, alphabet_size)
      radix_pass(sa12, s12, s[1...s.length], n02, alphabet_size)
      radix_pass(s12, sa12, s, n02, alphabet_size)

      # Find lexicographic names of triples
      name = 0
      c0 = c1 = c2 = -1
      (0...n02).each do |i|
        pos = sa12[i]
        if s[pos] != c0 || s[pos + 1] != c1 || s[pos + 2] != c2
          name += 1
          c0 = s[pos]
          c1 = s[pos + 1]
          c2 = s[pos + 2]
        end
        if pos % 3 == 1
          s12[pos / 3] = name      # Left half
        else
          s12[pos / 3 + n0] = name # Right half
        end
      end

      if name < n02
        # Recurse if names are not yet unique
        effective_linear(s12, sa12, n02, name)
        # Store unique names in s12 using the suffix array
        (0...n02).each { |i| s12[sa12[i]] = i + 1 }
      else
        # Generate the suffix array of s12 directly
        (0...n02).each { |i| sa12[s12[i] - 1] = i }
      end

      # Stably sort the mod 0 suffixes from sa12 by their first character
      j = 0
      (0...n02).each do |i|
        next unless sa12[i] < n0

        s0[j] = 3 * sa12[i]
        j += 1
      end
      radix_pass(s0, sa0, s, n0, alphabet_size)

      # Merge sorted sa0 suffixes and sorted sa12 suffixes
      p0 = 0
      t = n0 - n1
      k = 0
      while k < n
        # Pos of current offset 12 suffix
        i = get_i(n0, sa12, t)
        # Pos of current offset 0 suffix
        j = sa0[p0]

        # Different compares for mod 1 and mod 2 suffixes
        take_sa12 =
          if sa12[t] < n0
            leq_pairs(s[i], s12[sa12[t] + n0], s[j], s12[j / 3])
          else
            leq_triples(s[i], s[i + 1], s12[sa12[t] - n0 + 1],
                        s[j], s[j + 1], s12[j / 3 + n0])
          end

        if take_sa12
          suffix_array[k] = i
          t += 1
          if t == n02
            # Done: only sa0 suffixes left
            k += 1
            while p0 < n0
              suffix_array[k] = sa0[p0]
              p0 += 1
              k += 1
            end
          end
        else
          suffix_array[k] = j
          p0 += 1
          if p0 == n0
            # Done: only sa12 suffixes left
            k += 1
            while t < n02
              suffix_array[k] = get_i(n0, sa12, t)
              t += 1
              k += 1
            end
          end
        end
        k += 1
      end
    end

    # The methods below are internal helpers of +effective_linear+. They stay
    # publicly callable, exactly as before: a bare +private+ does not apply to
    # methods defined with +def self.+, so the former marker had no effect.

    # Stably sort a[0 .. n - 1] to b[0 .. n - 1] with keys in 0 .. K from r
    # Params:
    # +a+:: positions in r for sort
    # +b+:: sorted positions in r
    # +r+:: source
    # +n+:: number of positions in a and b
    # +k+:: size of alphabet
    def self.radix_pass(a, b, r, n, k)
      counters = Array.new(k + 1, 0)

      # Count occurrences
      (0...n).each { |i| counters[r[a[i]]] += 1 }

      # Exclusive prefix sums
      sum = 0
      (0..k).each do |key|
        count = counters[key]
        counters[key] = sum
        sum += count
      end

      # Sort
      (0...n).each do |i|
        key = r[a[i]]
        b[counters[key]] = a[i]
        counters[key] += 1
      end
    end

    # Maps entry +t+ of +sa12+ back to a position in the original sequence:
    # entries below +n0+ are mod 1 suffixes, the others are mod 2 suffixes.
    def self.get_i(n0, sa12, t)
      sa12[t] < n0 ? sa12[t] * 3 + 1 : (sa12[t] - n0) * 3 + 2
    end

    # Lexicographic order for pairs
    def self.leq_pairs(a1, a2, b1, b2)
      a1 < b1 || (a1 == b1 && a2 <= b2)
    end

    # Lexicographic order for triples
    def self.leq_triples(a1, a2, a3, b1, b2, b3)
      a1 < b1 || (a1 == b1 && leq_pairs(a2, a3, b2, b3))
    end
  end
end

data/lib/trace_visualization/utils.rb ADDED Viewed

@@ -0,0 +1,47 @@
module TraceVisualization
  # Small helpers shared by the repetition-processing code.
  module Utils
    # Combines the hashes of two values into one number.
    # The sum is symmetric: rhash(a, b) == rhash(b, a).
    def self.rhash(lp, rp)
      lp.hash + rp.hash
    end

    # Get the start position of lines
    #
    # Always contains 0; a newline that is the very last character does not
    # open another line.
    def self.lines_pos(str)
      starts = [0]
      newline_at = -1
      while (newline_at = str.index(/\n/, newline_at + 1))
        line_start = newline_at + 1
        starts << line_start if line_start < str.length
      end
      starts
    end

    # Repetitions by line
    #
    # rs::         repetitions; each must respond to +left_positions+ and
    #              +lines=+
    # lines_pos::  line start positions, as returned by +lines_pos+
    # rs_by_line:: array with one (appendable) bucket per line; receives
    #              [repetition, position] pairs and is returned
    #
    # Each repetition's +lines+ is reset and filled with the line index of
    # every matched position. The walk advances both lists together, so it
    # presumably expects +left_positions+ in ascending order.
    def self.rs_by_line(rs, lines_pos, rs_by_line)
      rs.each do |r|
        r_pos = r.left_positions
        r.lines = []

        i = 0
        j = 0
        while i < lines_pos.size && j < r_pos.size
          line_start = lines_pos[i]
          # The last line is unbounded, so no position is cut off by an
          # arbitrary numeric limit.
          line_end = i + 1 < lines_pos.size ? lines_pos[i + 1] : Float::INFINITY

          if line_start <= r_pos[j] && r_pos[j] < line_end
            rs_by_line[i] << [r, r_pos[j]]
            r.lines << i
            j += 1
          else
            i += 1
          end
        end
      end

      # Order the entries of every line by position.
      rs_by_line.each { |item| item.sort! { |a, b| a[1] <=> b[1] } }
      rs_by_line
    end
  end # module Utils
end # module TraceVisualization

data/lib/trace_visualization/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
module TraceVisualization
  # Version of the gem. Frozen so the shared constant cannot be mutated.
  VERSION = "0.0.1".freeze
end

data/lib/trace_visualization/visualization/console_color_print.rb ADDED Viewed

@@ -0,0 +1,32 @@
+require 'travis/data/repetition'
# NOTE(review): this file lives under trace_visualization/ but still uses the
# `Travis` namespace and requires 'travis/data/repetition'. That looks like a
# leftover from an earlier project name - confirm before renaming, since the
# module name is part of the public interface.
module Travis
  module Visualization
    # Highlights the occurrences of a repetition with ANSI colors.
    module ConsoleColorPrint
      GRN  = "\033[1;32m".freeze # bold green: the repeated fragments
      YLW  = "\033[1;33m".freeze # bold yellow: text between fragments of one occurrence
      FNSH = "\033[0m".freeze    # reset attributes

      # Returns a copy of +str+ with escape sequences inserted around the
      # parts covered by +repetition+.
      #
      # str::        the text to decorate
      # repetition:: object responding to +build_positions+, which must
      #              return a list of occurrences; every occurrence is a
      #              non-empty list of [position, length] pairs
      #
      # Text outside of occurrences is copied unchanged.
      def self.hl(str, repetition)
        result = +""
        prev_position = 0

        repetition.build_positions.each do |position|
          # Plain text between the previous occurrence and this one.
          result << str[prev_position...position[0][0]]

          position.each_with_index do |(pos, len), i|
            result << GRN << str[pos...pos + len].to_s << FNSH
            next unless i < position.size - 1

            # Gap up to the next fragment of the same occurrence.
            result << YLW << str[pos + len...position[i + 1][0]].to_s << FNSH
          end

          prev_position = position[-1][0] + position[-1][1]
        end

        result << str[prev_position..-1]
      end
    end
  end
end

data/lib/trace_visualization.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require "trace_visualization/version"
module TraceVisualization
  # Character the specs append to the end of a string before building its
  # suffix array.
  # Should be 'greater' of all possible chars in the lexicographical order
  # NOTE(review): '$' (0x24) sorts *below* digits and letters, which
  # contradicts the line above - confirm which ordering is intended.
  TERMINATION_CHAR = '$'.freeze

  # Characters that repetitions must not be concatenated across
  # (currently only the newline).
  FORBIDDEN_CHARS = /\n/.freeze
end

data/spec/bwt_spec.rb ADDED Viewed

@@ -0,0 +1,47 @@
+require 'trace_visualization'
+require 'trace_visualization/bwt'
+require 'trace_visualization/mapping'
+require 'trace_visualization/suffix_array'
describe TraceVisualization::BurrowsWheelerTransform do
  # Shorthands for the modules exercised by the examples below.
  bwt_module   = TraceVisualization::BurrowsWheelerTransform
  suffix_array = TraceVisualization::SuffixArray

  it "naive approach" do
    bwt_module.naive("^BANANA|").should eq %w[B N N ^ A A | A]
  end

  it "effective implementation" do
    text = "abaababa"
    transformed = bwt_module.bwt(text, suffix_array.effective(text), text.length)
    transformed.should eq %w[b b b a a a a a]

    # Cross-check the fast implementations against the naive ones on
    # random upper-case strings.
    10.times do
      random_text = Array.new(60) { (65 + rand(26)).chr }.join + TraceVisualization::TERMINATION_CHAR

      expected_sa = suffix_array.naive(random_text)
      actual_sa   = suffix_array.effective(random_text)
      actual_sa.should eq expected_sa

      expected_bwt = bwt_module.naive(random_text)
      actual_bwt   = bwt_module.bwt(random_text, suffix_array.effective(random_text), random_text.length)
      actual_bwt.should eq expected_bwt
    end
  end

  it "test with mapping" do
    items = TraceVisualization::Mapping.parse("127.0.0.1 a 127.0.0.1 b")
    ip, ws, a, b = items[0], items[1], items[2], items[6]

    transformed = bwt_module.bwt(items, suffix_array.effective(items), items.length)
    restored    = TraceVisualization::Mapping.restore(transformed)

    transformed.should eq [ip, ip, a, ws, ws, b, ws]
    restored.should    eq "127.0.0.1127.0.0.1a  b "
  end
end

data/spec/generators_spec.rb ADDED Viewed

@@ -0,0 +1,30 @@
+require 'trace_visualization/generators'
describe TraceVisualization::Generators do
  describe TraceVisualization::Generators::Thue do
    thue = TraceVisualization::Generators::Thue

    it "thue_2_3" do
      # Generation number => expected string.
      {
        0 => "a",
        1 => "ab",
        2 => "abba",
        3 => "abbabaab",
        4 => "abbabaabbaababba",
        5 => "abbabaabbaababbabaababbaabbabaab"
      }.each do |generation, expected|
        thue.str_2_3(generation).should eq expected
      end
    end

    it "thue_3_2" do
      {
        0 => "a",
        1 => "abcab",
        2 => "abcabacabcbacbcacbabcabacabcb"
      }.each do |generation, expected|
        thue.str_3_2(generation).should eq expected
      end
    end
  end

  # The Fibonacci example gets its own group instead of being reported under
  # the Thue generator.
  describe TraceVisualization::Generators::Fibonacci do
    fibonacci = TraceVisualization::Generators::Fibonacci

    it "fibonacci" do
      {
        0 => "b",
        1 => "a",
        2 => "ab",
        3 => "aba",
        4 => "abaab",
        5 => "abaababa",
        6 => "abaababaabaab"
      }.each do |generation, expected|
        fibonacci.str(generation).should eq expected
      end
    end
  end
end

data/spec/longest_common_prefix_spec.rb ADDED Viewed

@@ -0,0 +1,29 @@
+require 'trace_visualization/longest_common_prefix'
+require 'trace_visualization/suffix_array'
+require 'trace_visualization/mapping'
describe TraceVisualization::LongestCommonPrefix do
  context '.effective' do
    lcp_module   = TraceVisualization::LongestCommonPrefix
    suffix_array = TraceVisualization::SuffixArray

    it 'should return array with longest common prefix in linear time' do
      text = "mississippi"
      positions = suffix_array.effective(text)

      prefixes = lcp_module.effective(text, positions, text.size)

      prefixes.should eq([0, 1, 1, 4, 0, 0, 1, 0, 2, 1, 3])
    end

    it 'should return correct result for mapped string', :current => true do
      # The input is first mapped to items, so the arrays below have one
      # entry per item rather than per character.
      items = TraceVisualization::Mapping.parse("127.0.0.1 foo\r\n127.0.0.1 bar")
      positions = suffix_array.effective(items)

      prefixes = lcp_module.effective(items, positions, items.size)

      positions.should eq([6, 5, 8, 1, 10, 9, 2, 4, 3, 11, 7, 0])
      prefixes.should eq([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 2])
    end
  end
end

data/spec/mapping_spec.rb ADDED Viewed

@@ -0,0 +1,67 @@
+require 'trace_visualization/mapping'
describe TraceVisualization::Mapping do
  mapping = TraceVisualization::Mapping

  it "simple id values" do
    source = "foo[1234]bar[1235]far[1234]"
    items = mapping.parse(source)
    items.size.should eq(12)

    ids = items.select { |item| item.type == "id" }
    ids.size.should eq(3)
    ids[0].src.should eq("[1234]")
    ids[1].src.should eq("[1235]")
    ids[2].src.should eq("[1234]")
    # Equal sources give equal items.
    ids[0].should eq(ids[2])

    # Parsing followed by restoring is lossless.
    mapping.restore(items).should eq(source)
  end

  it "ip values" do
    source = "user1 ip : 127.0.0.1 \r\nuser2 ip : 127.0.0.2"
    items = mapping.parse(source)
    items.size.should eq(27)

    ips = items.select { |item| item.type == "ip" }
    ips.size.should eq(2)
    ips[0].src.should eq("127.0.0.1")
    ips[1].src.should eq("127.0.0.2")

    mapping.restore(items).should eq(source)
  end

  it "compare different types" do
    item    = TraceVisualization::Mapping::Item
    reorder = TraceVisualization::Reorder

    # Ids
    id_1 = item.new("[12345678]", "id")
    id_2 = item.new("[12345679]", "id")
    reorder.process([id_1, id_2])
    id_1.should be < id_2

    # IPs
    ip_1 = item.new("127.0.0.1", "ip")
    ip_2 = item.new("127.0.0.2", "ip")
    reorder.process([ip_1, ip_2])
    ip_1.should be < ip_2

    # Time
    time_1 = item.new("[16 Jan 2013 00:10:00]", "time")
    time_2 = item.new("[16 Jan 2013 00:10:01]", "time")
    reorder.process([time_1, time_2])

    # Different
    reorder.process([time_1, time_2, id_1, ip_1])
    time_1.should be < time_2
    id_1.should be < ip_1
    id_1.should be < time_1
  end
end

data/spec/reorder_spec.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require 'trace_visualization/reorder'
describe TraceVisualization::Reorder do
  # Minimal comparable value holder with an +ord+ slot for Reorder to fill.
  # It is an anonymous class kept in a local, so the spec does not define a
  # global top-level constant.
  item_class = Class.new do
    include Comparable

    attr_accessor :ord
    attr_accessor :value

    def initialize(value)
      @value = value
    end

    def <=>(other)
      @value <=> other.value
    end
  end

  it "reorder values and set correct order" do
    data = Array.new(100) { |index| item_class.new(index * 100) }
    data.shuffle!

    TraceVisualization::Reorder.process(data)

    # Values are 0, 100, 200, ... so the expected 1-based rank is value / 100 + 1.
    data.each { |item| item.ord.should eq(item.value / 100 + 1) }
  end

  it "duplicate values with the same order", :current => true do
    x1, x2 = item_class.new(1), item_class.new(2)
    x3, x4 = item_class.new(123456789), item_class.new(123456789)
    data = [x1, x2, x3, x4].shuffle

    TraceVisualization::Reorder.process(data)

    x1.ord.should eq(1)
    x2.ord.should eq(2)
    x3.ord.should eq(3)
    x4.ord.should eq(3)
  end
end

data/spec/repetitions_concatenation_spec.rb ADDED Viewed

@@ -0,0 +1,58 @@
+require 'trace_visualization/repetitions_concatenation'
+require 'trace_visualization/repetitions_context'
+require 'trace_visualization/data/repetition'
describe TraceVisualization::RepetitionsConcatenation do
  it "process common positions" do
    # "aaa" and "bbb" always occur one character apart.
    str = "aaaxbbbyaaazbbbvaaawbbb"
    context = TraceVisualization::Repetitions::Context.new(str, [])

    lpos = [0, 8, 16]
    rpos = [4, 12, 20]

    left  = TraceVisualization::Data::Repetition.new(3, lpos)
    right = TraceVisualization::Data::Repetition.new(3, rpos)

    cpl, cpr = TraceVisualization::RepetitionsConcatenation.process_common_positions(left, right, 1, context)

    left.left_positions.should  eq cpl
    right.left_positions.should eq cpr

    lpos.should eq left.left_positions
    rpos.should eq right.left_positions
  end

  it "don't concatenate repetitions through forbidden chars" do
    str = <<EOF
aaa
bbb
aaa
bbb
EOF
    context = TraceVisualization::Repetitions::Context.new(str, [])

    lpos, rpos = [0, 8], [4, 12]

    left  = TraceVisualization::Data::Repetition.new(3, lpos)
    right = TraceVisualization::Data::Repetition.new(3, rpos)

    cpl, cpr = TraceVisualization::RepetitionsConcatenation.process_common_positions(left, right, 1, context)

    cpl.should eq []
    cpr.should eq []

    # With the newlines replaced by an ordinary character the same
    # positions are accepted.
    str.gsub!(TraceVisualization::FORBIDDEN_CHARS, "x")
    context = TraceVisualization::Repetitions::Context.new(str, [])

    lpos, rpos = [0, 8], [4, 12]

    left  = TraceVisualization::Data::Repetition.new(3, lpos)
    right = TraceVisualization::Data::Repetition.new(3, rpos)

    cpl, cpr = TraceVisualization::RepetitionsConcatenation.process_common_positions(left, right, 1, context)

    cpl.should eq [0, 8]
    cpr.should eq [4, 12]
  end
end

data/spec/repetitions_incrementation_spec.rb ADDED Viewed

@@ -0,0 +1,88 @@
+require 'trace_visualization/repetitions_incrementation'
+require 'trace_visualization/data/repetition'
describe TraceVisualization::RepetitionsIncrementation do
  incrementation = TraceVisualization::RepetitionsIncrementation
  repetition_class = TraceVisualization::Data::Repetition

  it "simple incrementation" do
    str = <<EOF
foo 12
foo 13
foo 12
foo 13
foo 12
foo 13
EOF
    hashes = []

    # Every line is 7 characters long including the newline.
    prefix    = repetition_class.new(5, [0, 7, 14, 21, 28, 35])
    lines_12  = repetition_class.new(6, [0, 14, 28])
    lines_13  = repetition_class.new(6, [7, 21, 35])
    repetitions = [prefix, lines_12, lines_13]

    incrementation.incrementation(str, repetitions, hashes, 1)

    # The call appends one new repetition to the list.
    added = repetitions[-1]

    repetitions.size.should  eq 4
    added.length.should eq 6
    added.k.should      eq 1
    added.left_positions.should  eq [0, 7, 14, 21, 28, 35]
    added.right_positions.should eq [6, 13, 20, 27, 34, 41]
  end

  it "left incrementation" do
    str = <<EOF
12_foo
14_foo
22_foo
24_foo
30_bar
32_foo
34_foo
EOF
    hashes = []

    suffix   = repetition_class.new(4, [2, 9, 16, 23, 37, 44])
    ending_2 = repetition_class.new(5, [1, 15, 36])
    ending_4 = repetition_class.new(5, [8, 22, 43])
    repetitions = [suffix, ending_2, ending_4]

    incrementation.incrementation(str, repetitions, hashes, 1)

    added = repetitions[-1]

    repetitions.size.should  eq 4
    added.k.should      eq 1
    added.length.should eq 5
    added.left_positions.should  eq [1, 8, 15, 22, 36, 43]
    added.right_positions.should eq [2, 9, 16, 23, 37, 44]
  end

  it "test fake repetition" do
    str = <<EOF
abcde11edcb
abcde22bcde
EOF
    left_part  = repetition_class.new(5, [0, 12])
    right_part = repetition_class.new(0, [7, 19])

    combined = repetition_class.new(7, [0, 12], [7, 19])
    combined.left  = left_part
    combined.right = right_part
    combined.k     = 2

    fake = incrementation.fake_repetition(
      combined.class, combined.left_positions, combined.right_positions,
      "right"
    )

    right_part.left_positions.should  eq fake.left_positions
    right_part.right_positions.should eq fake.right_positions
  end
end