trace_visualization 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/bin/trace_visualization +21 -26
  4. data/lib/trace_visualization.rb +2 -2
  5. data/lib/trace_visualization/algorithm.rb +42 -0
  6. data/lib/trace_visualization/data/irepetition.rb +5 -1
  7. data/lib/trace_visualization/data/repetition.rb +14 -6
  8. data/lib/trace_visualization/data/sorted_array.rb +44 -0
  9. data/lib/trace_visualization/data/{lexeme.rb → token.rb} +7 -8
  10. data/lib/trace_visualization/lexeme_overlap_filter.rb +18 -18
  11. data/lib/trace_visualization/mapping.rb +72 -185
  12. data/lib/trace_visualization/repetitions/concatenation.rb +136 -0
  13. data/lib/trace_visualization/repetitions/context.rb +70 -0
  14. data/lib/trace_visualization/repetitions/filter.rb +153 -0
  15. data/lib/trace_visualization/repetitions/incrementation.rb +89 -0
  16. data/lib/trace_visualization/repetitions_psy.rb +12 -5
  17. data/lib/trace_visualization/utils.rb +5 -42
  18. data/lib/trace_visualization/version.rb +1 -1
  19. data/lib/trace_visualization/visualization/console_color_print.rb +20 -4
  20. data/spec/algorithm_spec.rb +69 -0
  21. data/spec/bwt_spec.rb +2 -5
  22. data/spec/data/sorted_array_spec.rb +27 -0
  23. data/spec/lexeme_overlap_filter_spec.rb +22 -22
  24. data/spec/longest_common_prefix_spec.rb +3 -8
  25. data/spec/mapping_spec.rb +72 -69
  26. data/spec/repetitions/concatenation_spec.rb +65 -0
  27. data/spec/repetitions/filter_spec.rb +180 -0
  28. data/spec/repetitions/incrementation_spec.rb +29 -0
  29. data/spec/repetitions_psy_spec.rb +7 -16
  30. data/spec/suffix_array_spec.rb +11 -31
  31. data/spec/utils_spec.rb +21 -9
  32. data/spec/visualization/console_color_print_spec.rb +26 -0
  33. data/trace_visualization.gemspec +2 -2
  34. metadata +27 -13
  35. data/lib/trace_visualization/repetitions_concatenation.rb +0 -134
  36. data/lib/trace_visualization/repetitions_context.rb +0 -18
  37. data/spec/repetitions_concatenation_spec.rb +0 -64
@@ -5,12 +5,19 @@ require 'trace_visualization/data/repetition'
5
5
 
6
6
  module TraceVisualization
7
7
  module Repetitions
8
- def self.psy1(str, p_min, decode_result = true)
9
- sa = TraceVisualization::SuffixArray.effective(str)
10
- lcp = TraceVisualization::LongestCommonPrefix.effective(str, sa, str.size)
11
- bwt = TraceVisualization::BurrowsWheelerTransform.bwt(str, sa, str.length)
8
+
9
+ # Computes all the complete nonextendible repeats using PSY1 algorithm
10
+ # @param data [Array] Array of objects
11
+ # @param p_min [Integer] The minimum number of positions in which we have repetition
12
+ # @param decode_result [Boolean]
13
+ def self.psy1(data, p_min, decode_result = true)
14
+ raise ArgumentError, 'Data is empty' if data == nil || data.size == 0
15
+
16
+ sa = TraceVisualization::SuffixArray.effective(data)
17
+ lcp = TraceVisualization::LongestCommonPrefix.effective(data, sa, data.size)
18
+ bwt = TraceVisualization::BurrowsWheelerTransform.bwt(data, sa, data.size)
12
19
 
13
- result = psy1_original(lcp, bwt, p_min, str.length)
20
+ result = psy1_original(lcp, bwt, p_min, data.length)
14
21
  result = decode_psy1_result(result, sa) if decode_result
15
22
 
16
23
  result
@@ -3,50 +3,13 @@ require 'trace_visualization/assert'
3
3
  module TraceVisualization
4
4
  module Utils
5
5
 
6
- def self.rhash(lp, rp)
7
- lp.hash + rp.hash
6
+ def self.set_default_options(options, default_options)
7
+ options.update(default_options.merge(options))
8
8
  end
9
9
 
10
-
11
- # Get the start position of lines
12
- def self.lines_pos(str)
13
- TraceVisualization.assert_instance_of(str, Mapping)
14
-
15
- lines_pos = [0]
16
- pos = -1
17
-
18
- while (pos = str.index(/\n/, pos + 1))
19
- lines_pos << pos + 1 if pos + 1 < str.length
20
- end
21
-
22
- lines_pos
10
+ def self.rhash(lp, rp)
11
+ lp.hash + rp.hash
23
12
  end
24
-
25
- # Repetitions by line
26
- def self.rs_by_line(rs, lines_pos, rs_by_line)
27
- for r in rs
28
- r_pos = r.left_positions
29
- r.lines = []
30
- i, j = 0, 0
31
-
32
- while (i < lines_pos.size && j < r_pos.size)
33
- a, b = lines_pos[i], (i + 1 < lines_pos.size ? lines_pos[i + 1] : 2**32)
34
-
35
- if a <= r_pos[j] && r_pos[j] < b
36
- rs_by_line[i] << [r, r_pos[j]]
37
- r.lines << i
38
-
39
- j += 1
40
- else
41
- i += 1
42
- end
43
- end
44
- end
45
-
46
- rs_by_line.each { |item| item.sort! { |a, b| a[1] <=> b[1] } }
47
-
48
- rs_by_line
49
- end
50
13
 
51
14
  # Read data from file
52
15
  # Allowed options
@@ -79,7 +42,7 @@ module TraceVisualization
79
42
  end
80
43
 
81
44
  str
82
- end
45
+ end
83
46
 
84
47
  end # module Utils
85
48
  end # module TraceVisualization
@@ -1,3 +1,3 @@
1
1
  module TraceVisualization
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.5'
3
3
  end
@@ -16,18 +16,34 @@ module TraceVisualization
16
16
  positions = repetition.build_positions
17
17
 
18
18
  positions.each do |position|
19
- result += mapping.restore(prev_position, position[0][0])
19
+ result += mapping.restore(prev_position, position[0][0] - prev_position)
20
20
 
21
21
  for i in 0 ... position.size
22
22
  pos, len = position[i]
23
- result += GRN + "#{mapping.restore(pos, pos + len)}" + FINISH
24
- result += YLW + "#{mapping.restore(pos + len, position[i + 1][0])}" + FINISH if i < position.size - 1
23
+ result += GRN + "#{mapping.restore(pos, len)}" + FINISH
24
+ result += YLW + "#{mapping.restore(pos + len, position[i + 1][0] - (pos + len))}" + FINISH if i < position.size - 1
25
25
  end
26
26
 
27
27
  prev_position = position[-1][0] + position[-1][1]
28
28
  end
29
29
 
30
- result += mapping.restore(prev_position, -1)
30
+ result += mapping.restore(prev_position, mapping.length - prev_position)
31
+ end
32
+
33
+ def self.hl_stdout(mapping, repetition)
34
+ if repetition.instance_of? Array
35
+ puts "* * * S T A R T * * *"
36
+ for r in repetition
37
+ puts r
38
+ puts hl(mapping, r)
39
+ puts "- - - - - - - - - - -"
40
+ end
41
+ else
42
+ puts "* * * S T A R T * * *"
43
+ puts repetition
44
+ puts hl(mapping, repetition)
45
+ puts "- - - - - - - - - - -"
46
+ end
31
47
  end
32
48
  end
33
49
  end
@@ -0,0 +1,69 @@
1
+ require 'trace_visualization'
2
+ require 'trace_visualization/algorithm'
3
+ require 'trace_visualization/visualization/console_color_print'
4
+
5
+ include TraceVisualization
6
+
7
+ describe Algorithm do
8
+ it 'smoke' do
9
+ mapping = Mapping.new
10
+ mapping.process { from_string "test1\ntest2\ntest3\ntest4" }
11
+
12
+ repetitions = Algorithm.process(mapping, {})
13
+
14
+ repetitions.size.should eq 1
15
+
16
+ repetition = repetitions[0]
17
+ repetition.k.should eq 0
18
+ repetition.length.should eq 4
19
+ repetition.left_positions.size.should eq 4
20
+ repetition.left.should be_nil
21
+ repetition.right.should be_nil
22
+ end
23
+
24
+ it 'two strict repetitions with k = 1' do
25
+ mapping = Mapping.new
26
+ mapping.process { from_string "fooAbar\nfooBbar\nfooCbar" }
27
+
28
+ repetitions = Algorithm.process(mapping, {})
29
+
30
+ repetitions.size.should eq 3
31
+ repetitions[0].k.should eq 0
32
+ repetitions[1].k.should eq 0
33
+ repetitions[2].k.should eq 1
34
+
35
+ repetitions[0].length.should eq 3
36
+ repetitions[1].length.should eq 3
37
+ repetitions[2].length.should eq 7
38
+ end
39
+
40
+ it 'two strict repetitions with k = 2' do
41
+ mapping = Mapping.new
42
+ mapping.process { from_string "fooABbar\nfooCDbar\nfooEFbar" }
43
+
44
+ repetitions = Algorithm.process(mapping, {})
45
+
46
+ repetitions.size.should eq 3
47
+ repetitions[0].k.should eq 0
48
+ repetitions[1].k.should eq 0
49
+ repetitions[2].k.should eq 2
50
+
51
+ repetitions[0].length.should eq 3
52
+ repetitions[1].length.should eq 3
53
+ repetitions[2].length.should eq 8
54
+ end
55
+
56
+ it 'one strict repetition - self-concatenation' do
57
+ mapping = Mapping.new
58
+ mapping.process { from_string "testAtest\ntestBtest\ntestCtest" }
59
+
60
+ repetitions = Algorithm.process(mapping, {})
61
+
62
+ # for repetition in repetitions
63
+ # puts "* * * * * * * * * *"
64
+ # puts "k = #{repetition.k}, length = #{repetition.length}, positions.size = #{repetition.left_positions.size}"
65
+ # puts Visualization::ConsoleColorPrint.hl(mapping, repetition)
66
+ # puts "* * * * * * * * * *"
67
+ # end
68
+ end
69
+ end
@@ -34,12 +34,9 @@ describe TraceVisualization::BurrowsWheelerTransform do
34
34
  end
35
35
 
36
36
  it "test with mapping" do
37
- str = "127.0.0.1 a 127.0.0.1 b" + TraceVisualization::TERMINATION_CHAR
38
-
39
- mapped_str = TraceVisualization::Mapping.init do
40
- default_tokens
41
- end
37
+ str = "{TOKEN;ip;127.0.0.1;123;1} a {TOKEN;ip;127.0.0.1;123;1} b" + TraceVisualization::TERMINATION_CHAR
42
38
 
39
+ mapped_str = TraceVisualization::Mapping.new
43
40
  mapped_str.process { from_string str }
44
41
 
45
42
  ip, ws, a, b = mapped_str[0], mapped_str[1], mapped_str[2], mapped_str[6]
@@ -0,0 +1,27 @@
1
+ require 'trace_visualization'
2
+ require 'trace_visualization/data/sorted_array'
3
+
4
+ include TraceVisualization
5
+ include TraceVisualization::Data
6
+
7
+ describe SortedArray do
8
+ it 'should correct insert values' do
9
+ a = SortedArray.new([4, 5, 1, 2])
10
+ a << 3
11
+ a.push 6
12
+ a << 0
13
+ a.should eq [0, 1, 2, 3, 4, 5, 6]
14
+ end
15
+
16
+ it 'should correct index value' do
17
+ a = SortedArray.new([1, 4, 5, 4, 1, 2, 5])
18
+ a.should eq [1, 1, 2, 4, 4, 5, 5]
19
+ a.index(1).should eq 0
20
+ a.index(2).should eq 2
21
+ a.index(4).should eq 3
22
+ a.index(5).should eq 5
23
+ a.index(0).should eq nil
24
+ a.index(3).should eq nil
25
+ a.index(6).should eq nil
26
+ end
27
+ end
@@ -1,20 +1,20 @@
1
1
  require 'trace_visualization/lexeme_overlap_filter'
2
- require 'trace_visualization/data/lexeme'
2
+ require 'trace_visualization/data/token'
3
3
 
4
4
  include TraceVisualization::Data
5
5
 
6
6
  describe TraceVisualization::LexemeOverlapFilter do
7
7
 
8
8
  it 'test 1' do
9
- lexeme2 = Lexeme.new(:name, "aa")
10
- lexeme3 = Lexeme.new(:name, "aaa")
11
- lexeme6 = Lexeme.new(:name, "aaaaaa")
9
+ lexeme2 = Token.new(:name, "aa")
10
+ lexeme3 = Token.new(:name, "aaa")
11
+ lexeme6 = Token.new(:name, "aaaaaa")
12
12
 
13
- i1 = LexemePos.new(lexeme2, 2)
14
- i2 = LexemePos.new(lexeme2, 6)
15
- i3 = LexemePos.new(lexeme2, 10)
16
- i4 = LexemePos.new(lexeme6, 8)
17
- i5 = LexemePos.new(lexeme3, 3)
13
+ i1 = TokenPosition.new(lexeme2, 2)
14
+ i2 = TokenPosition.new(lexeme2, 6)
15
+ i3 = TokenPosition.new(lexeme2, 10)
16
+ i4 = TokenPosition.new(lexeme6, 8)
17
+ i5 = TokenPosition.new(lexeme3, 3)
18
18
 
19
19
  lexeme_positions = [i1, i2, i3, i4, i5]
20
20
 
@@ -24,14 +24,14 @@ describe TraceVisualization::LexemeOverlapFilter do
24
24
  end
25
25
 
26
26
  it 'test 2' do
27
- lexeme2 = Lexeme.new(:name, "aa")
28
- lexeme4 = Lexeme.new(:name, "aaaa")
27
+ lexeme2 = Token.new(:name, "aa")
28
+ lexeme4 = Token.new(:name, "aaaa")
29
29
 
30
- i1 = LexemePos.new(lexeme2, 0)
31
- i2 = LexemePos.new(lexeme4, 2)
32
- i3 = LexemePos.new(lexeme2, 4)
33
- i4 = LexemePos.new(lexeme4, 5)
34
- i5 = LexemePos.new(lexeme2, 7)
30
+ i1 = TokenPosition.new(lexeme2, 0)
31
+ i2 = TokenPosition.new(lexeme4, 2)
32
+ i3 = TokenPosition.new(lexeme2, 4)
33
+ i4 = TokenPosition.new(lexeme4, 5)
34
+ i5 = TokenPosition.new(lexeme2, 7)
35
35
 
36
36
  lexemes = [i1, i2, i3, i4, i5]
37
37
 
@@ -41,13 +41,13 @@ describe TraceVisualization::LexemeOverlapFilter do
41
41
  end
42
42
 
43
43
  it 'test 3' do
44
- lexeme1 = Lexeme.new(:name, "a")
45
- lexeme3 = Lexeme.new(:name, "aaa")
44
+ lexeme1 = Token.new(:name, "a")
45
+ lexeme3 = Token.new(:name, "aaa")
46
46
 
47
- i1 = LexemePos.new(lexeme1, 1)
48
- i2 = LexemePos.new(lexeme1, 3)
49
- i3 = LexemePos.new(lexeme3, 0)
50
- i4 = LexemePos.new(lexeme3, 3)
47
+ i1 = TokenPosition.new(lexeme1, 1)
48
+ i2 = TokenPosition.new(lexeme1, 3)
49
+ i3 = TokenPosition.new(lexeme3, 0)
50
+ i4 = TokenPosition.new(lexeme3, 3)
51
51
 
52
52
  lexemes = [i1, i2, i3, i4]
53
53
 
@@ -15,15 +15,10 @@ describe TraceVisualization::LongestCommonPrefix do
15
15
  end
16
16
 
17
17
  it 'should return correct result for mapped string', :current => true do
18
- str = "127.0.0.1 foo\r\n127.0.0.1 bar"
18
+ str = "{TOKEN;ip;127.0.0.1;1000;1} foo\r\n{TOKEN;ip;127.0.0.1;1000;1} bar"
19
19
 
20
- mapping = TraceVisualization::Mapping.init do
21
- default_tokens
22
- end
23
-
24
- mapping.process do
25
- from_string(str)
26
- end
20
+ mapping = TraceVisualization::Mapping.new
21
+ mapping.process { from_string(str) }
27
22
 
28
23
  sa = TraceVisualization::SuffixArray.effective(mapping)
29
24
  lcp = TraceVisualization::LongestCommonPrefix.effective(mapping, sa, mapping.length)
@@ -1,113 +1,116 @@
1
1
  require 'trace_visualization'
2
2
  require 'trace_visualization/mapping'
3
+ require 'tempfile'
3
4
 
4
5
  include TraceVisualization
5
6
  include TraceVisualization::Data
6
7
 
7
- describe TraceVisualization::Mapping do
8
+ describe Mapping do
8
9
  it 'simple id values' do
9
- str = "foo[1234]bar[1235]far[1234]\n"
10
+ str = "foo{TOKEN;id;1234;1234;1}bar{TOKEN;id;1235;1235;1}far{TOKEN;id;1234;1234;1}"
10
11
 
11
- mapping = TraceVisualization::Mapping.init do
12
- default_tokens
13
- end
12
+ mapping = Mapping.new
14
13
 
15
14
  mapping.process do
16
15
  from_string(str)
17
16
  end
18
17
 
19
- mapping.length.should eq 13
18
+ mapping.length.should eq 12
20
19
 
21
- ids = mapping.find_all { |lexeme| lexeme.name == :ID }
20
+ ids = mapping.find_all { |lexeme| lexeme.name == :id }
22
21
  ids.size.should eq(3)
23
- ids[0].value.should eq("[1234]")
24
- ids[1].value.should eq("[1235]")
25
- ids[2].value.should eq("[1234]")
26
- ids[0].should eq(ids[2])
27
-
28
- mapping.restore.should eq str
22
+ ids[0].value.should eq("1234")
23
+ ids[1].value.should eq("1235")
24
+ ids[2].value.should eq("1234")
25
+ ids[0].should eq(ids[2])
29
26
  end
30
27
 
31
28
  it 'ip values' do
32
- str = "user1 ip : 127.0.0.1 \r\nuser2 ip : 127.0.0.2\r\n"
29
+ str = "user1 ip: {TOKEN;ip;127.0.0.1;123;1} \nuser2 ip: {TOKEN;ip;127.0.0.2;122;1}"
33
30
 
34
- mapping = TraceVisualization::Mapping.init do
35
- default_tokens
36
- end
31
+ mapping = Mapping.new
37
32
 
38
33
  mapping.process do
39
34
  from_string(str)
40
35
  end
41
36
 
42
- mapping.length.should eq 29
37
+ mapping.length.should eq 24
43
38
 
44
- ips = mapping.find_all { |lexeme| lexeme.name == :IP }
39
+ ips = mapping.find_all { |lexeme| lexeme.name == :ip }
45
40
  ips.size.should eq(2)
46
41
  ips[0].value.should eq("127.0.0.1")
47
42
  ips[1].value.should eq("127.0.0.2")
48
-
49
- mapping.restore.should eq str
50
43
  end
51
44
 
52
- it 'compare different types' do
53
- mapping = TraceVisualization::Mapping.init do
54
- default_tokens
55
- end
45
+ it 'token to_i conversion' do
46
+ token = Token.new('unknown', 0, 0)
47
+ token.ord = 0
48
+ token.to_i.should eq 0
49
+ end
50
+
51
+ it 'item as array index' do
52
+ token = Token.new('unknown', 0, 0)
53
+ token.ord = 1
54
+ array = [0, 1, 2]
56
55
 
57
- mapping.tokens.should_not be_nil
58
- mapping.tokens[:ID].should_not be_nil
59
- mapping.tokens[:IP].should_not be_nil
60
- mapping.tokens[:TIME].should_not be_nil
56
+ array[token].should eq 1
57
+ end
58
+
59
+ it 'preprocessed string' do
60
+ str = 'Text {TOKEN;id;[1234];1234;1} text {TOKEN;ip;127.0.0.127;1;1} text'
61
61
 
62
- # Ids
63
- id_1 = Lexeme.new(:ID, "[12345678]", mapping.tokens[:ID][1].call("[12345678]"))
64
- id_2 = Lexeme.new(:ID, "[12345679]", mapping.tokens[:ID][1].call("[12345679]"))
65
- Reorder.process([id_1, id_2])
62
+ mapping = Mapping.new
63
+ mapping.process { from_string str }
66
64
 
67
- id_1.should be < id_2
65
+ mapping.size.should eq 18
66
+ end
67
+
68
+ it 'Mapping.lines should contains positions of lines (from string)' do
69
+ str = "{TOKEN;id;1;1;1}x\n{TOKEN;id;1;1;1}y\n{TOKEN;id;1;1;1}z"
68
70
 
69
- # IPs
70
- ip_1 = Lexeme.new(:IP, "127.0.0.1", mapping.tokens[:IP][1].call("127.0.0.1"))
71
- ip_2 = Lexeme.new(:IP, "127.0.0.2", mapping.tokens[:IP][1].call("127.0.0.2"))
72
- Reorder.process([ip_1, ip_2])
71
+ mapping = Mapping.new
72
+ mapping.process { from_string str }
73
73
 
74
- ip_1.should be < ip_2
75
-
76
- # Time
77
- time_1 = Lexeme.new(:TIME, '[16 Jan 2013 00:10:00]', mapping.tokens[:TIME][1].call('[16 Jan 2013 00:10:00]'))
78
- time_2 = Lexeme.new(:TIME, '[16 Jan 2013 00:10:01]', mapping.tokens[:TIME][1].call('[16 Jan 2013 00:10:01]'))
79
- Reorder.process([time_1, time_2])
80
-
81
- # Different
82
- Reorder.process([time_1, time_2, id_1, ip_1])
83
-
84
- time_1.should be < time_2
85
- id_1.should be < ip_1
86
- id_1.should be < time_1
74
+ mapping.size.should eq 8
75
+ mapping.lines.should eq [0, 3, 6]
87
76
  end
88
77
 
89
- it 'Lexeme to_i conversion' do
90
- lexeme = TraceVisualization::Data::Lexeme.new('unknown', 0, 0)
91
- lexeme.ord = 0
92
- lexeme.to_i.should eq 0
78
+ it 'Mapping.lines should contains positions of lines (from file)' do
79
+ data = <<-DATA
80
+ line1
81
+ line2
82
+ line3
83
+ DATA
84
+
85
+ tmp_file = Tempfile.new('trace_visualization')
86
+ open(tmp_file.path, "w") { |fd| fd.write data }
87
+
88
+ mapping = Mapping.new
89
+ mapping.process { from_file tmp_file.path }
90
+
91
+ tmp_file.close
92
+ tmp_file.unlink
93
+
94
+ mapping.lines.should eq [0, 6, 12]
93
95
  end
94
96
 
95
- it 'item as array index' do
96
- lexeme = TraceVisualization::Data::Lexeme.new('unknown', 0, 0)
97
- lexeme.ord = 1
98
- array = [0, 1, 2]
97
+ it 'subarray method for mapping' do
98
+ mapping = Mapping.new
99
+ mapping.process { from_string "test test test" }
99
100
 
100
- array[lexeme].should eq 1
101
- end
102
-
103
- it 'preprocessed string' do
104
- str = 'Text {LEXEME;ID;[1234];1234} text {LEXEME;IP;127.0.0.127;1} text'
101
+ submapping = mapping[5 ... 9]
102
+ submapping.size.should eq 4
103
+ submapping.join.should eq "test"
105
104
 
106
- mapping = TraceVisualization::Mapping.new
107
- mapping.process do
108
- from_preprocessed_string str
109
- end
105
+ mapping[0 .. -1].join.should eq "test test test"
106
+ end
107
+
108
+ it 'forbidden char scan' do
109
+ mapping = Mapping.new
110
+ mapping.process { from_string "test test test" }
111
+ mapping[0 .. -1].join.scan(TraceVisualization::FORBIDDEN_CHARS).size.should eq 0
110
112
 
111
- mapping.size.should eq 19
113
+ mapping.process { from_string "test\ntest\ntest" }
114
+ mapping[0 .. -1].join.scan(TraceVisualization::FORBIDDEN_CHARS).size.should_not eq 0
112
115
  end
113
116
  end