trace_visualization 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/bin/trace_visualization +21 -26
  4. data/lib/trace_visualization.rb +2 -2
  5. data/lib/trace_visualization/algorithm.rb +42 -0
  6. data/lib/trace_visualization/data/irepetition.rb +5 -1
  7. data/lib/trace_visualization/data/repetition.rb +14 -6
  8. data/lib/trace_visualization/data/sorted_array.rb +44 -0
  9. data/lib/trace_visualization/data/{lexeme.rb → token.rb} +7 -8
  10. data/lib/trace_visualization/lexeme_overlap_filter.rb +18 -18
  11. data/lib/trace_visualization/mapping.rb +72 -185
  12. data/lib/trace_visualization/repetitions/concatenation.rb +136 -0
  13. data/lib/trace_visualization/repetitions/context.rb +70 -0
  14. data/lib/trace_visualization/repetitions/filter.rb +153 -0
  15. data/lib/trace_visualization/repetitions/incrementation.rb +89 -0
  16. data/lib/trace_visualization/repetitions_psy.rb +12 -5
  17. data/lib/trace_visualization/utils.rb +5 -42
  18. data/lib/trace_visualization/version.rb +1 -1
  19. data/lib/trace_visualization/visualization/console_color_print.rb +20 -4
  20. data/spec/algorithm_spec.rb +69 -0
  21. data/spec/bwt_spec.rb +2 -5
  22. data/spec/data/sorted_array_spec.rb +27 -0
  23. data/spec/lexeme_overlap_filter_spec.rb +22 -22
  24. data/spec/longest_common_prefix_spec.rb +3 -8
  25. data/spec/mapping_spec.rb +72 -69
  26. data/spec/repetitions/concatenation_spec.rb +65 -0
  27. data/spec/repetitions/filter_spec.rb +180 -0
  28. data/spec/repetitions/incrementation_spec.rb +29 -0
  29. data/spec/repetitions_psy_spec.rb +7 -16
  30. data/spec/suffix_array_spec.rb +11 -31
  31. data/spec/utils_spec.rb +21 -9
  32. data/spec/visualization/console_color_print_spec.rb +26 -0
  33. data/trace_visualization.gemspec +2 -2
  34. metadata +27 -13
  35. data/lib/trace_visualization/repetitions_concatenation.rb +0 -134
  36. data/lib/trace_visualization/repetitions_context.rb +0 -18
  37. data/spec/repetitions_concatenation_spec.rb +0 -64
@@ -5,12 +5,19 @@ require 'trace_visualization/data/repetition'
5
5
 
6
6
  module TraceVisualization
7
7
  module Repetitions
8
- def self.psy1(str, p_min, decode_result = true)
9
- sa = TraceVisualization::SuffixArray.effective(str)
10
- lcp = TraceVisualization::LongestCommonPrefix.effective(str, sa, str.size)
11
- bwt = TraceVisualization::BurrowsWheelerTransform.bwt(str, sa, str.length)
8
+
9
+ # Computes all the complete nonextendible repeats using PSY1 algorithm
10
+ # @param data [Array] Array of objects
11
+ # @param p_min [Integer] The minimum number of positions in which we have repetition
12
+ # @param decode_result [Boolean]
13
+ def self.psy1(data, p_min, decode_result = true)
14
+ raise ArgumentError, 'Data is empty' if data == nil || data.size == 0
15
+
16
+ sa = TraceVisualization::SuffixArray.effective(data)
17
+ lcp = TraceVisualization::LongestCommonPrefix.effective(data, sa, data.size)
18
+ bwt = TraceVisualization::BurrowsWheelerTransform.bwt(data, sa, data.size)
12
19
 
13
- result = psy1_original(lcp, bwt, p_min, str.length)
20
+ result = psy1_original(lcp, bwt, p_min, data.length)
14
21
  result = decode_psy1_result(result, sa) if decode_result
15
22
 
16
23
  result
@@ -3,50 +3,13 @@ require 'trace_visualization/assert'
3
3
  module TraceVisualization
4
4
  module Utils
5
5
 
6
- def self.rhash(lp, rp)
7
- lp.hash + rp.hash
6
+ def self.set_default_options(options, default_options)
7
+ options.update(default_options.merge(options))
8
8
  end
9
9
 
10
-
11
- # Get the start position of lines
12
- def self.lines_pos(str)
13
- TraceVisualization.assert_instance_of(str, Mapping)
14
-
15
- lines_pos = [0]
16
- pos = -1
17
-
18
- while (pos = str.index(/\n/, pos + 1))
19
- lines_pos << pos + 1 if pos + 1 < str.length
20
- end
21
-
22
- lines_pos
10
+ def self.rhash(lp, rp)
11
+ lp.hash + rp.hash
23
12
  end
24
-
25
- # Repetitions by line
26
- def self.rs_by_line(rs, lines_pos, rs_by_line)
27
- for r in rs
28
- r_pos = r.left_positions
29
- r.lines = []
30
- i, j = 0, 0
31
-
32
- while (i < lines_pos.size && j < r_pos.size)
33
- a, b = lines_pos[i], (i + 1 < lines_pos.size ? lines_pos[i + 1] : 2**32)
34
-
35
- if a <= r_pos[j] && r_pos[j] < b
36
- rs_by_line[i] << [r, r_pos[j]]
37
- r.lines << i
38
-
39
- j += 1
40
- else
41
- i += 1
42
- end
43
- end
44
- end
45
-
46
- rs_by_line.each { |item| item.sort! { |a, b| a[1] <=> b[1] } }
47
-
48
- rs_by_line
49
- end
50
13
 
51
14
  # Read data from file
52
15
  # Allowed options
@@ -79,7 +42,7 @@ module TraceVisualization
79
42
  end
80
43
 
81
44
  str
82
- end
45
+ end
83
46
 
84
47
  end # module Utils
85
48
  end # module TraceVisualization
@@ -1,3 +1,3 @@
1
1
  module TraceVisualization
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.5'
3
3
  end
@@ -16,18 +16,34 @@ module TraceVisualization
16
16
  positions = repetition.build_positions
17
17
 
18
18
  positions.each do |position|
19
- result += mapping.restore(prev_position, position[0][0])
19
+ result += mapping.restore(prev_position, position[0][0] - prev_position)
20
20
 
21
21
  for i in 0 ... position.size
22
22
  pos, len = position[i]
23
- result += GRN + "#{mapping.restore(pos, pos + len)}" + FINISH
24
- result += YLW + "#{mapping.restore(pos + len, position[i + 1][0])}" + FINISH if i < position.size - 1
23
+ result += GRN + "#{mapping.restore(pos, len)}" + FINISH
24
+ result += YLW + "#{mapping.restore(pos + len, position[i + 1][0] - (pos + len))}" + FINISH if i < position.size - 1
25
25
  end
26
26
 
27
27
  prev_position = position[-1][0] + position[-1][1]
28
28
  end
29
29
 
30
- result += mapping.restore(prev_position, -1)
30
+ result += mapping.restore(prev_position, mapping.length - prev_position)
31
+ end
32
+
33
+ def self.hl_stdout(mapping, repetition)
34
+ if repetition.instance_of? Array
35
+ puts "* * * S T A R T * * *"
36
+ for r in repetition
37
+ puts r
38
+ puts hl(mapping, r)
39
+ puts "- - - - - - - - - - -"
40
+ end
41
+ else
42
+ puts "* * * S T A R T * * *"
43
+ puts repetition
44
+ puts hl(mapping, repetition)
45
+ puts "- - - - - - - - - - -"
46
+ end
31
47
  end
32
48
  end
33
49
  end
@@ -0,0 +1,69 @@
1
+ require 'trace_visualization'
2
+ require 'trace_visualization/algorithm'
3
+ require 'trace_visualization/visualization/console_color_print'
4
+
5
+ include TraceVisualization
6
+
7
+ describe Algorithm do
8
+ it 'smoke' do
9
+ mapping = Mapping.new
10
+ mapping.process { from_string "test1\ntest2\ntest3\ntest4" }
11
+
12
+ repetitions = Algorithm.process(mapping, {})
13
+
14
+ repetitions.size.should eq 1
15
+
16
+ repetition = repetitions[0]
17
+ repetition.k.should eq 0
18
+ repetition.length.should eq 4
19
+ repetition.left_positions.size.should eq 4
20
+ repetition.left.should be_nil
21
+ repetition.right.should be_nil
22
+ end
23
+
24
+ it 'two strict repetitions with k = 1' do
25
+ mapping = Mapping.new
26
+ mapping.process { from_string "fooAbar\nfooBbar\nfooCbar" }
27
+
28
+ repetitions = Algorithm.process(mapping, {})
29
+
30
+ repetitions.size.should eq 3
31
+ repetitions[0].k.should eq 0
32
+ repetitions[1].k.should eq 0
33
+ repetitions[2].k.should eq 1
34
+
35
+ repetitions[0].length.should eq 3
36
+ repetitions[1].length.should eq 3
37
+ repetitions[2].length.should eq 7
38
+ end
39
+
40
+ it 'two strict repetitions with k = 2' do
41
+ mapping = Mapping.new
42
+ mapping.process { from_string "fooABbar\nfooCDbar\nfooEFbar" }
43
+
44
+ repetitions = Algorithm.process(mapping, {})
45
+
46
+ repetitions.size.should eq 3
47
+ repetitions[0].k.should eq 0
48
+ repetitions[1].k.should eq 0
49
+ repetitions[2].k.should eq 2
50
+
51
+ repetitions[0].length.should eq 3
52
+ repetitions[1].length.should eq 3
53
+ repetitions[2].length.should eq 8
54
+ end
55
+
56
+ it 'one strict repetition - self-concatenation' do
57
+ mapping = Mapping.new
58
+ mapping.process { from_string "testAtest\ntestBtest\ntestCtest" }
59
+
60
+ repetitions = Algorithm.process(mapping, {})
61
+
62
+ # for repetition in repetitions
63
+ # puts "* * * * * * * * * *"
64
+ # puts "k = #{repetition.k}, length = #{repetition.length}, positions.size = #{repetition.left_positions.size}"
65
+ # puts Visualization::ConsoleColorPrint.hl(mapping, repetition)
66
+ # puts "* * * * * * * * * *"
67
+ # end
68
+ end
69
+ end
@@ -34,12 +34,9 @@ describe TraceVisualization::BurrowsWheelerTransform do
34
34
  end
35
35
 
36
36
  it "test with mapping" do
37
- str = "127.0.0.1 a 127.0.0.1 b" + TraceVisualization::TERMINATION_CHAR
38
-
39
- mapped_str = TraceVisualization::Mapping.init do
40
- default_tokens
41
- end
37
+ str = "{TOKEN;ip;127.0.0.1;123;1} a {TOKEN;ip;127.0.0.1;123;1} b" + TraceVisualization::TERMINATION_CHAR
42
38
 
39
+ mapped_str = TraceVisualization::Mapping.new
43
40
  mapped_str.process { from_string str }
44
41
 
45
42
  ip, ws, a, b = mapped_str[0], mapped_str[1], mapped_str[2], mapped_str[6]
@@ -0,0 +1,27 @@
1
+ require 'trace_visualization'
2
+ require 'trace_visualization/data/sorted_array'
3
+
4
+ include TraceVisualization
5
+ include TraceVisualization::Data
6
+
7
+ describe SortedArray do
8
+ it 'should correct insert values' do
9
+ a = SortedArray.new([4, 5, 1, 2])
10
+ a << 3
11
+ a.push 6
12
+ a << 0
13
+ a.should eq [0, 1, 2, 3, 4, 5, 6]
14
+ end
15
+
16
+ it 'should correct index value' do
17
+ a = SortedArray.new([1, 4, 5, 4, 1, 2, 5])
18
+ a.should eq [1, 1, 2, 4, 4, 5, 5]
19
+ a.index(1).should eq 0
20
+ a.index(2).should eq 2
21
+ a.index(4).should eq 3
22
+ a.index(5).should eq 5
23
+ a.index(0).should eq nil
24
+ a.index(3).should eq nil
25
+ a.index(6).should eq nil
26
+ end
27
+ end
@@ -1,20 +1,20 @@
1
1
  require 'trace_visualization/lexeme_overlap_filter'
2
- require 'trace_visualization/data/lexeme'
2
+ require 'trace_visualization/data/token'
3
3
 
4
4
  include TraceVisualization::Data
5
5
 
6
6
  describe TraceVisualization::LexemeOverlapFilter do
7
7
 
8
8
  it 'test 1' do
9
- lexeme2 = Lexeme.new(:name, "aa")
10
- lexeme3 = Lexeme.new(:name, "aaa")
11
- lexeme6 = Lexeme.new(:name, "aaaaaa")
9
+ lexeme2 = Token.new(:name, "aa")
10
+ lexeme3 = Token.new(:name, "aaa")
11
+ lexeme6 = Token.new(:name, "aaaaaa")
12
12
 
13
- i1 = LexemePos.new(lexeme2, 2)
14
- i2 = LexemePos.new(lexeme2, 6)
15
- i3 = LexemePos.new(lexeme2, 10)
16
- i4 = LexemePos.new(lexeme6, 8)
17
- i5 = LexemePos.new(lexeme3, 3)
13
+ i1 = TokenPosition.new(lexeme2, 2)
14
+ i2 = TokenPosition.new(lexeme2, 6)
15
+ i3 = TokenPosition.new(lexeme2, 10)
16
+ i4 = TokenPosition.new(lexeme6, 8)
17
+ i5 = TokenPosition.new(lexeme3, 3)
18
18
 
19
19
  lexeme_positions = [i1, i2, i3, i4, i5]
20
20
 
@@ -24,14 +24,14 @@ describe TraceVisualization::LexemeOverlapFilter do
24
24
  end
25
25
 
26
26
  it 'test 2' do
27
- lexeme2 = Lexeme.new(:name, "aa")
28
- lexeme4 = Lexeme.new(:name, "aaaa")
27
+ lexeme2 = Token.new(:name, "aa")
28
+ lexeme4 = Token.new(:name, "aaaa")
29
29
 
30
- i1 = LexemePos.new(lexeme2, 0)
31
- i2 = LexemePos.new(lexeme4, 2)
32
- i3 = LexemePos.new(lexeme2, 4)
33
- i4 = LexemePos.new(lexeme4, 5)
34
- i5 = LexemePos.new(lexeme2, 7)
30
+ i1 = TokenPosition.new(lexeme2, 0)
31
+ i2 = TokenPosition.new(lexeme4, 2)
32
+ i3 = TokenPosition.new(lexeme2, 4)
33
+ i4 = TokenPosition.new(lexeme4, 5)
34
+ i5 = TokenPosition.new(lexeme2, 7)
35
35
 
36
36
  lexemes = [i1, i2, i3, i4, i5]
37
37
 
@@ -41,13 +41,13 @@ describe TraceVisualization::LexemeOverlapFilter do
41
41
  end
42
42
 
43
43
  it 'test 3' do
44
- lexeme1 = Lexeme.new(:name, "a")
45
- lexeme3 = Lexeme.new(:name, "aaa")
44
+ lexeme1 = Token.new(:name, "a")
45
+ lexeme3 = Token.new(:name, "aaa")
46
46
 
47
- i1 = LexemePos.new(lexeme1, 1)
48
- i2 = LexemePos.new(lexeme1, 3)
49
- i3 = LexemePos.new(lexeme3, 0)
50
- i4 = LexemePos.new(lexeme3, 3)
47
+ i1 = TokenPosition.new(lexeme1, 1)
48
+ i2 = TokenPosition.new(lexeme1, 3)
49
+ i3 = TokenPosition.new(lexeme3, 0)
50
+ i4 = TokenPosition.new(lexeme3, 3)
51
51
 
52
52
  lexemes = [i1, i2, i3, i4]
53
53
 
@@ -15,15 +15,10 @@ describe TraceVisualization::LongestCommonPrefix do
15
15
  end
16
16
 
17
17
  it 'should return correct result for mapped string', :current => true do
18
- str = "127.0.0.1 foo\r\n127.0.0.1 bar"
18
+ str = "{TOKEN;ip;127.0.0.1;1000;1} foo\r\n{TOKEN;ip;127.0.0.1;1000;1} bar"
19
19
 
20
- mapping = TraceVisualization::Mapping.init do
21
- default_tokens
22
- end
23
-
24
- mapping.process do
25
- from_string(str)
26
- end
20
+ mapping = TraceVisualization::Mapping.new
21
+ mapping.process { from_string(str) }
27
22
 
28
23
  sa = TraceVisualization::SuffixArray.effective(mapping)
29
24
  lcp = TraceVisualization::LongestCommonPrefix.effective(mapping, sa, mapping.length)
@@ -1,113 +1,116 @@
1
1
  require 'trace_visualization'
2
2
  require 'trace_visualization/mapping'
3
+ require 'tempfile'
3
4
 
4
5
  include TraceVisualization
5
6
  include TraceVisualization::Data
6
7
 
7
- describe TraceVisualization::Mapping do
8
+ describe Mapping do
8
9
  it 'simple id values' do
9
- str = "foo[1234]bar[1235]far[1234]\n"
10
+ str = "foo{TOKEN;id;1234;1234;1}bar{TOKEN;id;1235;1235;1}far{TOKEN;id;1234;1234;1}"
10
11
 
11
- mapping = TraceVisualization::Mapping.init do
12
- default_tokens
13
- end
12
+ mapping = Mapping.new
14
13
 
15
14
  mapping.process do
16
15
  from_string(str)
17
16
  end
18
17
 
19
- mapping.length.should eq 13
18
+ mapping.length.should eq 12
20
19
 
21
- ids = mapping.find_all { |lexeme| lexeme.name == :ID }
20
+ ids = mapping.find_all { |lexeme| lexeme.name == :id }
22
21
  ids.size.should eq(3)
23
- ids[0].value.should eq("[1234]")
24
- ids[1].value.should eq("[1235]")
25
- ids[2].value.should eq("[1234]")
26
- ids[0].should eq(ids[2])
27
-
28
- mapping.restore.should eq str
22
+ ids[0].value.should eq("1234")
23
+ ids[1].value.should eq("1235")
24
+ ids[2].value.should eq("1234")
25
+ ids[0].should eq(ids[2])
29
26
  end
30
27
 
31
28
  it 'ip values' do
32
- str = "user1 ip : 127.0.0.1 \r\nuser2 ip : 127.0.0.2\r\n"
29
+ str = "user1 ip: {TOKEN;ip;127.0.0.1;123;1} \nuser2 ip: {TOKEN;ip;127.0.0.2;122;1}"
33
30
 
34
- mapping = TraceVisualization::Mapping.init do
35
- default_tokens
36
- end
31
+ mapping = Mapping.new
37
32
 
38
33
  mapping.process do
39
34
  from_string(str)
40
35
  end
41
36
 
42
- mapping.length.should eq 29
37
+ mapping.length.should eq 24
43
38
 
44
- ips = mapping.find_all { |lexeme| lexeme.name == :IP }
39
+ ips = mapping.find_all { |lexeme| lexeme.name == :ip }
45
40
  ips.size.should eq(2)
46
41
  ips[0].value.should eq("127.0.0.1")
47
42
  ips[1].value.should eq("127.0.0.2")
48
-
49
- mapping.restore.should eq str
50
43
  end
51
44
 
52
- it 'compare different types' do
53
- mapping = TraceVisualization::Mapping.init do
54
- default_tokens
55
- end
45
+ it 'token to_i conversion' do
46
+ token = Token.new('unknown', 0, 0)
47
+ token.ord = 0
48
+ token.to_i.should eq 0
49
+ end
50
+
51
+ it 'item as array index' do
52
+ token = Token.new('unknown', 0, 0)
53
+ token.ord = 1
54
+ array = [0, 1, 2]
56
55
 
57
- mapping.tokens.should_not be_nil
58
- mapping.tokens[:ID].should_not be_nil
59
- mapping.tokens[:IP].should_not be_nil
60
- mapping.tokens[:TIME].should_not be_nil
56
+ array[token].should eq 1
57
+ end
58
+
59
+ it 'preprocessed string' do
60
+ str = 'Text {TOKEN;id;[1234];1234;1} text {TOKEN;ip;127.0.0.127;1;1} text'
61
61
 
62
- # Ids
63
- id_1 = Lexeme.new(:ID, "[12345678]", mapping.tokens[:ID][1].call("[12345678]"))
64
- id_2 = Lexeme.new(:ID, "[12345679]", mapping.tokens[:ID][1].call("[12345679]"))
65
- Reorder.process([id_1, id_2])
62
+ mapping = Mapping.new
63
+ mapping.process { from_string str }
66
64
 
67
- id_1.should be < id_2
65
+ mapping.size.should eq 18
66
+ end
67
+
68
+ it 'Mapping.lines should contains positions of lines (from string)' do
69
+ str = "{TOKEN;id;1;1;1}x\n{TOKEN;id;1;1;1}y\n{TOKEN;id;1;1;1}z"
68
70
 
69
- # IPs
70
- ip_1 = Lexeme.new(:IP, "127.0.0.1", mapping.tokens[:IP][1].call("127.0.0.1"))
71
- ip_2 = Lexeme.new(:IP, "127.0.0.2", mapping.tokens[:IP][1].call("127.0.0.2"))
72
- Reorder.process([ip_1, ip_2])
71
+ mapping = Mapping.new
72
+ mapping.process { from_string str }
73
73
 
74
- ip_1.should be < ip_2
75
-
76
- # Time
77
- time_1 = Lexeme.new(:TIME, '[16 Jan 2013 00:10:00]', mapping.tokens[:TIME][1].call('[16 Jan 2013 00:10:00]'))
78
- time_2 = Lexeme.new(:TIME, '[16 Jan 2013 00:10:01]', mapping.tokens[:TIME][1].call('[16 Jan 2013 00:10:01]'))
79
- Reorder.process([time_1, time_2])
80
-
81
- # Different
82
- Reorder.process([time_1, time_2, id_1, ip_1])
83
-
84
- time_1.should be < time_2
85
- id_1.should be < ip_1
86
- id_1.should be < time_1
74
+ mapping.size.should eq 8
75
+ mapping.lines.should eq [0, 3, 6]
87
76
  end
88
77
 
89
- it 'Lexeme to_i conversion' do
90
- lexeme = TraceVisualization::Data::Lexeme.new('unknown', 0, 0)
91
- lexeme.ord = 0
92
- lexeme.to_i.should eq 0
78
+ it 'Mapping.lines should contains positions of lines (from file)' do
79
+ data = <<-DATA
80
+ line1
81
+ line2
82
+ line3
83
+ DATA
84
+
85
+ tmp_file = Tempfile.new('trace_visualization')
86
+ open(tmp_file.path, "w") { |fd| fd.write data }
87
+
88
+ mapping = Mapping.new
89
+ mapping.process { from_file tmp_file.path }
90
+
91
+ tmp_file.close
92
+ tmp_file.unlink
93
+
94
+ mapping.lines.should eq [0, 6, 12]
93
95
  end
94
96
 
95
- it 'item as array index' do
96
- lexeme = TraceVisualization::Data::Lexeme.new('unknown', 0, 0)
97
- lexeme.ord = 1
98
- array = [0, 1, 2]
97
+ it 'subarray method for mapping' do
98
+ mapping = Mapping.new
99
+ mapping.process { from_string "test test test" }
99
100
 
100
- array[lexeme].should eq 1
101
- end
102
-
103
- it 'preprocessed string' do
104
- str = 'Text {LEXEME;ID;[1234];1234} text {LEXEME;IP;127.0.0.127;1} text'
101
+ submapping = mapping[5 ... 9]
102
+ submapping.size.should eq 4
103
+ submapping.join.should eq "test"
105
104
 
106
- mapping = TraceVisualization::Mapping.new
107
- mapping.process do
108
- from_preprocessed_string str
109
- end
105
+ mapping[0 .. -1].join.should eq "test test test"
106
+ end
107
+
108
+ it 'forbidden char scan' do
109
+ mapping = Mapping.new
110
+ mapping.process { from_string "test test test" }
111
+ mapping[0 .. -1].join.scan(TraceVisualization::FORBIDDEN_CHARS).size.should eq 0
110
112
 
111
- mapping.size.should eq 19
113
+ mapping.process { from_string "test\ntest\ntest" }
114
+ mapping[0 .. -1].join.scan(TraceVisualization::FORBIDDEN_CHARS).size.should_not eq 0
112
115
  end
113
116
  end