trace_visualization 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/bin/trace_visualization +21 -26
  4. data/lib/trace_visualization.rb +2 -2
  5. data/lib/trace_visualization/algorithm.rb +42 -0
  6. data/lib/trace_visualization/data/irepetition.rb +5 -1
  7. data/lib/trace_visualization/data/repetition.rb +14 -6
  8. data/lib/trace_visualization/data/sorted_array.rb +44 -0
  9. data/lib/trace_visualization/data/{lexeme.rb → token.rb} +7 -8
  10. data/lib/trace_visualization/lexeme_overlap_filter.rb +18 -18
  11. data/lib/trace_visualization/mapping.rb +72 -185
  12. data/lib/trace_visualization/repetitions/concatenation.rb +136 -0
  13. data/lib/trace_visualization/repetitions/context.rb +70 -0
  14. data/lib/trace_visualization/repetitions/filter.rb +153 -0
  15. data/lib/trace_visualization/repetitions/incrementation.rb +89 -0
  16. data/lib/trace_visualization/repetitions_psy.rb +12 -5
  17. data/lib/trace_visualization/utils.rb +5 -42
  18. data/lib/trace_visualization/version.rb +1 -1
  19. data/lib/trace_visualization/visualization/console_color_print.rb +20 -4
  20. data/spec/algorithm_spec.rb +69 -0
  21. data/spec/bwt_spec.rb +2 -5
  22. data/spec/data/sorted_array_spec.rb +27 -0
  23. data/spec/lexeme_overlap_filter_spec.rb +22 -22
  24. data/spec/longest_common_prefix_spec.rb +3 -8
  25. data/spec/mapping_spec.rb +72 -69
  26. data/spec/repetitions/concatenation_spec.rb +65 -0
  27. data/spec/repetitions/filter_spec.rb +180 -0
  28. data/spec/repetitions/incrementation_spec.rb +29 -0
  29. data/spec/repetitions_psy_spec.rb +7 -16
  30. data/spec/suffix_array_spec.rb +11 -31
  31. data/spec/utils_spec.rb +21 -9
  32. data/spec/visualization/console_color_print_spec.rb +26 -0
  33. data/trace_visualization.gemspec +2 -2
  34. metadata +27 -13
  35. data/lib/trace_visualization/repetitions_concatenation.rb +0 -134
  36. data/lib/trace_visualization/repetitions_context.rb +0 -18
  37. data/spec/repetitions_concatenation_spec.rb +0 -64
@@ -5,12 +5,19 @@ require 'trace_visualization/data/repetition'
5
5
 
6
6
  module TraceVisualization
7
7
  module Repetitions
8
- def self.psy1(str, p_min, decode_result = true)
9
- sa = TraceVisualization::SuffixArray.effective(str)
10
- lcp = TraceVisualization::LongestCommonPrefix.effective(str, sa, str.size)
11
- bwt = TraceVisualization::BurrowsWheelerTransform.bwt(str, sa, str.length)
8
+
9
+ # Computes all the complete nonextendible repeats using PSY1 algorithm
10
+ # @param data [Array] Array of objects
11
+ # @param p_min [Integer] The minimum number of positions in which we have repetition
12
+ # @param decode_result [Boolean]
13
+ def self.psy1(data, p_min, decode_result = true)
14
+ raise ArgumentError, 'Data is empty' if data == nil || data.size == 0
15
+
16
+ sa = TraceVisualization::SuffixArray.effective(data)
17
+ lcp = TraceVisualization::LongestCommonPrefix.effective(data, sa, data.size)
18
+ bwt = TraceVisualization::BurrowsWheelerTransform.bwt(data, sa, data.size)
12
19
 
13
- result = psy1_original(lcp, bwt, p_min, str.length)
20
+ result = psy1_original(lcp, bwt, p_min, data.length)
14
21
  result = decode_psy1_result(result, sa) if decode_result
15
22
 
16
23
  result
@@ -3,50 +3,13 @@ require 'trace_visualization/assert'
3
3
  module TraceVisualization
4
4
  module Utils
5
5
 
6
- def self.rhash(lp, rp)
7
- lp.hash + rp.hash
6
+ def self.set_default_options(options, default_options)
7
+ options.update(default_options.merge(options))
8
8
  end
9
9
 
10
-
11
- # Get the start position of lines
12
- def self.lines_pos(str)
13
- TraceVisualization.assert_instance_of(str, Mapping)
14
-
15
- lines_pos = [0]
16
- pos = -1
17
-
18
- while (pos = str.index(/\n/, pos + 1))
19
- lines_pos << pos + 1 if pos + 1 < str.length
20
- end
21
-
22
- lines_pos
10
+ def self.rhash(lp, rp)
11
+ lp.hash + rp.hash
23
12
  end
24
-
25
- # Repetitions by line
26
- def self.rs_by_line(rs, lines_pos, rs_by_line)
27
- for r in rs
28
- r_pos = r.left_positions
29
- r.lines = []
30
- i, j = 0, 0
31
-
32
- while (i < lines_pos.size && j < r_pos.size)
33
- a, b = lines_pos[i], (i + 1 < lines_pos.size ? lines_pos[i + 1] : 2**32)
34
-
35
- if a <= r_pos[j] && r_pos[j] < b
36
- rs_by_line[i] << [r, r_pos[j]]
37
- r.lines << i
38
-
39
- j += 1
40
- else
41
- i += 1
42
- end
43
- end
44
- end
45
-
46
- rs_by_line.each { |item| item.sort! { |a, b| a[1] <=> b[1] } }
47
-
48
- rs_by_line
49
- end
50
13
 
51
14
  # Read data from file
52
15
  # Allowed options
@@ -79,7 +42,7 @@ module TraceVisualization
79
42
  end
80
43
 
81
44
  str
82
- end
45
+ end
83
46
 
84
47
  end # module Utils
85
48
  end # module TraceVisualization
@@ -1,3 +1,3 @@
1
1
  module TraceVisualization
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.5'
3
3
  end
@@ -16,18 +16,34 @@ module TraceVisualization
16
16
  positions = repetition.build_positions
17
17
 
18
18
  positions.each do |position|
19
- result += mapping.restore(prev_position, position[0][0])
19
+ result += mapping.restore(prev_position, position[0][0] - prev_position)
20
20
 
21
21
  for i in 0 ... position.size
22
22
  pos, len = position[i]
23
- result += GRN + "#{mapping.restore(pos, pos + len)}" + FINISH
24
- result += YLW + "#{mapping.restore(pos + len, position[i + 1][0])}" + FINISH if i < position.size - 1
23
+ result += GRN + "#{mapping.restore(pos, len)}" + FINISH
24
+ result += YLW + "#{mapping.restore(pos + len, position[i + 1][0] - (pos + len))}" + FINISH if i < position.size - 1
25
25
  end
26
26
 
27
27
  prev_position = position[-1][0] + position[-1][1]
28
28
  end
29
29
 
30
- result += mapping.restore(prev_position, -1)
30
+ result += mapping.restore(prev_position, mapping.length - prev_position)
31
+ end
32
+
33
+ def self.hl_stdout(mapping, repetition)
34
+ if repetition.instance_of? Array
35
+ puts "* * * S T A R T * * *"
36
+ for r in repetition
37
+ puts r
38
+ puts hl(mapping, r)
39
+ puts "- - - - - - - - - - -"
40
+ end
41
+ else
42
+ puts "* * * S T A R T * * *"
43
+ puts repetition
44
+ puts hl(mapping, repetition)
45
+ puts "- - - - - - - - - - -"
46
+ end
31
47
  end
32
48
  end
33
49
  end
@@ -0,0 +1,69 @@
1
+ require 'trace_visualization'
2
+ require 'trace_visualization/algorithm'
3
+ require 'trace_visualization/visualization/console_color_print'
4
+
5
+ include TraceVisualization
6
+
7
+ describe Algorithm do
8
+ it 'smoke' do
9
+ mapping = Mapping.new
10
+ mapping.process { from_string "test1\ntest2\ntest3\ntest4" }
11
+
12
+ repetitions = Algorithm.process(mapping, {})
13
+
14
+ repetitions.size.should eq 1
15
+
16
+ repetition = repetitions[0]
17
+ repetition.k.should eq 0
18
+ repetition.length.should eq 4
19
+ repetition.left_positions.size.should eq 4
20
+ repetition.left.should be_nil
21
+ repetition.right.should be_nil
22
+ end
23
+
24
+ it 'two strict repetitions with k = 1' do
25
+ mapping = Mapping.new
26
+ mapping.process { from_string "fooAbar\nfooBbar\nfooCbar" }
27
+
28
+ repetitions = Algorithm.process(mapping, {})
29
+
30
+ repetitions.size.should eq 3
31
+ repetitions[0].k.should eq 0
32
+ repetitions[1].k.should eq 0
33
+ repetitions[2].k.should eq 1
34
+
35
+ repetitions[0].length.should eq 3
36
+ repetitions[1].length.should eq 3
37
+ repetitions[2].length.should eq 7
38
+ end
39
+
40
+ it 'two strict repetitions with k = 2' do
41
+ mapping = Mapping.new
42
+ mapping.process { from_string "fooABbar\nfooCDbar\nfooEFbar" }
43
+
44
+ repetitions = Algorithm.process(mapping, {})
45
+
46
+ repetitions.size.should eq 3
47
+ repetitions[0].k.should eq 0
48
+ repetitions[1].k.should eq 0
49
+ repetitions[2].k.should eq 2
50
+
51
+ repetitions[0].length.should eq 3
52
+ repetitions[1].length.should eq 3
53
+ repetitions[2].length.should eq 8
54
+ end
55
+
56
+ it 'one strict repetition - self-concatenation' do
57
+ mapping = Mapping.new
58
+ mapping.process { from_string "testAtest\ntestBtest\ntestCtest" }
59
+
60
+ repetitions = Algorithm.process(mapping, {})
61
+
62
+ # for repetition in repetitions
63
+ # puts "* * * * * * * * * *"
64
+ # puts "k = #{repetition.k}, length = #{repetition.length}, positions.size = #{repetition.left_positions.size}"
65
+ # puts Visualization::ConsoleColorPrint.hl(mapping, repetition)
66
+ # puts "* * * * * * * * * *"
67
+ # end
68
+ end
69
+ end
@@ -34,12 +34,9 @@ describe TraceVisualization::BurrowsWheelerTransform do
34
34
  end
35
35
 
36
36
  it "test with mapping" do
37
- str = "127.0.0.1 a 127.0.0.1 b" + TraceVisualization::TERMINATION_CHAR
38
-
39
- mapped_str = TraceVisualization::Mapping.init do
40
- default_tokens
41
- end
37
+ str = "{TOKEN;ip;127.0.0.1;123;1} a {TOKEN;ip;127.0.0.1;123;1} b" + TraceVisualization::TERMINATION_CHAR
42
38
 
39
+ mapped_str = TraceVisualization::Mapping.new
43
40
  mapped_str.process { from_string str }
44
41
 
45
42
  ip, ws, a, b = mapped_str[0], mapped_str[1], mapped_str[2], mapped_str[6]
@@ -0,0 +1,27 @@
1
+ require 'trace_visualization'
2
+ require 'trace_visualization/data/sorted_array'
3
+
4
+ include TraceVisualization
5
+ include TraceVisualization::Data
6
+
7
+ describe SortedArray do
8
+ it 'should correct insert values' do
9
+ a = SortedArray.new([4, 5, 1, 2])
10
+ a << 3
11
+ a.push 6
12
+ a << 0
13
+ a.should eq [0, 1, 2, 3, 4, 5, 6]
14
+ end
15
+
16
+ it 'should correct index value' do
17
+ a = SortedArray.new([1, 4, 5, 4, 1, 2, 5])
18
+ a.should eq [1, 1, 2, 4, 4, 5, 5]
19
+ a.index(1).should eq 0
20
+ a.index(2).should eq 2
21
+ a.index(4).should eq 3
22
+ a.index(5).should eq 5
23
+ a.index(0).should eq nil
24
+ a.index(3).should eq nil
25
+ a.index(6).should eq nil
26
+ end
27
+ end
@@ -1,20 +1,20 @@
1
1
  require 'trace_visualization/lexeme_overlap_filter'
2
- require 'trace_visualization/data/lexeme'
2
+ require 'trace_visualization/data/token'
3
3
 
4
4
  include TraceVisualization::Data
5
5
 
6
6
  describe TraceVisualization::LexemeOverlapFilter do
7
7
 
8
8
  it 'test 1' do
9
- lexeme2 = Lexeme.new(:name, "aa")
10
- lexeme3 = Lexeme.new(:name, "aaa")
11
- lexeme6 = Lexeme.new(:name, "aaaaaa")
9
+ lexeme2 = Token.new(:name, "aa")
10
+ lexeme3 = Token.new(:name, "aaa")
11
+ lexeme6 = Token.new(:name, "aaaaaa")
12
12
 
13
- i1 = LexemePos.new(lexeme2, 2)
14
- i2 = LexemePos.new(lexeme2, 6)
15
- i3 = LexemePos.new(lexeme2, 10)
16
- i4 = LexemePos.new(lexeme6, 8)
17
- i5 = LexemePos.new(lexeme3, 3)
13
+ i1 = TokenPosition.new(lexeme2, 2)
14
+ i2 = TokenPosition.new(lexeme2, 6)
15
+ i3 = TokenPosition.new(lexeme2, 10)
16
+ i4 = TokenPosition.new(lexeme6, 8)
17
+ i5 = TokenPosition.new(lexeme3, 3)
18
18
 
19
19
  lexeme_positions = [i1, i2, i3, i4, i5]
20
20
 
@@ -24,14 +24,14 @@ describe TraceVisualization::LexemeOverlapFilter do
24
24
  end
25
25
 
26
26
  it 'test 2' do
27
- lexeme2 = Lexeme.new(:name, "aa")
28
- lexeme4 = Lexeme.new(:name, "aaaa")
27
+ lexeme2 = Token.new(:name, "aa")
28
+ lexeme4 = Token.new(:name, "aaaa")
29
29
 
30
- i1 = LexemePos.new(lexeme2, 0)
31
- i2 = LexemePos.new(lexeme4, 2)
32
- i3 = LexemePos.new(lexeme2, 4)
33
- i4 = LexemePos.new(lexeme4, 5)
34
- i5 = LexemePos.new(lexeme2, 7)
30
+ i1 = TokenPosition.new(lexeme2, 0)
31
+ i2 = TokenPosition.new(lexeme4, 2)
32
+ i3 = TokenPosition.new(lexeme2, 4)
33
+ i4 = TokenPosition.new(lexeme4, 5)
34
+ i5 = TokenPosition.new(lexeme2, 7)
35
35
 
36
36
  lexemes = [i1, i2, i3, i4, i5]
37
37
 
@@ -41,13 +41,13 @@ describe TraceVisualization::LexemeOverlapFilter do
41
41
  end
42
42
 
43
43
  it 'test 3' do
44
- lexeme1 = Lexeme.new(:name, "a")
45
- lexeme3 = Lexeme.new(:name, "aaa")
44
+ lexeme1 = Token.new(:name, "a")
45
+ lexeme3 = Token.new(:name, "aaa")
46
46
 
47
- i1 = LexemePos.new(lexeme1, 1)
48
- i2 = LexemePos.new(lexeme1, 3)
49
- i3 = LexemePos.new(lexeme3, 0)
50
- i4 = LexemePos.new(lexeme3, 3)
47
+ i1 = TokenPosition.new(lexeme1, 1)
48
+ i2 = TokenPosition.new(lexeme1, 3)
49
+ i3 = TokenPosition.new(lexeme3, 0)
50
+ i4 = TokenPosition.new(lexeme3, 3)
51
51
 
52
52
  lexemes = [i1, i2, i3, i4]
53
53
 
@@ -15,15 +15,10 @@ describe TraceVisualization::LongestCommonPrefix do
15
15
  end
16
16
 
17
17
  it 'should return correct result for mapped string', :current => true do
18
- str = "127.0.0.1 foo\r\n127.0.0.1 bar"
18
+ str = "{TOKEN;ip;127.0.0.1;1000;1} foo\r\n{TOKEN;ip;127.0.0.1;1000;1} bar"
19
19
 
20
- mapping = TraceVisualization::Mapping.init do
21
- default_tokens
22
- end
23
-
24
- mapping.process do
25
- from_string(str)
26
- end
20
+ mapping = TraceVisualization::Mapping.new
21
+ mapping.process { from_string(str) }
27
22
 
28
23
  sa = TraceVisualization::SuffixArray.effective(mapping)
29
24
  lcp = TraceVisualization::LongestCommonPrefix.effective(mapping, sa, mapping.length)
@@ -1,113 +1,116 @@
1
1
  require 'trace_visualization'
2
2
  require 'trace_visualization/mapping'
3
+ require 'tempfile'
3
4
 
4
5
  include TraceVisualization
5
6
  include TraceVisualization::Data
6
7
 
7
- describe TraceVisualization::Mapping do
8
+ describe Mapping do
8
9
  it 'simple id values' do
9
- str = "foo[1234]bar[1235]far[1234]\n"
10
+ str = "foo{TOKEN;id;1234;1234;1}bar{TOKEN;id;1235;1235;1}far{TOKEN;id;1234;1234;1}"
10
11
 
11
- mapping = TraceVisualization::Mapping.init do
12
- default_tokens
13
- end
12
+ mapping = Mapping.new
14
13
 
15
14
  mapping.process do
16
15
  from_string(str)
17
16
  end
18
17
 
19
- mapping.length.should eq 13
18
+ mapping.length.should eq 12
20
19
 
21
- ids = mapping.find_all { |lexeme| lexeme.name == :ID }
20
+ ids = mapping.find_all { |lexeme| lexeme.name == :id }
22
21
  ids.size.should eq(3)
23
- ids[0].value.should eq("[1234]")
24
- ids[1].value.should eq("[1235]")
25
- ids[2].value.should eq("[1234]")
26
- ids[0].should eq(ids[2])
27
-
28
- mapping.restore.should eq str
22
+ ids[0].value.should eq("1234")
23
+ ids[1].value.should eq("1235")
24
+ ids[2].value.should eq("1234")
25
+ ids[0].should eq(ids[2])
29
26
  end
30
27
 
31
28
  it 'ip values' do
32
- str = "user1 ip : 127.0.0.1 \r\nuser2 ip : 127.0.0.2\r\n"
29
+ str = "user1 ip: {TOKEN;ip;127.0.0.1;123;1} \nuser2 ip: {TOKEN;ip;127.0.0.2;122;1}"
33
30
 
34
- mapping = TraceVisualization::Mapping.init do
35
- default_tokens
36
- end
31
+ mapping = Mapping.new
37
32
 
38
33
  mapping.process do
39
34
  from_string(str)
40
35
  end
41
36
 
42
- mapping.length.should eq 29
37
+ mapping.length.should eq 24
43
38
 
44
- ips = mapping.find_all { |lexeme| lexeme.name == :IP }
39
+ ips = mapping.find_all { |lexeme| lexeme.name == :ip }
45
40
  ips.size.should eq(2)
46
41
  ips[0].value.should eq("127.0.0.1")
47
42
  ips[1].value.should eq("127.0.0.2")
48
-
49
- mapping.restore.should eq str
50
43
  end
51
44
 
52
- it 'compare different types' do
53
- mapping = TraceVisualization::Mapping.init do
54
- default_tokens
55
- end
45
+ it 'token to_i conversion' do
46
+ token = Token.new('unknown', 0, 0)
47
+ token.ord = 0
48
+ token.to_i.should eq 0
49
+ end
50
+
51
+ it 'item as array index' do
52
+ token = Token.new('unknown', 0, 0)
53
+ token.ord = 1
54
+ array = [0, 1, 2]
56
55
 
57
- mapping.tokens.should_not be_nil
58
- mapping.tokens[:ID].should_not be_nil
59
- mapping.tokens[:IP].should_not be_nil
60
- mapping.tokens[:TIME].should_not be_nil
56
+ array[token].should eq 1
57
+ end
58
+
59
+ it 'preprocessed string' do
60
+ str = 'Text {TOKEN;id;[1234];1234;1} text {TOKEN;ip;127.0.0.127;1;1} text'
61
61
 
62
- # Ids
63
- id_1 = Lexeme.new(:ID, "[12345678]", mapping.tokens[:ID][1].call("[12345678]"))
64
- id_2 = Lexeme.new(:ID, "[12345679]", mapping.tokens[:ID][1].call("[12345679]"))
65
- Reorder.process([id_1, id_2])
62
+ mapping = Mapping.new
63
+ mapping.process { from_string str }
66
64
 
67
- id_1.should be < id_2
65
+ mapping.size.should eq 18
66
+ end
67
+
68
+ it 'Mapping.lines should contains positions of lines (from string)' do
69
+ str = "{TOKEN;id;1;1;1}x\n{TOKEN;id;1;1;1}y\n{TOKEN;id;1;1;1}z"
68
70
 
69
- # IPs
70
- ip_1 = Lexeme.new(:IP, "127.0.0.1", mapping.tokens[:IP][1].call("127.0.0.1"))
71
- ip_2 = Lexeme.new(:IP, "127.0.0.2", mapping.tokens[:IP][1].call("127.0.0.2"))
72
- Reorder.process([ip_1, ip_2])
71
+ mapping = Mapping.new
72
+ mapping.process { from_string str }
73
73
 
74
- ip_1.should be < ip_2
75
-
76
- # Time
77
- time_1 = Lexeme.new(:TIME, '[16 Jan 2013 00:10:00]', mapping.tokens[:TIME][1].call('[16 Jan 2013 00:10:00]'))
78
- time_2 = Lexeme.new(:TIME, '[16 Jan 2013 00:10:01]', mapping.tokens[:TIME][1].call('[16 Jan 2013 00:10:01]'))
79
- Reorder.process([time_1, time_2])
80
-
81
- # Different
82
- Reorder.process([time_1, time_2, id_1, ip_1])
83
-
84
- time_1.should be < time_2
85
- id_1.should be < ip_1
86
- id_1.should be < time_1
74
+ mapping.size.should eq 8
75
+ mapping.lines.should eq [0, 3, 6]
87
76
  end
88
77
 
89
- it 'Lexeme to_i conversion' do
90
- lexeme = TraceVisualization::Data::Lexeme.new('unknown', 0, 0)
91
- lexeme.ord = 0
92
- lexeme.to_i.should eq 0
78
+ it 'Mapping.lines should contains positions of lines (from file)' do
79
+ data = <<-DATA
80
+ line1
81
+ line2
82
+ line3
83
+ DATA
84
+
85
+ tmp_file = Tempfile.new('trace_visualization')
86
+ open(tmp_file.path, "w") { |fd| fd.write data }
87
+
88
+ mapping = Mapping.new
89
+ mapping.process { from_file tmp_file.path }
90
+
91
+ tmp_file.close
92
+ tmp_file.unlink
93
+
94
+ mapping.lines.should eq [0, 6, 12]
93
95
  end
94
96
 
95
- it 'item as array index' do
96
- lexeme = TraceVisualization::Data::Lexeme.new('unknown', 0, 0)
97
- lexeme.ord = 1
98
- array = [0, 1, 2]
97
+ it 'subarray method for mapping' do
98
+ mapping = Mapping.new
99
+ mapping.process { from_string "test test test" }
99
100
 
100
- array[lexeme].should eq 1
101
- end
102
-
103
- it 'preprocessed string' do
104
- str = 'Text {LEXEME;ID;[1234];1234} text {LEXEME;IP;127.0.0.127;1} text'
101
+ submapping = mapping[5 ... 9]
102
+ submapping.size.should eq 4
103
+ submapping.join.should eq "test"
105
104
 
106
- mapping = TraceVisualization::Mapping.new
107
- mapping.process do
108
- from_preprocessed_string str
109
- end
105
+ mapping[0 .. -1].join.should eq "test test test"
106
+ end
107
+
108
+ it 'forbidden char scan' do
109
+ mapping = Mapping.new
110
+ mapping.process { from_string "test test test" }
111
+ mapping[0 .. -1].join.scan(TraceVisualization::FORBIDDEN_CHARS).size.should eq 0
110
112
 
111
- mapping.size.should eq 19
113
+ mapping.process { from_string "test\ntest\ntest" }
114
+ mapping[0 .. -1].join.scan(TraceVisualization::FORBIDDEN_CHARS).size.should_not eq 0
112
115
  end
113
116
  end