trace_visualization 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/bin/trace_visualization +21 -26
  4. data/lib/trace_visualization.rb +2 -2
  5. data/lib/trace_visualization/algorithm.rb +42 -0
  6. data/lib/trace_visualization/data/irepetition.rb +5 -1
  7. data/lib/trace_visualization/data/repetition.rb +14 -6
  8. data/lib/trace_visualization/data/sorted_array.rb +44 -0
  9. data/lib/trace_visualization/data/{lexeme.rb → token.rb} +7 -8
  10. data/lib/trace_visualization/lexeme_overlap_filter.rb +18 -18
  11. data/lib/trace_visualization/mapping.rb +72 -185
  12. data/lib/trace_visualization/repetitions/concatenation.rb +136 -0
  13. data/lib/trace_visualization/repetitions/context.rb +70 -0
  14. data/lib/trace_visualization/repetitions/filter.rb +153 -0
  15. data/lib/trace_visualization/repetitions/incrementation.rb +89 -0
  16. data/lib/trace_visualization/repetitions_psy.rb +12 -5
  17. data/lib/trace_visualization/utils.rb +5 -42
  18. data/lib/trace_visualization/version.rb +1 -1
  19. data/lib/trace_visualization/visualization/console_color_print.rb +20 -4
  20. data/spec/algorithm_spec.rb +69 -0
  21. data/spec/bwt_spec.rb +2 -5
  22. data/spec/data/sorted_array_spec.rb +27 -0
  23. data/spec/lexeme_overlap_filter_spec.rb +22 -22
  24. data/spec/longest_common_prefix_spec.rb +3 -8
  25. data/spec/mapping_spec.rb +72 -69
  26. data/spec/repetitions/concatenation_spec.rb +65 -0
  27. data/spec/repetitions/filter_spec.rb +180 -0
  28. data/spec/repetitions/incrementation_spec.rb +29 -0
  29. data/spec/repetitions_psy_spec.rb +7 -16
  30. data/spec/suffix_array_spec.rb +11 -31
  31. data/spec/utils_spec.rb +21 -9
  32. data/spec/visualization/console_color_print_spec.rb +26 -0
  33. data/trace_visualization.gemspec +2 -2
  34. metadata +27 -13
  35. data/lib/trace_visualization/repetitions_concatenation.rb +0 -134
  36. data/lib/trace_visualization/repetitions_context.rb +0 -18
  37. data/spec/repetitions_concatenation_spec.rb +0 -64
@@ -0,0 +1,136 @@
1
+ require 'trace_visualization'
2
+
3
module TraceVisualization
  module Repetitions
    # Builds new repetitions by joining two existing repetitions ("left" and
    # "right") that occur at a fixed distance from each other.
    #
    # The objects handled here are expected to respond to +id+, +k+, +length+,
    # +left_positions+, +positions_size+, +pcount+ and +strict_ids+
    # (presumably TraceVisualization::Data::Repetition -- confirm against the
    # data classes).
    module Concatenation

      # Concatenates repetitions that share lines of the trace.
      #
      # Candidate pairs are taken from +context.repetitions_by_line+. A pair is
      # processed exactly once: at the moment it has been seen together on
      # +:positions_min_size+ lines.
      #
      # @param context [Context] supplies +repetitions_by_line+, +mapping+ and
      #   +repetitions+; newly built repetitions are appended to it
      # @param options [Hash] +:k+ (default 3), +:positions_min_size+
      #   (default 3) and optional +:counter+ (receives +[k, useful_cnt]+).
      #   Defaults are applied through Utils.set_default_options, which
      #   presumably fills the missing keys in place -- confirm in Utils.
      # @return [Array] +context.repetitions+ with the new repetitions appended
      def self.process(context, options = {})
        Utils.set_default_options(options, { :positions_min_size => 3, :k => 3 })
        k = options[:k]
        min_size = options[:positions_min_size]

        result = []
        useful_cnt = 0
        pairs_cnt = {}

        context.repetitions_by_line.each do |item|
          (0...item.size).each do |i|
            ((i + 1)...item.size).each do |j|
              left, right = item[i][0], item[j][0]
              delta = k - left.k - right.k

              next unless concat_condition(left, right, delta, min_size)

              # Count how many times this ordered pair shares a line and act
              # only when the counter reaches the threshold, so every pair is
              # expanded at most once.
              key = (left.id << 32) + right.id
              pairs_cnt[key] = (pairs_cnt[key] || 0) + 1
              next if pairs_cnt[key] != min_size

              lps, rps = process_common_positions(left, right, delta, context)
              result << create_repetition(left, right, delta, lps, rps) if lps.size >= min_size

              useful_cnt += 1
            end
          end
        end

        options[:counter] << [k, useful_cnt] if options[:counter]

        process_new_repetitions(result, context)

        context.repetitions.concat(result)
      end

      # Registers freshly created repetitions in the per-line index of the
      # context.
      def self.process_new_repetitions(new_repetitions, context)
        context.init_repetitions_by_line(new_repetitions)
      end

      # Concatenation by exhaustive search over every ordered pair of +rs+.
      #
      # @param rs [Array] repetitions; new repetitions are appended to it
      # @param k [Integer] target +k+ of the concatenated repetitions
      # @param context [Context] supplies +mapping+
      # @param options [Hash] +:positions_min_size+ (default 3); the hash
      #   passed by the caller is not modified
      # @return [Array] +rs+ with the new repetitions appended
      def self.process_full_search(rs, k, context, options = {})
        opts = { :positions_min_size => 3 }.merge(options)
        # Read the threshold from the merged hash: the caller's hash may not
        # contain the key at all.
        min_size = opts[:positions_min_size]

        result = []
        useful_cnt = 0

        rs.each do |left|
          rs.each do |right|
            delta = k - left.k - right.k
            next unless concat_condition(left, right, delta, min_size)

            lps, rps = process_common_positions(left, right, delta, context)
            result << create_repetition(left, right, delta, lps, rps) if lps.size >= min_size

            useful_cnt += 1
          end
        end

        puts "Total: #{rs.size ** 2} #{useful_cnt} #{result.size}"

        rs.concat(result)
      end

      # Condition for potential concatenation: the gap must be non-negative,
      # the two repetitions must be different, and both must occur at least
      # +positions_min_size+ times.
      def self.concat_condition(left, right, delta, positions_min_size)
        delta >= 0 && left.id != right.id &&
          left.positions_size >= positions_min_size &&
          right.positions_size >= positions_min_size
      end

      # Finds the occurrences where +right+ starts exactly +delta+ symbols
      # after the end of +left+ and the gap between them contains no match of
      # TraceVisualization::FORBIDDEN_CHARS.
      #
      # The position arrays of +left+ and +right+ are not modified.
      #
      # @return [Array(Array, Array)] start positions of the left parts and
      #   start positions of the matching right parts, index-aligned
      def self.process_common_positions(left, right, delta, context)
        shift = left.length + delta

        # Where +right+ would have to start for each occurrence of +left+.
        expected_right = left.left_positions.map { |pos| pos + shift }

        cpr = expected_right & right.left_positions
        cpl = cpr.map { |pos| pos - shift }

        kept = cpl.zip(cpr).reject do |lpos, rpos|
          gap = context.mapping[(lpos + left.length)...rpos]
          gap = gap.join if gap.instance_of? Array
          !gap.scan(TraceVisualization::FORBIDDEN_CHARS).empty?
        end

        [kept.map { |lpos, _| lpos }, kept.map { |_, rpos| rpos }]
      end

      # Create repetition
      # @param left [TraceVisualization::Repetition] left parent
      # @param right [TraceVisualization::Repetition] right parent
      # @param delta [Integer] gap between the end of +left+ and the start of +right+
      # @param lps [Array] start positions of the left part
      # @param rps [Array] start positions of the right part
      # @return a new instance of +left.class+ linked to both parents
      def self.create_repetition(left, right, delta, lps, rps)
        r = left.class.new(left.length + right.length + delta, lps, rps)

        r.k = left.k + right.k + delta
        r.pcount = left.pcount + right.pcount
        r.left = left
        r.right = right
        r.strict_ids = left.strict_ids + right.strict_ids

        r
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require 'trace_visualization/utils'
2
+
3
module TraceVisualization
  module Repetitions
    # Shared state of the repetition search: the mapping being analysed, the
    # list of repetitions and an index of repetitions by line.
    #
    # +repetitions_by_line[i]+ holds +[repetition, position]+ pairs for every
    # occurrence whose start position falls on line +i+, ordered by position.
    class Context
      attr_accessor :mapping, :repetitions_by_line, :hashes
      attr_reader :repetitions

      # @param mapping an object responding to +lines+; the per-line index is
      #   sized from +mapping.lines.size+ (presumably the start position of
      #   each line -- see #init_repetitions_by_line)
      def initialize(mapping)
        @mapping = mapping
        @repetitions_by_line = Array.new(@mapping.lines.size) { [] }
        @hashes = []
      end

      # Stores the repetitions and adds them to the per-line index.
      # NOTE(review): the index is appended to, not rebuilt, so assigning
      # twice registers the same occurrences twice.
      def repetitions=(repetitions)
        @repetitions = repetitions
        init_repetitions_by_line(repetitions)
      end

      # Shortcut for +mapping.lines+.
      def lines
        @mapping.lines
      end

      # Adds every occurrence of the given repetitions to the per-line index
      # and rebuilds +r.lines+ (one entry per occurrence) for each of them.
      #
      # Line +i+ covers positions from +lines[i]+ up to, but not including,
      # +lines[i + 1]+; the last line is open-ended. The walk advances through
      # lines and positions together, so positions are expected in ascending
      # order.
      def init_repetitions_by_line(new_repetitions)
        line_starts = @mapping.lines

        new_repetitions.each do |r|
          positions = r.left_positions
          r.lines.clear
          i = 0
          j = 0

          while i < line_starts.size && j < positions.size
            line_begin = line_starts[i]
            line_end = i + 1 < line_starts.size ? line_starts[i + 1] : Float::INFINITY

            if line_begin <= positions[j] && positions[j] < line_end
              @repetitions_by_line[i] << [r, positions[j]]
              r.lines << i
              j += 1
            else
              i += 1
            end
          end
        end

        @repetitions_by_line.each { |item| item.sort! { |a, b| a[1] <=> b[1] } }
      end

      # Removes all index entries of the repetition (matched by +id+) from the
      # lines recorded in +repetition.lines+.
      def delete_repetition(repetition)
        repetition.lines.each do |line|
          @repetitions_by_line[line].delete_if { |item| item[0].id == repetition.id }
        end
      end

      # Merges +y+ into +x+: the positions of +y+ are added to +x+ (kept
      # unique and sorted) and every index entry of +y+ is re-pointed to +x+.
      #
      # Right positions are merged only when both repetitions have them.
      #
      # @return [Array] +y.lines+
      def merge_repetitions(x, y)
        merge_positions(x.left_positions, y.left_positions)
        merge_positions(x.right_positions, y.right_positions) if x.right_positions && y.right_positions

        y.lines.each do |line|
          @repetitions_by_line[line].each do |entry|
            next unless entry[0].id == y.id

            entry[0] = x
            x.lines << line unless x.lines.include?(line)
          end
        end
      end

      private

      # Appends +extra+ to +target+ in place, leaving it unique and sorted.
      # The bang methods are called one by one because Array#uniq! returns
      # nil when there was nothing to remove.
      def merge_positions(target, extra)
        target.concat(extra)
        target.uniq!
        target.sort!
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
+ require 'trace_visualization/utils'
2
+ require 'trace_visualization/repetitions/concatenation'
3
+
4
module TraceVisualization
  module Repetitions
    # Post-processing of found repetitions: removal of rare and duplicate
    # repetitions, trimming and splitting at line breaks, and merging of
    # repetitions that share positions.
    #
    # +str+ is indexed by position and every element must respond to +to_str+
    # (a String works; presumably a token array is used as well -- confirm
    # with the callers).
    module Filter

      # Filter for strict repetitions: runs the common filter, then splits
      # repetitions at line breaks and merges overlapping ones.
      #
      # @param options [Hash] +:positions_min_size+ (default 3)
      def self.strict_repetitions_filter(str, context, options = {})
        Utils.set_default_options(options, { :positions_min_size => 3 })

        repetitions_filter(str, context, options)
        split(str, context)
        merge(context)
      end

      # Filter repetitions: drops repetitions with fewer than
      # +:positions_min_size+ occurrences, fixes boundaries and removes
      # duplicates.
      def self.repetitions_filter(str, context, options = {})
        Utils.set_default_options(options, { :positions_min_size => 3 })

        context.repetitions.delete_if do |repetition|
          too_rare = repetition.positions_size < options[:positions_min_size]

          context.delete_repetition(repetition) if too_rare

          too_rare
        end

        fix_boundaries(str, context.repetitions)
        delete_duplicates(context)
      end

      # Filter repetitions: Boundaries
      # * Trim a "\n" at the beginning or at the end of a repetition. Only the
      #   first occurrence is inspected.
      def self.fix_boundaries(str, rs)
        rs.each do |r|
          if str[r.get_left_pos(0)].to_str == "\n" && r.length > 1
            # Leading line break: move every occurrence one symbol right.
            r.positions_size.times { |i| r.set_left_pos(i, r.get_left_pos(i) + 1) }
            r.length -= 1
          end

          r.length -= 1 if str[r.get_left_pos(0) + r.length - 1].to_str == "\n"
        end
      end

      # Filter repetitions: Split
      # * Split repetitions that contain "\n" in the middle (at the first
      #   line break of the first occurrence); parts shorter than two symbols
      #   are discarded
      # * Delete parts that duplicate a remaining repetition (same positions
      #   and not longer than it)
      #
      # *NB* This is meaningful only for strict repetitions; approximate
      # repetitions do not contain "\n".
      #
      # @return [Array] +context.repetitions+ with the parts appended
      def self.split(str, context)
        splitted = []

        context.repetitions.delete_if do |r|
          start = r.get_left_pos(0)

          # Offset of the first line break, relative to the start of the
          # repetition (not an absolute index in +str+).
          offset = (0...r.length).find { |i| str[start + i].to_str == "\n" }
          next false if offset.nil?

          # right part of split
          right_length = r.length - offset - 1
          if right_length >= 2
            positions = (0...r.positions_size).map { |i| r.get_left_pos(i) + offset + 1 }
            splitted << r.class.new(right_length, positions)
          end

          # left part of split: the original object is shortened and reused
          if offset >= 2
            r.length = offset
            splitted << r
          end

          context.delete_repetition(r)

          true
        end

        splitted.delete_if do |r|
          context.repetitions.any? do |x|
            (x.left_positions & r.left_positions).size == r.left_positions.size && x.length >= r.length
          end
        end

        Concatenation.process_new_repetitions(splitted, context)

        context.repetitions.concat(splitted)
      end

      # Filter repetitions: delete duplicates (as defined by +==+ of the
      # repetition class), keeping the first of each group.
      def self.delete_duplicates(context)
        i = 0
        while i < context.repetitions.size
          j = i + 1
          while j < context.repetitions.size
            if context.repetitions[i] == context.repetitions[j]
              context.delete_repetition(context.repetitions[j])
              context.repetitions.delete_at(j)
            else
              j += 1
            end
          end
          i += 1
        end
      end

      # Merge repetitions with common positions. Two repetitions are compared
      # only when they have the same +length+ and +k+.
      def self.merge(context)
        repetitions = context.repetitions

        i = 0
        while i < repetitions.size
          j = 0
          while j < repetitions.size
            if i != j && absorb(context, repetitions[i], repetitions[j])
              repetitions.delete_at(j)
              # The element that followed +j+ now sits at +j+, so +j+ is not
              # advanced; when the removed slot was before +i+, the current
              # repetition moved one slot to the left as well.
              i -= 1 if j < i
            else
              j += 1
            end
          end
          i += 1
        end
      end

      # Decides whether +y+ is covered by +x+ and updates the context index
      # accordingly: +y+ is dropped when all its positions are in +x+, and
      # merged into +x+ when only some of them are.
      #
      # @return [Boolean] true when +y+ must be removed from the list
      def self.absorb(context, x, y)
        return false unless x.length == y.length && x.k == y.k

        common_positions = x.left_positions & y.left_positions

        if common_positions.size == y.left_positions.size
          context.delete_repetition(y)
          true
        elsif common_positions.size > 0
          context.merge_repetitions(x, y)
          true
        else
          false
        end
      end
      private_class_method :absorb

    end # Filter
  end # Repetitions
end # TraceVisualization
@@ -0,0 +1,89 @@
1
+ require 'trace_visualization'
2
+
3
module TraceVisualization
  module Repetitions
    # Derives repetitions with +k+ extra symbols from strict repetitions
    # (those with +k == 0+, called "cores" here) by looking at longer
    # repetitions that start at, or +k+ symbols before, the core positions.
    module Incrementation

      # Process
      #
      # For every core, positions shared with repetitions that are exactly
      # +k+ symbols longer are collected: same start ("right" incrementation)
      # or start shifted +k+ symbols to the left ("left" incrementation). A
      # shared set counts only when it has at least +:common_positions_size+
      # elements. Each resulting position set is registered once, keyed by
      # Utils.rhash in +context.hashes+.
      #
      # @param context [Context] supplies +repetitions+ and +hashes+
      # @param options [Hash] +:k+ (default 1), +:common_positions_size+
      #   (default 2)
      # @return [Array] +context.repetitions+ with the new repetitions appended
      def self.process(context, options = {})
        Utils.set_default_options(options, { :k => 1, :common_positions_size => 2 })

        k = options[:k]
        repetitions = context.repetitions
        result = []

        repetitions.each do |core|
          next if core.k != 0

          positions_ri = [] # Positions for Right Incrementation
          positions_li = [] # Positions for Left Incrementation

          repetitions.each do |r|
            next if r == core || core.length + k != r.length

            core_positions = core.left_positions
            other_positions = r.left_positions

            same_start = core_positions & other_positions
            shifted_start = core_positions.collect { |pos| pos - k } & other_positions

            positions_ri |= same_start if same_start.size >= options[:common_positions_size]
            positions_li |= shifted_start if shifted_start.size >= options[:common_positions_size]
          end

          # The right pass runs first; for each pass the third element is the
          # distance from the left position to the right position.
          [["right", positions_ri, core.length + k], ["left", positions_li, k]].each do |type, positions, shift|
            next unless positions.size > 1

            left_positions = positions.sort
            right_positions = left_positions.collect { |pos| pos + shift }

            hash = TraceVisualization::Utils.rhash(left_positions, right_positions)
            next if context.hashes.include? hash

            context.hashes << hash
            result << create_repetition(core, k, left_positions, right_positions, type)
          end
        end

        context.repetitions.concat(result)
      end

      # Create repetition
      #
      # Builds an instance of +core.class+ of length +core.length + k+ whose
      # parents are the core and a zero-length placeholder; the placeholder is
      # the left parent for type "left" and the right parent otherwise.
      def self.create_repetition(core, k, left_positions, right_positions, type)
        built = core.class.new(core.length + k, left_positions, right_positions)
        built.k = k

        placeholder = fake_repetition(core.class, left_positions, right_positions, type)
        built.left, built.right = (type == "left" ? [placeholder, core] : [core, placeholder])

        built
      end

      # Create fake repetition: zero length, located at the left positions for
      # type "left" and at the right positions otherwise.
      def self.fake_repetition(cls, left_positions, right_positions, type)
        positions = type == "left" ? left_positions : right_positions
        cls.new(0, positions)
      end
    end
  end
end