trace_visualization 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/bin/trace_visualization +21 -26
  4. data/lib/trace_visualization.rb +2 -2
  5. data/lib/trace_visualization/algorithm.rb +42 -0
  6. data/lib/trace_visualization/data/irepetition.rb +5 -1
  7. data/lib/trace_visualization/data/repetition.rb +14 -6
  8. data/lib/trace_visualization/data/sorted_array.rb +44 -0
  9. data/lib/trace_visualization/data/{lexeme.rb → token.rb} +7 -8
  10. data/lib/trace_visualization/lexeme_overlap_filter.rb +18 -18
  11. data/lib/trace_visualization/mapping.rb +72 -185
  12. data/lib/trace_visualization/repetitions/concatenation.rb +136 -0
  13. data/lib/trace_visualization/repetitions/context.rb +70 -0
  14. data/lib/trace_visualization/repetitions/filter.rb +153 -0
  15. data/lib/trace_visualization/repetitions/incrementation.rb +89 -0
  16. data/lib/trace_visualization/repetitions_psy.rb +12 -5
  17. data/lib/trace_visualization/utils.rb +5 -42
  18. data/lib/trace_visualization/version.rb +1 -1
  19. data/lib/trace_visualization/visualization/console_color_print.rb +20 -4
  20. data/spec/algorithm_spec.rb +69 -0
  21. data/spec/bwt_spec.rb +2 -5
  22. data/spec/data/sorted_array_spec.rb +27 -0
  23. data/spec/lexeme_overlap_filter_spec.rb +22 -22
  24. data/spec/longest_common_prefix_spec.rb +3 -8
  25. data/spec/mapping_spec.rb +72 -69
  26. data/spec/repetitions/concatenation_spec.rb +65 -0
  27. data/spec/repetitions/filter_spec.rb +180 -0
  28. data/spec/repetitions/incrementation_spec.rb +29 -0
  29. data/spec/repetitions_psy_spec.rb +7 -16
  30. data/spec/suffix_array_spec.rb +11 -31
  31. data/spec/utils_spec.rb +21 -9
  32. data/spec/visualization/console_color_print_spec.rb +26 -0
  33. data/trace_visualization.gemspec +2 -2
  34. metadata +27 -13
  35. data/lib/trace_visualization/repetitions_concatenation.rb +0 -134
  36. data/lib/trace_visualization/repetitions_context.rb +0 -18
  37. data/spec/repetitions_concatenation_spec.rb +0 -64
@@ -0,0 +1,136 @@
1
+ require 'trace_visualization'
2
+
3
+ module TraceVisualization
4
+ module Repetitions
5
+ module Concatenation
6
+
7
# Build new repetitions by concatenating pairs of repetitions that occur on
# the same line.
#
# Every entry of +context.repetitions_by_line+ is scanned as a list whose
# elements expose the repetition at index 0. For each ordered pair
# (earlier entry, later entry) that passes +concat_condition+, a per-pair
# counter is incremented; the pair is examined exactly once, at the moment
# its counter reaches +options[:positions_min_size]+.
#
# @param context the repetitions context; this method reads
#   +repetitions_by_line+ and appends to +repetitions+
# @param options [Hash] recognised keys: +:positions_min_size+ (default 3),
#   +:k+ (default 3) and the optional +:counter+ collector, which receives
#   +[k, number_of_examined_pairs]+
# @return the result of appending the new repetitions to
#   +context.repetitions+
def self.process(context, options = {})
  Utils.set_default_options(options, { :positions_min_size => 3, :k => 3 })
  k = options[:k]

  produced = []
  useful_cnt = 0
  pair_hits = {}

  context.repetitions_by_line.each do |entries|
    (0...entries.size).each do |i|
      ((i + 1)...entries.size).each do |j|
        left = entries[i][0]
        right = entries[j][0]
        delta = k - left.k - right.k

        next unless concat_condition(left, right, delta, options[:positions_min_size])

        # Single integer key for the ordered pair of ids
        # (NOTE(review): assumes ids fit in 32 bits - confirm).
        key = (left.id << 32) + right.id
        hits = pair_hits.fetch(key, 0) + 1
        pair_hits[key] = hits

        # Only the hit that reaches the threshold triggers the expensive
        # position intersection; earlier and later hits are skipped.
        next unless hits == options[:positions_min_size]

        lps, rps = process_common_positions(left, right, delta, context)
        if lps.size >= options[:positions_min_size]
          produced << create_repetition(left, right, delta, lps, rps)
        end

        useful_cnt += 1
      end
    end
  end

  options[:counter] << [k, useful_cnt] if options[:counter]

  process_new_repetitions(produced, context)

  context.repetitions.concat(produced)
end
46
+
47
# Hands the given repetitions to the context by calling
# context.init_repetitions_by_line(new_repetitions) and returns whatever that
# call returns. The method performs no other work.
+ def self.process_new_repetitions(new_repetitions, context)
48
+ context.init_repetitions_by_line(new_repetitions)
49
+ end
50
+
51
# Exhaustive variant of the concatenation: every ordered pair of
# repetitions in +rs+ is tested, regardless of the lines they occur on.
#
# A summary line ("Total: <pairs> <examined> <created>") is written to
# standard output.
#
# @param rs [Array] repetitions to combine; the newly created repetitions
#   are appended to this same array
# @param k [Integer] target value used to compute the gap
#   (+delta = k - left.k - right.k+)
# @param context the repetitions context passed on to
#   +process_common_positions+
# @param options [Hash] +:positions_min_size+ (default 3)
# @return [Array] +rs+ with the new repetitions appended
def self.process_full_search(rs, k, context, options = {})
  # The merged hash is the one that must be consulted: reading the raw
  # +options+ would ignore the default and compare against nil.
  opts = { :positions_min_size => 3 }.merge(options)
  positions_min_size = opts[:positions_min_size]

  result = []
  useful_cnt = 0

  rs.each do |left|
    rs.each do |right|
      delta = k - left.k - right.k
      next unless concat_condition(left, right, delta, positions_min_size)

      lps, rps = process_common_positions(left, right, delta, context)

      if lps.size >= positions_min_size
        result << create_repetition(left, right, delta, lps, rps)
      end

      useful_cnt += 1
    end
  end

  puts "Total: #{rs.size ** 2} #{useful_cnt} #{result.size}"

  rs.concat(result)
end
81
+
82
# Decide whether two repetitions are candidates for concatenation.
#
# A pair qualifies when the gap +delta+ is not negative, the two
# repetitions have different ids, and each of them has at least
# +positions_min_size+ positions.
#
# @return [Boolean]
def self.concat_condition(left, right, delta, positions_min_size)
  return false unless delta >= 0
  return false if left.id == right.id

  [left, right].all? { |repetition| repetition.positions_size >= positions_min_size }
end
88
+
89
# Find the occurrences where +right+ starts exactly +delta+ symbols after an
# occurrence of +left+ ends, and drop those whose gap contains a forbidden
# character.
#
# The position arrays of +left+ and +right+ are only read, never modified,
# so a failure while inspecting the mapping cannot leave them shifted and
# passing the same repetition as both arguments is harmless.
#
# @param left  left repetition (+length+ and +left_positions+ are read)
# @param right right repetition (+left_positions+ is read)
# @param delta [Integer] size of the gap between the two parts
# @param context object whose +mapping+ is sliced with a range to obtain the
#   gap content (a String, or an Array that is joined)
# @return [Array(Array, Array)] start positions of the left parts and the
#   matching start positions of the right parts
def self.process_common_positions(left, right, delta, context)
  offset = left.length + delta

  # Where a right part would have to start for every left occurrence.
  expected_right_starts = left.left_positions.collect { |pos| pos + offset }
  right_starts = expected_right_starts & right.left_positions

  pairs = right_starts.collect { |rpos| [rpos - offset, rpos] }

  pairs.reject! do |lpos, rpos|
    gap = context.mapping[(lpos + left.length)...rpos]
    gap = gap.join if gap.instance_of? Array
    gap.scan(TraceVisualization::FORBIDDEN_CHARS).size != 0
  end

  [pairs.collect(&:first), pairs.collect(&:last)]
end
116
+
117
# Build the repetition that results from concatenating +left+ and +right+
# with a gap of +delta+ symbols between them.
#
# The new object is an instance of +left+'s class; its length is the sum of
# both lengths plus the gap, its +k+ is the sum of both +k+ values plus the
# gap, +pcount+ and +strict_ids+ are combined from both parents, and the
# parents are stored in +left+ / +right+.
#
# @param left [TraceVisualization::Repetition] left parent
# @param right [TraceVisualization::Repetition] right parent
# @param delta [Integer] gap between the parents
# @param lps start positions of the left part
# @param rps start positions of the right part
# @return the newly created repetition
def self.create_repetition(left, right, delta, lps, rps)
  combined_length = left.length + right.length + delta

  left.class.new(combined_length, lps, rps).tap do |child|
    child.k = left.k + right.k + delta
    child.pcount = left.pcount + right.pcount
    child.left = left
    child.right = right
    child.strict_ids = left.strict_ids + right.strict_ids
  end
end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,70 @@
1
+ require 'trace_visualization/utils'
2
+
3
+ module TraceVisualization
4
+ module Repetitions
5
# Shared state of the repetition search: the mapping under analysis, the
# list of known repetitions, an index of repetitions per line and a list of
# hashes of repetitions that were already produced.
class Context
  attr_accessor :mapping, :repetitions_by_line, :hashes

  # The writer for +repetitions+ is defined explicitly below because it also
  # has to rebuild the per-line index.
  attr_reader :repetitions

  # @param mapping object responding to +lines+; the elements of +lines+ are
  #   compared with repetition positions, i.e. they act as the start
  #   position of every line
  def initialize(mapping)
    @mapping = mapping
    @repetitions_by_line = Array.new(@mapping.lines.size) { [] }
    @hashes = []
  end

  # Replace the list of repetitions and register all of them in the
  # per-line index.
  def repetitions=(repetitions)
    @repetitions = repetitions
    init_repetitions_by_line(repetitions)
  end

  # Line start positions of the underlying mapping.
  def lines
    @mapping.lines
  end

  # Register every occurrence of the given repetitions in
  # +repetitions_by_line+ as a +[repetition, position]+ pair and record the
  # line indices in +repetition.lines+ (which is cleared first).
  #
  # Lines and positions are walked with two cursors that only move forward,
  # so positions are expected in ascending order (NOTE(review): confirm that
  # +left_positions+ is always sorted). The last line has no upper bound.
  #
  # @param new_repetitions [Enumerable] repetitions to register
  # @return [Array] the per-line index
  def init_repetitions_by_line(new_repetitions)
    line_starts = @mapping.lines

    new_repetitions.each do |r|
      r_pos = r.left_positions
      r.lines.clear
      i = 0
      j = 0

      while i < line_starts.size && j < r_pos.size
        line_end = line_starts[i + 1] # nil for the last line

        if line_starts[i] <= r_pos[j] && (line_end.nil? || r_pos[j] < line_end)
          @repetitions_by_line[i] << [r, r_pos[j]]
          r.lines << i

          j += 1
        else
          i += 1
        end
      end
    end

    @repetitions_by_line.each { |item| item.sort! { |a, b| a[1] <=> b[1] } }
  end

  # Remove all index entries of +repetition+ (matched by id) from the lines
  # listed in +repetition.lines+.
  def delete_repetition(repetition)
    repetition.lines.each do |line|
      @repetitions_by_line[line].delete_if { |item| item[0].id == repetition.id }
    end
  end

  # Merge the positions of +y+ into +x+ and make every index entry of +y+
  # point to +x+.
  #
  # Right positions are merged only when both repetitions have them.
  def merge_repetitions(x, y)
    merge_positions(x.left_positions, y.left_positions)

    if x.right_positions && y.right_positions
      merge_positions(x.right_positions, y.right_positions)
    end

    y.lines.each do |line|
      @repetitions_by_line[line].each do |entry|
        next unless entry[0].id == y.id

        entry[0] = x
        x.lines << line unless x.lines.index line
      end
    end
  end

  private

  # Append +extra+ to +target+ in place, then remove duplicates and sort.
  # The bang methods are called one by one: +uniq!+ returns nil when nothing
  # was removed, so its result must not be chained.
  def merge_positions(target, extra)
    target.concat(extra)
    target.uniq!
    target.sort!
  end
end
69
+ end
70
+ end
@@ -0,0 +1,153 @@
1
+ require 'trace_visualization/utils'
2
+ require 'trace_visualization/repetitions/concatenation'
3
+
4
+ module TraceVisualization
5
+ module Repetitions
6
+ module Filter
7
+
8
+ # Filter for strict repetitions
9
# Runs the filtering pipeline: option defaults are applied
# (:positions_min_size => 3), then repetitions_filter, split and merge are
# called in that order. The return value is whatever merge(context) returns.
+ def self.strict_repetitions_filter(str, context, options = {})
10
+ Utils.set_default_options(options, { :positions_min_size => 3 })
11
+
12
+ repetitions_filter(str, context, options)
13
+ split(str, context)
14
+ merge(context)
15
+ end
16
+
17
# Basic repetition filter.
#
# 1. Removes every repetition with fewer than
#    +options[:positions_min_size]+ positions (default 3) from both
#    +context.repetitions+ and the context's per-line index.
# 2. Fixes newline boundaries of the remaining repetitions.
# 3. Removes duplicates.
def self.repetitions_filter(str, context, options = {})
  Utils.set_default_options(options, { :positions_min_size => 3 })

  context.repetitions.delete_if do |candidate|
    next false unless candidate.positions_size < options[:positions_min_size]

    context.delete_repetition(candidate)
    true
  end

  fix_boundaries(str, context.repetitions)
  delete_duplicates(context)
end
32
+
33
# Filter repetitions: boundaries.
#
# * A repetition longer than one symbol that starts with "\n" is moved one
#   symbol to the right (all left positions are incremented) and shortened.
# * A repetition whose last symbol is "\n" is shortened by one.
#
# Only the first occurrence is inspected to take these decisions.
#
# @param str indexable sequence whose elements respond to +to_str+
# @param rs [Array] repetitions, modified in place
# @return [Array] +rs+
def self.fix_boundaries(str, rs)
  rs.each do |rep|
    leading_newline = str[rep.get_left_pos(0)].to_str == "\n"

    if leading_newline && rep.length > 1
      rep.positions_size.times do |idx|
        rep.set_left_pos(idx, rep.get_left_pos(idx) + 1)
      end
      rep.length -= 1
    end

    last_index = rep.get_left_pos(0) + rep.length - 1
    rep.length -= 1 if str[last_index].to_str == "\n"
  end
end
49
+
50
# Filter repetitions: split.
#
# * A repetition that contains "\n" is cut at the first newline of its
#   first occurrence. The part after the newline becomes a new repetition
#   and the part before it reuses the original object; a part is kept only
#   when it is at least two symbols long.
# * A part is dropped when an existing repetition of at least the same
#   length already covers all of its positions.
#
# The newline is located as an offset relative to the start of the
# repetition, which is what the length and position arithmetic below
# requires; using the absolute index in +str+ would only be correct for a
# repetition whose first occurrence starts at index 0.
#
# *NB* This is meaningful only for strict repetitions; approximate
# repetitions do not contain "\n".
#
# @param str indexable sequence whose elements respond to +to_str+
# @param context repetitions context
# @return the result of appending the kept parts to +context.repetitions+
def self.split(str, context)
  splitted = []

  context.repetitions.delete_if do |r|
    start = r.get_left_pos(0)
    offset = (0...r.length).find { |shift| str[start + shift].to_str == "\n" }
    next false if offset.nil?

    # Right part of the split (computed before the left part shortens r).
    tail_length = r.length - offset - 1
    if tail_length >= 2
      tail_positions = (0...r.positions_size).collect do |idx|
        r.get_left_pos(idx) + offset + 1
      end

      splitted << r.class.new(tail_length, tail_positions)
    end

    # Left part of the split.
    if offset >= 2
      r.length = offset
      splitted << r
    end

    context.delete_repetition(r)
    true
  end

  splitted.delete_if do |part|
    context.repetitions.any? do |known|
      known.length >= part.length &&
        (known.left_positions & part.left_positions).size == part.left_positions.size
    end
  end

  Concatenation.process_new_repetitions(splitted, context)

  context.repetitions.concat(splitted)
end
106
+
107
# Filter repetitions: delete duplicates.
#
# For every repetition, all later elements of +context.repetitions+ that
# compare equal to it (+==+) are removed from the list and from the
# context's per-line index. The first of several equal repetitions is the
# one that is kept.
def self.delete_duplicates(context)
  repetitions = context.repetitions
  kept = 0

  until kept >= repetitions.size
    probe = kept + 1

    until probe >= repetitions.size
      if repetitions[kept] == repetitions[probe]
        context.delete_repetition(repetitions[probe])
        # The next candidate slides into +probe+, so the cursor stays put.
        repetitions.delete_at(probe)
      else
        probe += 1
      end
    end

    kept += 1
  end
end
123
+
124
# Merge repetitions with common positions.
#
# Two different repetitions are compared only when they have the same
# +length+ and the same +k+:
# * if every left position of +y+ also belongs to +x+, +y+ is removed;
# * otherwise, if they share at least one left position, +y+ is merged
#   into +x+ through +context.merge_repetitions+ and then removed.
#
# Removing an element shifts the rest of the list, so the cursors are
# corrected instead of being advanced blindly: +j+ is not incremented after
# a removal, +i+ follows its element when something in front of it is
# removed, and after a merge the scan for +x+ restarts because the enlarged
# +x+ may now overlap repetitions that were already passed.
#
# @param context repetitions context (+repetitions+, +delete_repetition+
#   and +merge_repetitions+ are used)
# @return [nil]
def self.merge(context)
  repetitions = context.repetitions
  i = 0

  while i < repetitions.size
    j = 0

    while j < repetitions.size
      outcome = nil

      if i != j
        x, y = repetitions[i], repetitions[j]

        if x.length == y.length && x.k == y.k
          common_positions = x.left_positions & y.left_positions

          if common_positions.size == y.left_positions.size
            context.delete_repetition(y)
            outcome = :dropped
          elsif common_positions.size > 0
            context.merge_repetitions(x, y)
            outcome = :merged
          end
        end
      end

      if outcome.nil?
        j += 1
        next
      end

      repetitions.delete_at(j)
      i -= 1 if j < i            # keep +i+ on the same repetition
      j = 0 if outcome == :merged # +x+ grew: re-check everything against it
    end

    i += 1
  end
end
150
+
151
+ end # Filter
152
+ end # Repetitions
153
+ end # TraceVisualization
@@ -0,0 +1,89 @@
1
+ require 'trace_visualization'
2
+
3
+ module TraceVisualization
4
+ module Repetitions
5
+ module Incrementation
6
+
7
# Incrementation step: extend strict repetitions (+k == 0+) by +k+ symbols
# on the right or on the left.
#
# For every core repetition, the repetitions that are exactly +k+ symbols
# longer are inspected. Their start positions are collected when they share
# at least +options[:common_positions_size]+ positions with the core
# (right incrementation) or with the core shifted +k+ symbols to the left
# (left incrementation). A new repetition is created for a direction when
# more than one position was collected and the hash of its positions is not
# yet present in +context.hashes+.
#
# @param context repetitions context (+repetitions+ and +hashes+ are used)
# @param options [Hash] +:k+ (default 1) and +:common_positions_size+
#   (default 2)
# @return the result of appending the new repetitions to
#   +context.repetitions+
def self.process(context, options = {})
  Utils.set_default_options(options, {
    :k => 1, :common_positions_size => 2
  })

  k = options[:k]
  repetitions = context.repetitions
  created = []

  repetitions.each do |core|
    next unless core.k == 0

    positions_ri = [] # positions for right incrementation
    positions_li = [] # positions for left incrementation

    repetitions.each do |r|
      next if r == core
      next unless core.length + k == r.length

      clps, rlps = core.left_positions, r.left_positions

      common_positions_ri = clps & rlps
      common_positions_li = clps.collect { |p| p - k } & rlps

      if common_positions_ri.size >= options[:common_positions_size]
        positions_ri |= common_positions_ri
      end

      if common_positions_li.size >= options[:common_positions_size]
        positions_li |= common_positions_li
      end
    end

    # Right incrementation is handled first, then left; the right part of
    # the new repetition starts after core + gap or after the gap alone.
    [
      [positions_ri, core.length + k, "right"],
      [positions_li, k, "left"]
    ].each do |positions, shift, type|
      next unless positions.size > 1

      left_positions = positions.sort
      right_positions = left_positions.collect { |pos| pos + shift }

      signature = TraceVisualization::Utils.rhash(left_positions, right_positions)
      next if context.hashes.include? signature

      context.hashes << signature
      created << create_repetition(core, k, left_positions, right_positions, type)
    end
  end

  context.repetitions.concat(created)
end
66
+
67
# Create the repetition produced by an incrementation step.
#
# The new repetition is an instance of the core's class, +k+ symbols longer
# than the core, with its +k+ set to the given value. Its parents are the
# core and a zero-length placeholder repetition: the placeholder is the
# left parent when +type+ is "left" and the right parent otherwise.
#
# @param core the strict repetition that is extended
# @param k [Integer] number of added symbols
# @param left_positions positions of the left part
# @param right_positions positions of the right part
# @param type [String] "left" or any other value (treated as right)
# @return the new repetition
def self.create_repetition(core, k, left_positions, right_positions, type)
  extended = core.class.new(core.length + k, left_positions, right_positions)
  extended.k = k

  placeholder = fake_repetition(core.class, left_positions, right_positions, type)

  parents = type == "left" ? [placeholder, core] : [core, placeholder]
  extended.left, extended.right = parents

  extended
end
82
+
83
# Create the zero-length placeholder repetition that stands for the added
# symbols of an incrementation.
#
# @param cls class to instantiate
# @param left_positions positions used when +type+ is "left"
# @param right_positions positions used for any other +type+
# @param type [String] "left" or any other value
# @return an instance of +cls+ built with length 0 and the chosen positions
def self.fake_repetition(cls, left_positions, right_positions, type)
  anchor_positions =
    if type == "left"
      left_positions
    else
      right_positions
    end

  cls.new(0, anchor_positions)
end
87
+ end
88
+ end
89
+ end