trace_visualization 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/bin/trace_visualization +21 -26
- data/lib/trace_visualization.rb +2 -2
- data/lib/trace_visualization/algorithm.rb +42 -0
- data/lib/trace_visualization/data/irepetition.rb +5 -1
- data/lib/trace_visualization/data/repetition.rb +14 -6
- data/lib/trace_visualization/data/sorted_array.rb +44 -0
- data/lib/trace_visualization/data/{lexeme.rb → token.rb} +7 -8
- data/lib/trace_visualization/lexeme_overlap_filter.rb +18 -18
- data/lib/trace_visualization/mapping.rb +72 -185
- data/lib/trace_visualization/repetitions/concatenation.rb +136 -0
- data/lib/trace_visualization/repetitions/context.rb +70 -0
- data/lib/trace_visualization/repetitions/filter.rb +153 -0
- data/lib/trace_visualization/repetitions/incrementation.rb +89 -0
- data/lib/trace_visualization/repetitions_psy.rb +12 -5
- data/lib/trace_visualization/utils.rb +5 -42
- data/lib/trace_visualization/version.rb +1 -1
- data/lib/trace_visualization/visualization/console_color_print.rb +20 -4
- data/spec/algorithm_spec.rb +69 -0
- data/spec/bwt_spec.rb +2 -5
- data/spec/data/sorted_array_spec.rb +27 -0
- data/spec/lexeme_overlap_filter_spec.rb +22 -22
- data/spec/longest_common_prefix_spec.rb +3 -8
- data/spec/mapping_spec.rb +72 -69
- data/spec/repetitions/concatenation_spec.rb +65 -0
- data/spec/repetitions/filter_spec.rb +180 -0
- data/spec/repetitions/incrementation_spec.rb +29 -0
- data/spec/repetitions_psy_spec.rb +7 -16
- data/spec/suffix_array_spec.rb +11 -31
- data/spec/utils_spec.rb +21 -9
- data/spec/visualization/console_color_print_spec.rb +26 -0
- data/trace_visualization.gemspec +2 -2
- metadata +27 -13
- data/lib/trace_visualization/repetitions_concatenation.rb +0 -134
- data/lib/trace_visualization/repetitions_context.rb +0 -18
- data/spec/repetitions_concatenation_spec.rb +0 -64
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'trace_visualization'
|
2
|
+
|
3
|
+
module TraceVisualization
|
4
|
+
module Repetitions
|
5
|
+
module Concatenation
|
6
|
+
|
7
|
+
# Concatenates pairs of repetitions that occur together on the same lines.
#
# Every ordered pair (earlier entry, later entry) of each line in
# +context.repetitions_by_line+ is counted; once a pair has been seen
# exactly +:positions_min_size+ times its common positions are computed and,
# if enough of them remain, a new repetition is created.
#
# @param context [#repetitions_by_line, #repetitions] processing context
# @param options [Hash] +:k+, +:positions_min_size+ (defaults are filled in
#   by +Utils.set_default_options+) and an optional +:counter+ array that
#   receives +[k, number_of_examined_pairs]+
# @return the result of +context.repetitions.concat+ with the new repetitions
def self.process(context, options = {})
  Utils.set_default_options(options, { :positions_min_size => 3, :k => 3 })
  k = options[:k]

  created = []
  useful_cnt = 0
  pairs_cnt = {}

  context.repetitions_by_line.each do |entries|
    entries.each_index do |i|
      ((i + 1)...entries.size).each do |j|
        left = entries[i][0]
        right = entries[j][0]
        delta = k - left.k - right.k

        next unless concat_condition(left, right, delta, options[:positions_min_size])

        # Both ids are packed into a single integer used as the pair key.
        key = (left.id << 32) + right.id
        seen = (pairs_cnt[key] || 0) + 1
        pairs_cnt[key] = seen

        # Only the occurrence that reaches the threshold triggers the
        # position intersection, so each pair is examined at most once.
        next unless seen == options[:positions_min_size]

        lps, rps = process_common_positions(left, right, delta, context)
        created << create_repetition(left, right, delta, lps, rps) if lps.size >= options[:positions_min_size]

        useful_cnt += 1
      end
    end
  end

  options[:counter] << [k, useful_cnt] if options[:counter]

  process_new_repetitions(created, context)
  context.repetitions.concat(created)
end
|
46
|
+
|
47
|
+
# Hands newly created repetitions to the context so that it can add them to
# its per-line index.
#
# @param new_repetitions [Array] repetitions to register
# @param context [#init_repetitions_by_line] processing context
# @return whatever +context.init_repetitions_by_line+ returns
def self.process_new_repetitions(new_repetitions, context)
  context.init_repetitions_by_line new_repetitions
end
|
50
|
+
|
51
|
+
# Brute-force concatenation: tries every ordered pair of repetitions in +rs+.
#
# @param rs [Array] repetitions; new concatenations are appended to it
# @param k [Integer] target number of differences for the concatenation
# @param context [#mapping] processing context, passed on to
#   +process_common_positions+
# @param options [Hash] +:positions_min_size+ (default 3) - minimum number of
#   positions both parents and the resulting repetition must have
# @return [Array] +rs+ with the newly created repetitions appended
def self.process_full_search(rs, k, context, options = {})
  opts = { :positions_min_size => 3 }.merge(options)

  # Read the threshold from the merged hash: +options+ itself does not carry
  # the default, so using it would compare positions against nil.
  min_size = opts[:positions_min_size]

  result = []
  useful_cnt = 0

  rs.each do |left|
    rs.each do |right|
      delta = k - left.k - right.k
      next unless concat_condition(left, right, delta, min_size)

      lps, rps = process_common_positions(left, right, delta, context)
      result << create_repetition(left, right, delta, lps, rps) if lps.size >= min_size

      useful_cnt += 1
    end
  end

  puts "Total: #{rs.size ** 2} #{useful_cnt} #{result.size}"

  rs.concat(result)
end
|
81
|
+
|
82
|
+
# Cheap pre-check deciding whether two repetitions are worth concatenating.
#
# @param left [#id, #positions_size] candidate left parent
# @param right [#id, #positions_size] candidate right parent
# @param delta [Integer] gap allowed between the parents; must not be negative
# @param positions_min_size [Integer] minimum positions each parent must have
# @return [Boolean] true when the gap is non-negative, the parents have
#   different ids and both have at least +positions_min_size+ positions
def self.concat_condition(left, right, delta, positions_min_size)
  return false unless delta >= 0
  return false if left.id == right.id

  [left, right].all? { |parent| parent.positions_size >= positions_min_size }
end
|
88
|
+
|
89
|
+
# Finds the positions at which +right+ starts exactly +delta+ characters
# after an occurrence of +left+ ends, and drops every such pair whose gap
# contains a forbidden character.
#
# The position arrays of +left+ and +right+ are only read, never modified,
# so the result is also correct when both share the same array.
#
# @param left [#left_positions, #length] left parent
# @param right [#left_positions] right parent
# @param delta [Integer] number of characters between the two parents
# @param context [#mapping] context whose mapping is sliced to inspect gaps
# @return [Array(Array, Array)] matching start positions of the left parent
#   and of the right parent, index-aligned
def self.process_common_positions(left, right, delta, context)
  shift = left.length + delta

  # Where the right parent would have to start for each left occurrence.
  expected_right = left.left_positions.collect { |pos| pos + shift }
  common_right = expected_right & right.left_positions

  pairs = common_right.collect { |rpos| [rpos - shift, rpos] }

  pairs.reject! do |lpos, rpos|
    gap = context.mapping[lpos + left.length...rpos]
    gap = gap.join if gap.instance_of? Array
    gap.scan(TraceVisualization::FORBIDDEN_CHARS).size != 0
  end

  [pairs.collect { |lpos, _| lpos }, pairs.collect { |_, rpos| rpos }]
end
|
116
|
+
|
117
|
+
# Builds the repetition that results from concatenating two parents.
#
# @param left [TraceVisualization::Repetition] left parent
# @param right [TraceVisualization::Repetition] right parent
# @param delta [Integer] gap between the parents; added to both the length
#   and +k+ of the new repetition
# @param lps [Array] left positions of the new repetition
# @param rps [Array] right positions of the new repetition
# @return a new instance of +left+'s class linked to both parents
def self.create_repetition(left, right, delta, lps, rps)
  total_length = left.length + right.length + delta

  left.class.new(total_length, lps, rps).tap do |joined|
    joined.k = left.k + right.k + delta
    joined.pcount = left.pcount + right.pcount
    joined.left = left
    joined.right = right
    joined.strict_ids = left.strict_ids + right.strict_ids
  end
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'trace_visualization/utils'
|
2
|
+
|
3
|
+
module TraceVisualization
|
4
|
+
module Repetitions
|
5
|
+
# State shared by the repetition-processing passes: the mapping being
# analysed, the repetitions found so far and an index of which repetition
# occurs at which position on every line.
#
# +hashes+ starts out empty and is not used by this class itself; it is
# exposed for the passes that work on the context.
class Context
  attr_accessor :mapping, :repetitions_by_line, :hashes
  attr_reader :repetitions

  # @param mapping [#lines] mapping whose +lines+ holds the start position
  #   of every line
  def initialize(mapping)
    @mapping = mapping
    @repetitions_by_line = Array.new(@mapping.lines.size) { [] }
    @hashes = []
  end

  # Stores the repetitions and adds them to the per-line index.
  def repetitions=(repetitions)
    @repetitions = repetitions
    init_repetitions_by_line(repetitions)
  end

  # @return the line start positions of the mapping
  def lines
    @mapping.lines
  end

  # Adds every occurrence of the given repetitions to the per-line index and
  # rebuilds each repetition's own list of line numbers.
  #
  # The walk advances through line starts and positions together, so it
  # assumes both are in ascending order.
  #
  # @param new_repetitions [Array] repetitions to index
  # @return [Array] the per-line index
  def init_repetitions_by_line(new_repetitions)
    line_starts = @mapping.lines

    new_repetitions.each do |r|
      r_pos = r.left_positions
      r.lines.clear
      i = 0
      j = 0

      while i < line_starts.size && j < r_pos.size
        line_start = line_starts[i]
        next_start = line_starts[i + 1] # nil on the last line: no upper bound

        if line_start <= r_pos[j] && (next_start.nil? || r_pos[j] < next_start)
          @repetitions_by_line[i] << [r, r_pos[j]]
          r.lines << i
          j += 1
        else
          i += 1
        end
      end
    end

    @repetitions_by_line.each { |item| item.sort! { |a, b| a[1] <=> b[1] } }
  end

  # Removes every index entry of the repetition from the lines it occurs on.
  def delete_repetition(repetition)
    repetition.lines.each do |line|
      @repetitions_by_line[line].delete_if { |item| item[0].id == repetition.id }
    end
  end

  # Merges the positions of +y+ into +x+ and re-points the index entries of
  # +y+ to +x+.
  #
  # @return the lines of +y+
  def merge_repetitions(x, y)
    merge_positions(x.left_positions, y.left_positions)

    if x.right_positions && y.right_positions
      merge_positions(x.right_positions, y.right_positions)
    end

    y.lines.each do |line|
      @repetitions_by_line[line].each do |entry|
        next unless entry[0].id == y.id

        entry[0] = x
        x.lines << line unless x.lines.include?(line)
      end
    end
  end

  private

  # Appends +extra+ to +target+ in place, then removes duplicates and sorts.
  # The bang methods are called one by one because +uniq!+ returns nil when
  # there is nothing to remove and therefore must not be chained.
  def merge_positions(target, extra)
    target.concat(extra)
    target.uniq!
    target.sort!
  end
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'trace_visualization/utils'
|
2
|
+
require 'trace_visualization/repetitions/concatenation'
|
3
|
+
|
4
|
+
module TraceVisualization
|
5
|
+
module Repetitions
|
6
|
+
module Filter
|
7
|
+
|
8
|
+
# Full clean-up pipeline for strict repetitions: the general filter first,
# then splitting at line breaks, then merging repetitions with common
# positions.
#
# @param str the analysed sequence
# @param context processing context holding the repetitions
# @param options [Hash] +:positions_min_size+ (default filled in by
#   +Utils.set_default_options+)
# @return whatever +merge+ returns
def self.strict_repetitions_filter(str, context, options = {})
  defaults = { :positions_min_size => 3 }
  Utils.set_default_options(options, defaults)

  repetitions_filter(str, context, options)
  split(str, context)

  merge(context)
end
|
16
|
+
|
17
|
+
# Removes repetitions with too few positions, then fixes line-break
# boundaries and removes duplicates among the survivors.
#
# @param str the analysed sequence
# @param context processing context holding the repetitions
# @param options [Hash] +:positions_min_size+ (default filled in by
#   +Utils.set_default_options+)
# @return whatever +delete_duplicates+ returns
def self.repetitions_filter(str, context, options = {})
  Utils.set_default_options(options, { :positions_min_size => 3 })

  context.repetitions.delete_if do |repetition|
    too_rare = repetition.positions_size < options[:positions_min_size]

    # Keep the per-line index in sync with the list itself.
    context.delete_repetition(repetition) if too_rare

    too_rare
  end

  fix_boundaries(str, context.repetitions)

  delete_duplicates(context)
end
|
32
|
+
|
33
|
+
# Filter repetitions: boundaries.
#
# Trims a "\n" from the start and from the end of every repetition. Only the
# first occurrence of a repetition is inspected; the adjustment is then
# applied to all of its positions.
#
# @param str the analysed sequence; its elements must respond to +to_str+
# @param rs [Array] repetitions, modified in place
# @return [Array] +rs+
def self.fix_boundaries(str, rs)
  rs.each do |r|
    leading_break = str[r.get_left_pos(0)].to_str == "\n"

    # A repetition consisting of a single "\n" is not shifted.
    if leading_break && r.length > 1
      r.positions_size.times do |idx|
        r.set_left_pos(idx, r.get_left_pos(idx) + 1)
      end
      r.length -= 1
    end

    last_index = r.get_left_pos(0) + r.length - 1
    r.length -= 1 if str[last_index].to_str == "\n"
  end
end
|
49
|
+
|
50
|
+
# Filter repetitions: split.
# * Splits repetitions that contain a "\n" at that line break
# * Deletes parts that are already covered by a remaining repetition
#
# *NB* This is only meaningful for strict repetitions; approximate
# repetitions do not contain "\n".
#
# Only the first "\n" of the first occurrence is considered. A part is kept
# only if it is at least two characters long.
#
# @param str the analysed sequence; its elements must respond to +to_str+
# @param context processing context holding the repetitions
# @return the repetitions of the context after the parts were appended
def self.split(str, context)
  splitted = []

  context.repetitions.delete_if do |r|
    start = r.get_left_pos(0)

    # Offset of the line break *inside* the repetition. All arithmetic below
    # is relative to the start of an occurrence, so the absolute index in
    # +str+ must not be used here.
    offset = (0...r.length).find { |shift| str[start + shift].to_str == "\n" }

    next false if offset.nil?

    # right part of split
    right_length = r.length - offset - 1
    if right_length >= 2
      positions = (0...r.positions_size).collect { |idx| r.get_left_pos(idx) + offset + 1 }
      splitted << r.class.new(right_length, positions)
    end

    # left part of split: the original repetition is shortened and reused
    if offset >= 2
      r.length = offset
      splitted << r
    end

    context.delete_repetition(r)

    true
  end

  # Drop parts whose positions are all covered by an equally long or longer
  # repetition that is still in the context.
  splitted.delete_if do |part|
    context.repetitions.any? do |x|
      x.length >= part.length &&
        (x.left_positions & part.left_positions).size == part.left_positions.size
    end
  end

  Concatenation.process_new_repetitions(splitted, context)

  context.repetitions.concat(splitted)
end
|
106
|
+
|
107
|
+
# Filter repetitions: delete duplicates.
#
# For every repetition, removes all later repetitions that compare equal to
# it (+==+), both from the list and from the per-line index of the context.
#
# @param context processing context holding the repetitions
# @return [nil]
def self.delete_duplicates(context)
  keeper = 0

  until keeper >= context.repetitions.size
    probe = keeper + 1

    until probe >= context.repetitions.size
      unless context.repetitions[keeper] == context.repetitions[probe]
        probe += 1
        next
      end

      # The element after the removed one moves into +probe+, so the index
      # is deliberately left unchanged here.
      context.delete_repetition(context.repetitions[probe])
      context.repetitions.delete_at(probe)
    end

    keeper += 1
  end
end
|
123
|
+
|
124
|
+
# Merges repetitions with common positions.
#
# Two repetitions are compared only when they have the same length and the
# same +k+. If all positions of +y+ also belong to +x+, +y+ is dropped; if
# they merely overlap, +y+ is merged into +x+ via the context and dropped.
#
# The list shrinks while it is being scanned, so both indices are corrected
# after every removal: the inner index stays on the element that moved into
# the freed slot, and the outer index follows +x+ when an element before it
# was removed.
#
# @param context processing context holding the repetitions
# @return [nil]
def self.merge(context)
  i = 0

  while i < context.repetitions.size
    j = 0

    while j < context.repetitions.size
      removed = false

      if i != j
        x, y = context.repetitions[i], context.repetitions[j]

        if x.length == y.length && x.k == y.k
          common_size = (x.left_positions & y.left_positions).size

          if common_size == y.left_positions.size
            context.delete_repetition(y)
            removed = true
          elsif common_size > 0
            context.merge_repetitions(x, y)
            removed = true
          end
        end
      end

      if removed
        context.repetitions.delete_at(j)
        i -= 1 if j < i # x moved one slot to the left
      else
        j += 1
      end
    end

    i += 1
  end
end
|
150
|
+
|
151
|
+
end # Filter
|
152
|
+
end # Repetitions
|
153
|
+
end # TraceVisualization
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'trace_visualization'
|
2
|
+
|
3
|
+
module TraceVisualization
|
4
|
+
module Repetitions
|
5
|
+
module Incrementation
|
6
|
+
|
7
|
+
# Extends strict repetitions (+k == 0+) by +k+ characters to the right and
# to the left.
#
# For every such core, the repetitions that are exactly +k+ characters
# longer are searched for positions shared with the core (right extension)
# or with the core shifted left by +k+ (left extension). A new repetition is
# created per direction when more than one position was collected and the
# same position set has not been recorded in +context.hashes+ before.
#
# @param context [#repetitions, #hashes] processing context
# @param options [Hash] +:k+ and +:common_positions_size+ (defaults are
#   filled in by +Utils.set_default_options+)
# @return the result of +context.repetitions.concat+ with the new repetitions
def self.process(context, options = {})
  Utils.set_default_options(options, {
    :k => 1, :common_positions_size => 2
  })

  k = options[:k]
  repetitions = context.repetitions
  result = []

  # Creates the extended repetition unless its position set is already known.
  emit = lambda do |core, lefts, rights, type|
    hash = TraceVisualization::Utils.rhash(lefts, rights)

    unless context.hashes.include? hash
      context.hashes << hash
      result << create_repetition(core, k, lefts, rights, type)
    end
  end

  repetitions.each do |core|
    next unless core.k == 0

    # Positions for Right Incrementation
    positions_ri = []

    # Positions for Left Incrementation
    positions_li = []

    repetitions.each do |r|
      next if r == core
      next unless core.length + k == r.length

      core_positions = core.left_positions
      candidate_positions = r.left_positions

      shared_ri = core_positions & candidate_positions
      shared_li = core_positions.collect { |p| p - k } & candidate_positions

      positions_ri |= shared_ri if shared_ri.size >= options[:common_positions_size]
      positions_li |= shared_li if shared_li.size >= options[:common_positions_size]
    end

    if positions_ri.size > 1
      lefts = positions_ri.sort
      emit.call(core, lefts, lefts.collect { |pos| pos + core.length + k }, "right")
    end

    if positions_li.size > 1
      lefts = positions_li.sort
      emit.call(core, lefts, lefts.collect { |pos| pos + k}, "left")
    end
  end

  context.repetitions.concat(result)
end
|
66
|
+
|
67
|
+
# Builds the repetition obtained by extending +core+ by +k+ characters.
#
# The extension itself is represented by a zero-length placeholder
# repetition, which becomes the left parent for a "left" extension and the
# right parent otherwise.
#
# @param core the repetition being extended
# @param k [Integer] number of added characters; also stored as +k+
# @param left_positions [Array] left positions of the new repetition
# @param right_positions [Array] right positions of the new repetition
# @param type [String] "left" or any other value for a right extension
# @return a new instance of +core+'s class
def self.create_repetition(core, k, left_positions, right_positions, type)
  core.class.new(core.length + k, left_positions, right_positions).tap do |extended|
    extended.k = k

    placeholder = fake_repetition(core.class, left_positions, right_positions, type)
    parents = type == "left" ? [placeholder, core] : [core, placeholder]

    extended.left, extended.right = parents
  end
end
|
82
|
+
|
83
|
+
# Builds the zero-length placeholder repetition that stands for the added
# characters of an extension.
#
# @param cls [Class] class to instantiate
# @param left_positions [Array] used when +type+ is "left"
# @param right_positions [Array] used for any other +type+
# @param type [String] extension direction
# @return a new +cls+ instance of length 0
def self.fake_repetition(cls, left_positions, right_positions, type)
  positions = if type == "left"
    left_positions
  else
    right_positions
  end

  cls.new(0, positions)
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|