pedump 0.3.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.md +352 -10
- data/README.md.tpl +57 -2
- data/Rakefile +54 -27
- data/VERSION +1 -1
- data/data/fs.txt +1595 -0
- data/data/sig.bin +0 -0
- data/data/signatures.txt +678 -0
- data/data/{sig.txt → userdb.txt} +0 -0
- data/lib/pedump.rb +125 -64
- data/lib/pedump/cli.rb +135 -25
- data/lib/pedump/packer.rb +66 -65
- data/lib/pedump/sig_parser.rb +386 -0
- data/lib/pedump/version.rb +2 -2
- data/lib/pedump/version_info.rb +166 -0
- data/pedump.gemspec +14 -3
- data/samples/zlib.dll +0 -0
- data/spec/pedump_spec.rb +3 -3
- data/spec/resource_spec.rb +13 -0
- data/spec/sig_all_packers_spec.rb +14 -0
- data/spec/sig_spec.rb +63 -0
- metadata +35 -16
data/lib/pedump/packer.rb
CHANGED
@@ -1,47 +1,101 @@
|
|
1
|
+
require 'pedump/sig_parser'
|
2
|
+
|
1
3
|
class PEdump
|
2
4
|
class Packer < Struct.new(:name, :re, :ep_only, :size)
|
3
5
|
|
4
6
|
DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))
|
5
7
|
BIN_SIGS_FILE = File.join(DATA_ROOT, "data", "sig.bin")
|
6
|
-
TEXT_SIGS_FILE = File.join(DATA_ROOT, "data", "sig.txt")
|
7
8
|
|
8
|
-
Match
|
9
|
+
# One signature hit: the offset at which it was found plus the packer
# signature entry that matched there.
class Match < Struct.new(:offset, :packer)
  # Name of the matched signature, taken from the wrapped packer entry.
  def name()
    self.packer.name
  end
end
|
9
14
|
|
10
15
|
class << self
|
11
|
-
|
12
16
|
# Returns the signature list, building it on first use and caching it in
# @@all. The result of unmarshal is preferred; when that yields nothing a
# warning is emitted (through PEdump.logger when one is available,
# otherwise on STDERR) and the text signature files are parsed instead.
def all
  @@all ||= unmarshal || begin
    warning = "[?] #{self}: unmarshal failed, using slow text parsing instead"
    if PEdump.respond_to?(:logger) && PEdump.logger
      PEdump.logger.warn warning
    else
      STDERR.puts warning
    end
    SigParser.parse
  end
end
|
27
32
|
alias :load :all
|
28
33
|
|
34
|
+
# default deep-scan flag
|
35
|
+
@@deep = false
|
36
|
+
|
37
|
+
# Reader for the class-wide deep-scan flag; of_pe_file falls back to this
# value when its options hash has no :deep key.
def default_deep()
  return @@deep
end
|
40
|
+
|
41
|
+
# Writer for the class-wide deep-scan flag; stores +value+ as given.
def default_deep=(value)
  @@deep = value
end
|
44
|
+
|
29
45
|
# Largest +size+ among all loaded signatures, computed once and cached
# in @@max_size.
def max_size
  @@max_size ||= all.map { |sig| sig.size }.max
end
|
32
48
|
|
33
|
-
def of data,
|
34
|
-
if data.respond_to?(:read) && data.respond_to?(:seek) && ep_offset
|
35
|
-
|
49
|
+
# Entry point for packer detection.
#
# data - either a raw data string or an IO-like object
# h    - options hash; :ep_offset selects the file-based scan
#
# When +data+ can be read and seeked and an :ep_offset is supplied, the
# work is handed to of_pe_file; in every other case +data+ goes straight
# to of_data.
def of data, h = {}
  seekable_io = data.respond_to?(:read) && data.respond_to?(:seek)
  return of_pe_file(data, h) if seekable_io && h[:ep_offset]

  of_data data
end
|
40
56
|
|
41
57
|
# try to determine packer of FILE f, ep_offset - offset to entrypoint from start of file
|
42
|
-
def
|
43
|
-
|
44
|
-
|
58
|
+
# Detects packers in an already opened file.
#
# f - IO-like object; this method seeks and reads it, and hands it on to
#     scan_whole_file
# h - options hash (left unmodified):
#     :ep_offset - offset of the PE entry point from the start of the file
#     :deep      - true/false or a number; true counts as 1, false as 0;
#                  when the key is absent the class-wide @@deep flag is used
#
# The bytes at the entry point are checked first. If that finds something
# and :deep is below 2 those matches are returned as-is. Otherwise the
# result of scan_whole_file is appended: limited to the first megabyte
# when :deep is 0, unlimited when it is greater.
#
# Returns an Array of matches.
def of_pe_file f, h
  # Normalise into a local so the caller's hash is never rewritten.
  deep = h.key?(:deep) ? h[:deep] : @@deep
  deep = 1 if deep == true
  deep = 0 if deep == false

  f.seek(h[:ep_offset]) # offset of PE EntryPoint from start of file
  r = Array(of_data(f.read(max_size)))
  return r if r.any? && deep < 2

  # scan only 1st mb unless :deep
  r + scan_whole_file(f, :limit => (deep > 0 ? nil : 1048576), :deep => deep)
end
|
71
|
+
|
72
|
+
BLOCK_SIZE = 0x10000
|
73
|
+
|
74
|
+
# Scans +f+ from its beginning for signature matches, reading it in
# BLOCK_SIZE chunks.
#
# f - IO-like object supporting size, seek, read(len, buf), tell and eof?
# h - options hash (left unmodified):
#     :limit - stop once this many bytes have been covered; defaults to
#              the size of +f+
#     :deep  - when numeric and greater than 1 every signature is tried,
#              otherwise only those whose ep_only flag is not set
#
# Only the first occurrence of each signature inside a chunk is recorded.
#
# Returns an Array of Match objects (file offset + signature) without
# duplicates.
def scan_whole_file f, h = {}
  limit = h[:limit] || f.size
  deep  = h[:deep]
  f.seek( pos = 0 )
  buf = ''.dup.force_encoding('binary')
  sigs =
    if deep.is_a?(Numeric) && deep > 1
      all
    else
      find_all{ |sig| !sig.ep_only }
    end

  # consecutive reads overlap by this much so that a signature broken by a
  # chunk boundary is still seen in one piece
  overlap = max_size + 2
  r = []
  loop do
    f.read BLOCK_SIZE, buf
    pos += buf.size
    chunk_start = f.tell - buf.size
    sigs.each do |sig|
      idx = buf.index(sig.re)
      r << Match.new(chunk_start + idx, sig) if idx
    end
    break if f.eof? || pos >= limit

    f.seek(-overlap, IO::SEEK_CUR)
    pos -= overlap
  end

  # a hit lying inside the overlap is found by both neighbouring reads;
  # report it once
  r.uniq
end
|
46
100
|
|
47
101
|
def of_data data
|
@@ -66,59 +120,6 @@ class PEdump
|
|
66
120
|
nil
|
67
121
|
end
|
68
122
|
|
69
|
-
# parse text signatures
|
70
|
-
def parse fname = TEXT_SIGS_FILE
|
71
|
-
sigs = {}; sig = nil
|
72
|
-
|
73
|
-
File.open(fname,'r:utf-8') do |f|
|
74
|
-
while line = f.gets
|
75
|
-
line.strip!
|
76
|
-
|
77
|
-
# XXX
|
78
|
-
# "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml:
|
79
|
-
# RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS
|
80
|
-
|
81
|
-
case line
|
82
|
-
when /^;/,/^$/
|
83
|
-
next
|
84
|
-
when /^\[(.+)\]$/
|
85
|
-
sig = Packer.new($1.sub(/^\*\s+/,'').sub(/\s+\(h\)$/,''))
|
86
|
-
when /^signature = (.+)$/
|
87
|
-
sig.re = $1
|
88
|
-
if sigs[sig.re]
|
89
|
-
next if sigs[sig.re].name == sig.name
|
90
|
-
printf "[?] dup %-40s, %s\n", sigs[sig.re].name.inspect, sig.name.inspect
|
91
|
-
end
|
92
|
-
sigs[sig.re] = sig
|
93
|
-
when /^ep_only = (.+)$/
|
94
|
-
sig.ep_only = ($1.strip.downcase == 'true')
|
95
|
-
else raise line
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
sigs = sigs.values
|
101
|
-
sigs.each do |sig|
|
102
|
-
sig.re = Regexp.new(
|
103
|
-
sig.re.split(' ').tap do |a|
|
104
|
-
sig.size = a.size
|
105
|
-
end.map do |x|
|
106
|
-
case x
|
107
|
-
when '??'
|
108
|
-
'.'
|
109
|
-
when /[a-f0-9]{2}/i
|
110
|
-
Regexp::escape x.to_i(16).chr
|
111
|
-
else raise x
|
112
|
-
end
|
113
|
-
end.join
|
114
|
-
)
|
115
|
-
if sig.name[/-+>/]
|
116
|
-
a = sig.name.split(/-+>/,2).map(&:strip)
|
117
|
-
sig.name = "#{a[0]} (#{a[1]})"
|
118
|
-
end
|
119
|
-
end
|
120
|
-
sigs
|
121
|
-
end
|
122
123
|
end
|
123
124
|
end
|
124
125
|
end
|
@@ -0,0 +1,386 @@
|
|
1
|
+
class PEdump
  # Reads text signature files and turns them into Packer entries whose
  # +re+ member is a Regexp.
  #
  # Recognised input lines (after stripping):
  #   ; ... / # ... / < ...    comment
  #   [Some name]              starts a new signature
  #   signature = 60 E8 ?? 00  hex bytes, "??" (or ":") marks a wildcard
  #   ep_only = true           entry-point-only flag
  #   [Some name=60E8...]      one-line form, always ep_only
  # Any other non-blank line raises.
  module SigParser

    DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))

    TEXT_SIGS_FILES = [
      File.join(DATA_ROOT, "data", "userdb.txt"),
      File.join(DATA_ROOT, "data", "signatures.txt"),
      File.join(DATA_ROOT, "data", "fs.txt")
    ]

    # Array subclass marking a group of alternatives; _join renders it as
    # "(a|b|c)".
    class OrBlock < Array; end

    class << self

      # Parses text signatures.
      #
      # args - options hash (left unmodified):
      #        :fnames   - files to read, defaults to TEXT_SIGS_FILES
      #        :verbose  - print progress / diagnostics to stdout
      #        :raw      - return entries whose +re+ is still an Array of
      #                    unescaped one-byte strings and '.' wildcards
      #        :optimize - run #optimize before building the Regexps
      #
      # Returns an Array of Packer entries. Signatures that contain an
      # unrecognised element are dropped.
      def parse args = {}
        fnames = args[:fnames] || TEXT_SIGS_FILES
        sigs = {}; sig = nil

        fnames.each do |fname|
          n0 = sigs.size
          File.open(fname,'r:utf-8') do |f|
            while line = f.gets
              case line.strip
              when /^[<;#]/, /^$/ # comments & blank lines
                next
              when /^\[(.+)=(.+)\]$/
                _add_sig(sigs, Packer.new($1, $2, true), args )
              when /^\[([^=]+)\]$/
                sig = Packer.new($1)
              when /^signature = (.+)$/
                sig.re = $1
                _add_sig(sigs, sig, args)
              when /^ep_only = (.+)$/
                sig.ep_only = ($1.strip.downcase == 'true')
              else raise line
              end
            end
          end
          puts "[=] #{sigs.size-n0} sigs from #{File.basename(fname)}\n\n" if args[:verbose]
        end

        # convert strings to arrays of regexp elements
        sigs = sigs.values
        sigs.each do |sig|
          elements = sig.re.split(' ')
          sig.size = elements.size
          sig.re = elements.map do |x|
            case x
            when /\A\?\?\Z/
              '.'
            when /\A.\?/,/\?.\Z/
              # half-specified byte: treated as a full wildcard
              puts "[?] #{x.inspect} -> \"??\" in #{sig.name}" if args[:verbose]
              '.'
            when /\A[a-f0-9]{2}\Z/i
              byte = x.to_i(16).chr
              args[:raw] ? byte : Regexp::escape(byte)
            else
              puts "[?] unknown re element: #{x.inspect} in #{sig.inspect}" if args[:verbose]
              break # leaves sig.re == nil, the signature is dropped below
            end
          end

          if sig.name[/-+>/]
            a = sig.name.split(/-+>/,2).map(&:strip)
            sig.name = "#{a[0]} (#{a[1]})"
          end

          # NOTE(review): wildcards were already mapped to '.' above, so this
          # comparison with '??' looks like it can never succeed - confirm
          # whether trailing wildcards were meant to be trimmed.
          sig.re.pop while sig.re && sig.re.last == '??'
        end
        sigs.delete_if{ |sig| sig.re.nil? }
        return sigs if args[:raw]

        optimize sigs if args[:optimize]

        # convert re-arrays to Regexps
        sigs.each do |sig|
          sig.re = Regexp.new( _join(sig.re), Regexp::MULTILINE )
        end

        sigs
      end

      # XXX
      # "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml:
      # RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS

      # Normalises the name and hex string of +sig+ and stores it in +sigs+
      # (a Hash keyed by the normalised hex string).
      #
      # When the same hex string is already present the new entry is not
      # stored; instead the stored entry's name may be replaced by a merge of
      # both names (see _merge_names).
      #
      # Raises RuntimeError when sig.re is nil.
      def _add_sig sigs, sig, args = {}
        raise "null RE: #{sig.inspect}" unless sig.re

        # bad sigs
        return if sig.re[/\A538BD833C0A30:::::/]
        return if sig.name == "Name of the Packer v1.0"
        return if sig.re == "54 68 69 73 20 70 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 6D 6F" # dos stub

        sig.name.sub!(/^\*\s+/, '')
        sig.name.sub!(/\s+\(h\)$/, '')
        sig.name.sub!(/version (\d)/i,"v\\1")
        sig.name.sub!(/Microsoft/i, "MS")
        sig.name.sub!(/ or /i, " / ")
        sig.name.sub! 'RLP ','RLPack '
        sig.name.sub! '.beta', ' beta'
        sig.name.sub! '(com)','[com]'
        sig.name = sig.name.split(/\s*-+>\s*/).join(' -> ') # fix spaces around '->'

        sig.re = sig.re.strip.upcase.tr(':','?')
        # hex written without separators: split it into two-character elements
        sig.re = sig.re.scan(/../).join(' ') if sig.re.split.first.size > 2

        existing = sigs[sig.re]
        unless existing
          # new sig
          sigs[sig.re] = sig
          return
        end

        # same bytes already known: nothing to do if one name contains the other
        a = [sig, existing].map{ |x| x.name.upcase.split('->').first.tr('V ','') }
        return if a[0][a[1]] || a[1][a[0]]

        new_name = _merge_names(existing.name, sig.name)
        if new_name && new_name != sig.name && new_name != existing.name
          puts "[.] sig name join: #{new_name}" if args[:verbose]
          existing.name = new_name
        end
      end

      # Builds a combined name out of two signature names: the common leading
      # and trailing words are kept and the differing middle parts are joined
      # with " / ".
      #
      # Returns nil when the names (up to a '->') do not differ in any
      # significant word, or when every differing word of one name is
      # contained in a differing word of the other.
      def _merge_names name1, name2
        a = [name1, name2].map{ |x| x.split('->').first.split }

        d = [a[0]-a[1], a[1]-a[0]] # different words
        d.map! do |x|
          x - [
            'EXE','[EXE]',
            'vx.x','v?.?',
            'DLL','(DLL)','[DLL]',
            '[LZMA]','(LZMA)','LZMA',
            '-','~','(pack)','(1)','(2)',
            '19??'
          ]
        end
        return if d.all?(&:empty?) # no different words

        # [["v1.14/v1.20"], ["v1.14,", "v1.20"]]]
        # [["EXEShield", "v0.3b/v0.3", "v0.6"], ["Shield", "v0.3b,", "v0.3"]]]
        2.times do |i|
          return if d[i].all? do |x|
            x = x.downcase.delete(',-').sub(/tm$/,'')
            d[1-i].any? do |y|
              y = y.downcase.delete(',-').sub(/tm$/,'')
              y[x]
            end
          end
        end

        a = name1.split
        b = name2.split
        new_name_head = []
        while a.any? && b.any? && a.first.upcase == b.first.upcase
          new_name_head << a.shift
          b.shift
        end
        new_name_tail = []
        while a.any? && b.any? && a.last.upcase == b.last.upcase
          new_name_tail.unshift a.pop
          b.pop
        end
        middle = [a.join(' '), b.join(' ')].delete_if{ |x| x.empty? }.join(' / ')
        (new_name_head + [middle] + new_name_tail).join(' ')
      end

      # Flattens a (possibly nested) array of regexp elements into one source
      # string. OrBlock members become "(alt1|alt2|...)", plain arrays are
      # concatenated, strings are taken as they are.
      def _join a, sep=''
        a.map do |x|
          case x
          when OrBlock
            '(' + _join(x, '|') + ')'
          when Array
            _join x
          when String
            x
          end
        end.join(sep)
      end

      # Compares +a+ and +b+ position by position.
      #
      # Returns an Array of [a[i], b[i]] pairs for every differing position,
      # or nil as soon as more than +max_cnt+ differences were seen.
      def _re_diff a,b, max_cnt = 1000
        r = []
        [a.size, b.size].max.times do |i|
          next if a[i] == b[i]
          r << [a[i],b[i]]
          return nil if r.size > max_cnt
        end
        r
      end

      # Removes signatures that are covered by a more general one: two
      # signatures are folded together when they differ in at most 6
      # positions and, at each of those, one of them has a wildcard or
      # nothing at all. Signatures shorter than 6 elements and those named
      # 'PseudoSigner' are left alone. Mutates and returns +sigs+.
      def _optimize sigs
        min_sz = 6
        max_diff = 6
        sigs.each_with_index do |sig1,idx|
          next if sig1.re.size < min_sz
          next if sig1.name['PseudoSigner']

          sigs[(idx+1)..-1].each do |sig2|
            next if sig2.re.size < min_sz
            next if sig2.name['PseudoSigner']
            next unless rd = _re_diff(sig1.re, sig2.re, max_diff)

            if rd.all?{ |x| x[0].nil? || x[0] == '.' } && sig2.re.size >= sig1.re.size
              # sig1 is the more general one: keep it, drop sig2
              if new_name = _merge_names(sig2.name, sig1.name)
                sig1.name = new_name
              end
              # NOTE(review): ep_only is merged into the entry that is about
              # to be removed - confirm that this is intended.
              sig2.ep_only ||= sig1.ep_only
              sig2.re = []
            elsif rd.all?{ |x| x[1].nil? || x[1] == '.' } && sig1.re.size >= sig2.re.size
              # sig2 is the more general one: keep it, drop sig1
              if new_name = _merge_names(sig2.name, sig1.name)
                sig2.name = new_name
              end
              sig1.re = []
              # NOTE(review): same as above, written to the removed entry.
              sig1.ep_only ||= sig2.ep_only
              break
            end
          end
        end

        sigs.delete_if{ |sig| sig.re.empty? }
      end

      # Shrinks the signature list in place and returns it. Progress is
      # printed to stdout after each of the three merge passes.
      def optimize sigs
        # replaces all duplicate names with references to one name
        # saves ~30k out of ~200k mem
        h = {}
        sigs.each do |sig|
          sig.name = (h[sig.name] ||= sig.name)
        end

        print "[.] sigs merge: #{sigs.size}"; _optimize(sigs); puts " -> #{sigs.size}"

        # try to merge signatures with same name, size & ep_only
        sigs.group_by{ |sig|
          [sig.re.size, sig.name, sig.ep_only]
        }.values.each do |a|
          next if a.size == 1
          if merged_re = _merge(a)
            a.first.re = merged_re
            a[1..-1].each{ |sig| sig.re = nil }
          end
        end
        print "[.] sigs merge: #{sigs.size}"; sigs.delete_if{ |x| x.re.nil? }; puts " -> #{sigs.size}"

        # 361 entries of ["VMProtect v1.25 (PolyTech)", true, "h....\xE8...."])
        sigs.group_by{ |sig|
          [sig.name, sig.ep_only, sig.re[0,10].join]
        }.each do |k,entries|
          next if entries.size < 10
          prefix = entries.first.re[0,10]
          infix  = entries.map{ |sig| sig.re[10..-1] }

          entries.first.re   = prefix + [OrBlock.new(infix)]
          entries.first.size = entries.map(&:size).max

          entries[1..-1].each{ |sig| sig.re = nil }
        end
        print "[.] sigs merge: #{sigs.size}"; sigs.delete_if{ |x| x.re.nil? }; puts " -> #{sigs.size}"

        sigs
      end

      # range of common difference between N given sigs
      #
      # res - Array of equally sized element arrays
      #
      # Returns the index Range of the single contiguous run in which the
      # arrays differ. Returns nil when they do not differ at all, differ in
      # more than one run, or differ over their whole length.
      #
      # Raises RuntimeError when the arrays have different sizes.
      def _diff res
        raise "diff sizes" if res.map(&:size).uniq.size != 1
        size = res.first.size

        dstart  = nil
        dend    = size - 1
        prev_eq = true

        size.times do |i|
          eq = res.map{ |re| re[i] }.uniq.size == 1
          if eq != prev_eq
            if eq
              # end of current diff
              dend = i-1
            else
              # start of new diff
              return nil if dstart # return nil if it's a 2nd diff
              dstart = i
            end
          end
          prev_eq = eq
        end
        return nil unless dstart # identical inputs: there is no diff range

        r = dstart..dend
        r == (0..(size-1)) ? nil : r
      end

      # merge array of signatures into one signature
      #
      # Returns the merged element array (common prefix, an OrBlock holding
      # each signature's differing part, common suffix), or nil when the
      # signatures have different lengths or _diff finds no single range.
      def _merge sigs
        sizes = sigs.map(&:re).map(&:size)

        if sizes.uniq.size != 1
          puts "[?] wrong sizes: #{sizes.inspect}"
          return nil
        end

        res = sigs.map(&:re)
        diff = _diff res
        return nil unless diff

        ref = res.first
        ref[0...diff.first] + [OrBlock.new(res.map{ |re| re[diff] })] + ref[(diff.last+1)..-1]
      end
    end
  end
end
|