pedump 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,47 +1,101 @@
1
+ require 'pedump/sig_parser'
2
+
1
3
  class PEdump
2
4
  class Packer < Struct.new(:name, :re, :ep_only, :size)
3
5
 
4
6
  DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))
5
7
  BIN_SIGS_FILE = File.join(DATA_ROOT, "data", "sig.bin")
6
- TEXT_SIGS_FILE = File.join(DATA_ROOT, "data", "sig.txt")
7
8
 
8
- Match = Struct.new :offset, :packer
9
# A single signature hit: +offset+ is the position of the match inside the
# scanned data, +packer+ is the signature object that matched.
#
# Built with the block form of Struct.new rather than by subclassing an
# anonymous Struct: no throwaway superclass is created, and loading this
# file a second time no longer fails with "superclass mismatch".
Match = Struct.new(:offset, :packer) do
  # Name of the matched signature (delegates to +packer+).
  def name
    packer.name
  end
end
9
14
 
10
15
  class << self
11
-
12
16
  def all
13
17
  @@all ||=
14
18
  begin
15
19
  r = unmarshal
16
20
  unless r
21
+ msg = "[?] #{self}: unmarshal failed, using slow text parsing instead"
17
22
  if PEdump.respond_to?(:logger) && PEdump.logger
18
- PEdump.logger.warn "[?] #{self}: unmarshal failed, using slow text parsing instead"
23
+ PEdump.logger.warn msg
19
24
  else
20
- STDERR.puts "[?] #{self}: unmarshal failed, using slow text parsing instead"
25
+ STDERR.puts msg
21
26
  end
22
- r = parse
27
+ r = SigParser.parse
23
28
  end
24
29
  r
25
30
  end
26
31
  end
27
32
  alias :load :all
28
33
 
34
# Class-wide default for the :deep scan option; of_pe_file falls back to it
# when the caller's options carry no :deep key.
@@deep = false

# Current default deep-scan setting.
def default_deep
  @@deep
end

# Replace the default deep-scan setting.
def default_deep=(value)
  @@deep = value
end
44
+
29
45
  def max_size
30
46
  @@max_size ||= all.map(&:size).max
31
47
  end
32
48
 
33
# Try to determine the packer of +data+.
#
# data - either raw bytes, or an IO-like object responding to #read and #seek
# h    - options Hash. When +data+ is IO-like and :ep_offset is set, the
#        work is delegated to of_pe_file (which also honours :deep);
#        otherwise +data+ is handed to of_data as is.
#        For compatibility with the pre-0.4.0 signature
#        `of(data, ep_offset = nil)` a non-Hash value (Integer or nil) is
#        taken to be the entry point offset itself.
#
# Returns whatever of_pe_file / of_data return.
def of data, h = {}
  h = { :ep_offset => h } unless h.is_a?(Hash)

  if data.respond_to?(:read) && data.respond_to?(:seek) && h[:ep_offset]
    of_pe_file data, h
  else
    of_data data
  end
end
40
56
 
41
57
  # try to determine packer of FILE f, ep_offset - offset to entrypoint from start of file
42
- def of_file f, ep_offset
43
- f.seek(ep_offset)
44
- of_data f.read(max_size)
58
+ def of_pe_file f, h
59
+ h[:deep] = @@deep unless h.key?(:deep)
60
+ h[:deep] = 1 if h[:deep] == true
61
+ h[:deep] = 0 if h[:deep] == false
62
+
63
+ f.seek(h[:ep_offset]) # offset of PE EntryPoint from start of file
64
+ r = Array(of_data(f.read(max_size)))
65
+ return r if r && r.any? && h[:deep] < 2
66
+ r += scan_whole_file(f,
67
+ :limit => (h[:deep] > 0 ? nil : 1048576),
68
+ :deep => h[:deep]
69
+ ) # scan only 1st mb unless :deep
70
+ end
71
+
72
+ BLOCK_SIZE = 0x10000
73
+
74
# Scan +f+ from its start, block by block, for signatures located anywhere
# in the file.
#
# f - seekable IO-like object (#seek, #read, #tell, #eof?, and #size when no
#     :limit is given)
# h - options Hash (left untouched):
#     :limit - stop once roughly this many bytes were covered
#              (default: f.size)
#     :deep  - a Numeric greater than 1 tries every signature; anything else
#              tries only the signatures not flagged ep_only
#
# Blocks of BLOCK_SIZE bytes are read with an overlap of max_size+2 bytes so
# that a signature straddling a block boundary is still seen whole. Only the
# first occurrence of a signature inside each block is reported.
#
# Returns an Array of Match (file offset, signature) without duplicates.
def scan_whole_file f, h = {}
  limit = h[:limit] || f.size
  f.seek(pos = 0)
  buf = String.new.force_encoding('binary')
  sigs =
    if h[:deep].is_a?(Numeric) && h[:deep] > 1
      all
    else
      find_all { |sig| !sig.ep_only }
    end

  r = []
  loop do
    f.read BLOCK_SIZE, buf
    pos += buf.size
    sigs.each do |sig|
      idx = buf.index(sig.re)
      r << Match.new(f.tell - buf.size + idx, sig) if idx
    end
    break if f.eof? || pos >= limit

    # overlap the read for the case when read buffer boundary breaks signature
    f.seek(-max_size - 2, IO::SEEK_CUR)
    pos -= (max_size + 2)
  end

  # a hit inside the overlap is seen by two consecutive reads; report it once
  r.uniq
end
46
100
 
47
101
  def of_data data
@@ -66,59 +120,6 @@ class PEdump
66
120
  nil
67
121
  end
68
122
 
69
- # parse text signatures
70
- def parse fname = TEXT_SIGS_FILE
71
- sigs = {}; sig = nil
72
-
73
- File.open(fname,'r:utf-8') do |f|
74
- while line = f.gets
75
- line.strip!
76
-
77
- # XXX
78
- # "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml:
79
- # RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS
80
-
81
- case line
82
- when /^;/,/^$/
83
- next
84
- when /^\[(.+)\]$/
85
- sig = Packer.new($1.sub(/^\*\s+/,'').sub(/\s+\(h\)$/,''))
86
- when /^signature = (.+)$/
87
- sig.re = $1
88
- if sigs[sig.re]
89
- next if sigs[sig.re].name == sig.name
90
- printf "[?] dup %-40s, %s\n", sigs[sig.re].name.inspect, sig.name.inspect
91
- end
92
- sigs[sig.re] = sig
93
- when /^ep_only = (.+)$/
94
- sig.ep_only = ($1.strip.downcase == 'true')
95
- else raise line
96
- end
97
- end
98
- end
99
-
100
- sigs = sigs.values
101
- sigs.each do |sig|
102
- sig.re = Regexp.new(
103
- sig.re.split(' ').tap do |a|
104
- sig.size = a.size
105
- end.map do |x|
106
- case x
107
- when '??'
108
- '.'
109
- when /[a-f0-9]{2}/i
110
- Regexp::escape x.to_i(16).chr
111
- else raise x
112
- end
113
- end.join
114
- )
115
- if sig.name[/-+>/]
116
- a = sig.name.split(/-+>/,2).map(&:strip)
117
- sig.name = "#{a[0]} (#{a[1]})"
118
- end
119
- end
120
- sigs
121
- end
122
123
  end
123
124
  end
124
125
  end
@@ -0,0 +1,386 @@
1
+ class PEdump
2
+ module SigParser
3
+
4
+ DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))
5
+
6
+ TEXT_SIGS_FILES = [
7
+ File.join(DATA_ROOT, "data", "userdb.txt"),
8
+ File.join(DATA_ROOT, "data", "signatures.txt"),
9
+ File.join(DATA_ROOT, "data", "fs.txt")
10
+ ]
11
+
12
# Array subclass that marks a list of alternatives inside a signature;
# _join renders it as a parenthesised group with '|' between the members.
class OrBlock < Array
end
13
+
14
+ class << self
15
+
16
# Parse text signature files into an Array of Packer signatures.
#
# args - options Hash (left untouched):
#   :fnames   - files to read, in order (default: TEXT_SIGS_FILES)
#   :verbose  - print per-file counts and diagnostics to stdout
#   :raw      - return each signature's +re+ as an Array of unescaped
#               one-byte Strings ('.' stands for a wildcard) instead of a
#               compiled Regexp
#   :optimize - run +optimize+ on the signatures before compiling them
#
# Line formats: blank lines and lines starting with '<', ';' or '#' are
# skipped; "[name=hex]" registers an ep_only signature in one go; "[name]"
# starts a signature that the following "signature = ..." and
# "ep_only = ..." lines fill in.
#
# Signatures containing an unrecognised pattern element are dropped.
#
# Raises RuntimeError for an unrecognised line, or for a "signature"/"ep_only"
# line that appears before any "[name]" line.
# Returns an Array of Packer.
def parse args = {}
  fnames = args[:fnames] || TEXT_SIGS_FILES
  sigs = {}
  sig = nil

  fnames.each do |fname|
    n0 = sigs.size
    File.open(fname, 'r:utf-8') do |f|
      while line = f.gets
        case line.strip
        when /^[<;#]/, /^$/ # comments & blank lines
          next
        when /^\[(.+)=(.+)\]$/
          _add_sig(sigs, Packer.new($1, $2, true), args)
        when /^\[([^=]+)\]$/
          sig = Packer.new($1)
        when /^signature = (.+)$/
          raise "signature without a preceding [name] line: #{line}" unless sig
          sig.re = $1
          _add_sig(sigs, sig, args)
        when /^ep_only = (.+)$/
          raise "ep_only without a preceding [name] line: #{line}" unless sig
          sig.ep_only = ($1.strip.downcase == 'true')
        else raise line
        end
      end
    end
    puts "[=] #{sigs.size-n0} sigs from #{File.basename(fname)}\n\n" if args[:verbose]
  end

  # convert pattern strings to arrays of regexp elements
  sigs = sigs.values
  sigs.each do |s|
    s.re = _re_elements(s, args)
    if s.name[/-+>/]
      a = s.name.split(/-+>/, 2).map(&:strip)
      s.name = "#{a[0]} (#{a[1]})"
    end
    # NOTE: earlier code tried to pop trailing '??' elements here, but every
    # wildcard had already been converted to '.', so nothing was ever removed;
    # trailing wildcards are therefore still kept.
  end
  sigs.delete_if { |s| !s.re } # unparsable patterns
  return sigs if args[:raw]

  optimize sigs if args[:optimize]

  # convert re-arrays to Regexps
  sigs.each do |s|
    s.re = Regexp.new(_join(s.re), Regexp::MULTILINE)
  end

  sigs
end

# Convert the space-separated pattern string held in sig.re into an Array
# with one regexp element per byte, and record the byte count in sig.size.
# '??' and half-wildcards such as '5?' / '?5' become '.'; two hex digits
# become that byte (regexp-escaped unless args[:raw]).
#
# Returns the Array, or nil when an element is not recognised.
def _re_elements sig, args = {}
  tokens = sig.re.split(' ')
  sig.size = tokens.size

  elements = []
  tokens.each do |x|
    case x
    when /\A\?\?\Z/
      elements << '.'
    when /\A.\?/, /\?.\Z/
      puts "[?] #{x.inspect} -> \"??\" in #{sig.name}" if args[:verbose]
      elements << '.'
    when /\A[a-f0-9]{2}\Z/i
      byte = x.to_i(16).chr
      elements << (args[:raw] ? byte : Regexp::escape(byte))
    else
      puts "[?] unknown re element: #{x.inspect} in #{sig.inspect}" if args[:verbose]
      return nil
    end
  end
  elements
end
117
+
118
+ # XXX
119
+ # "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml:
120
+ # RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS
121
+
122
# Normalise +sig+ (its name and its hex pattern) and register it in +sigs+,
# a Hash keyed by the normalised pattern string.
#
# A few known-bad entries are ignored. When the pattern is already present,
# the new entry is dropped if one of the two names (compared upper-cased,
# up to any '->', with 'V' and spaces removed) contains the other; otherwise
# the stored name is replaced by _merge_names' result when that differs from
# both names.
#
# sigs - Hash of pattern String => signature
# sig  - signature with +name+ and +re+ (pattern String) set
# args - options Hash; :verbose prints a note when names get joined
#
# Raises RuntimeError when sig.re is nil.
def _add_sig sigs, sig, args = {}
  raise "null RE: #{sig.inspect}" unless sig.re

  # bad sigs
  return if sig.re[/\A538BD833C0A30:::::/]
  return if sig.name == "Name of the Packer v1.0"
  return if sig.re == "54 68 69 73 20 70 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 6D 6F" # dos stub

  # in-place name clean-up, applied in this order
  [
    [/^\*\s+/,         ''],
    [/\s+\(h\)$/,      ''],
    [/version (\d)/i,  "v\\1"],
    [/Microsoft/i,     "MS"],
    [/ or /i,          " / "],
    ['RLP ',           'RLPack '],
    ['.beta',          ' beta'],
    ['(com)',          '[com]']
  ].each { |pattern, replacement| sig.name.sub!(pattern, replacement) }
  sig.name = sig.name.split(/\s*-+>\s*/).join(' -> ') # fix spaces around '->'

  # pattern: upper-case, ':' is an alias of '?', one space between bytes
  sig.re = sig.re.strip.upcase.tr(':', '?')
  first_token = sig.re.split.first
  sig.re = sig.re.scan(/../).join(' ') if first_token.size > 2

  existing = sigs[sig.re]
  if existing
    mine, theirs = [sig, existing].map { |x| x.name.upcase.split('->').first.tr('V ', '') }
    return if mine[theirs] || theirs[mine]

    new_name = _merge_names(existing.name, sig.name)
    if new_name && new_name != sig.name && new_name != existing.name
      puts "[.] sig name join: #{new_name}" if args[:verbose]
      existing.name = new_name
    end
  else
    # new sig
    sigs[sig.re] = sig
  end
end
156
+
157
# Build a combined display name for two signatures judged to be the same.
#
# Only the part of each name before any '->' is compared, word by word.
# Returns nil when, after ignoring a list of filler words, the names do not
# differ, or when every differing word of one name is contained (lower-cased,
# without ',' and '-', without a trailing "tm") in some differing word of
# the other. Otherwise the words shared at the start and at the end of the
# full names are kept once and the differing middles are joined by ' / '.
def _merge_names name1, name2
  words1, words2 = [name1, name2].map { |name| name.split('->').first.split }

  filler = [
    'EXE','[EXE]',
    'vx.x','v?.?',
    'DLL','(DLL)','[DLL]',
    '[LZMA]','(LZMA)','LZMA',
    '-','~','(pack)','(1)','(2)',
    '19??'
  ]
  only1 = (words1 - words2) - filler
  only2 = (words2 - words1) - filler
  return if only1.empty? && only2.empty? # no different words

  # [["v1.14/v1.20"], ["v1.14,", "v1.20"]]]
  # [["EXEShield", "v0.3b/v0.3", "v0.6"], ["Shield", "v0.3b,", "v0.3"]]]
  squash = lambda { |word| word.downcase.delete(',-').sub(/tm$/,'') }
  [[only1, only2], [only2, only1]].each do |mine, theirs|
    covered = mine.all? do |word|
      needle = squash.call(word)
      theirs.any? { |other| squash.call(other)[needle] }
    end
    return if covered
  end

  left  = name1.split
  right = name2.split

  head = []
  while !left.empty? && !right.empty? && left.first.upcase == right.first.upcase
    head.push left.shift
    right.shift
  end

  tail = []
  while !left.empty? && !right.empty? && left.last.upcase == right.last.upcase
    tail.unshift left.pop
    right.pop
  end

  middle = [left.join(' '), right.join(' ')].reject { |part| part.empty? }.join(' / ')
  (head + [middle] + tail).join(' ')
end
202
+
203
# Flatten a (possibly nested) array of regexp elements into one String.
#
# An OrBlock becomes "(alt1|alt2|...)", a plain nested Array is concatenated
# without a separator, Strings are taken as they are, and any other element
# contributes nothing. Top-level pieces are glued together with +sep+.
def _join a, sep=''
  pieces = a.map do |x|
    if x.is_a?(OrBlock)
      alternatives = _join(x, '|')
      '(' + alternatives + ')'
    elsif x.is_a?(Array)
      _join(x)
    elsif x.is_a?(String)
      x
    end
  end
  pieces.join(sep)
end
215
+
216
# Position-wise comparison of two indexable sequences.
#
# Returns an Array of [a[i], b[i]] pairs for every index (up to the longer
# length) where the two differ, or nil as soon as more than +max_cnt+
# differences have been collected.
def _re_diff a,b, max_cnt = 1000
  longest = a.size > b.size ? a.size : b.size
  diffs = []
  0.upto(longest - 1) do |i|
    next if a[i] == b[i]
    diffs.push [a[i], b[i]]
    return nil if diffs.size > max_cnt
  end
  diffs
end
226
+
227
# Drop signatures that are made redundant by a more general one.
#
# Every pair of signatures with at least 6 elements each (and without
# 'PseudoSigner' in the name) is compared with _re_diff, allowing at most 6
# differences. If all differences are positions where one signature has a
# wildcard ('.') or no element at all, and that signature is not the longer
# one, it is kept and the other one is emptied; the kept one takes the
# _merge_names result as its name when there is one. Emptied signatures are
# removed from +sigs+ at the end.
#
# NOTE(review): ep_only is OR-ed into the signature that is being discarded,
# not into the surviving one - confirm whether that is intended.
#
# Modifies +sigs+ in place and returns it.
def _optimize sigs
  min_sz   = 6
  max_diff = 6
  loose    = lambda { |element| element.nil? || element == '.' }
  eligible = lambda { |sig| sig.re.size >= min_sz && !sig.name['PseudoSigner'] }

  sigs.each_with_index do |sig1, idx|
    next unless eligible.call(sig1)

    sigs[(idx+1)..-1].each do |sig2|
      next unless eligible.call(sig2)

      rd = _re_diff(sig1.re, sig2.re, max_diff)
      next unless rd

      if rd.all? { |pair| loose.call(pair[0]) } && sig2.re.size >= sig1.re.size
        # sig1 is the more general one: keep it, drop sig2
        new_name = _merge_names(sig2.name, sig1.name)
        sig1.name = new_name if new_name
        sig2.ep_only ||= sig1.ep_only
        sig2.re = []
      elsif rd.all? { |pair| loose.call(pair[1]) } && sig1.re.size >= sig2.re.size
        # sig2 is the more general one: keep it, drop sig1 and move on
        new_name = _merge_names(sig2.name, sig1.name)
        sig2.name = new_name if new_name
        sig1.re = []
        sig1.ep_only ||= sig2.ep_only
        break
      end
    end
  end

  sigs.delete_if { |sig| sig.re.empty? }
end
266
+
267
# Shrink the signature list in three passes, printing the list size before
# and after each pass to stdout:
#
#   1. _optimize drops signatures covered by a more general one;
#   2. signatures sharing element count, name and ep_only are merged with
#      _merge when that succeeds;
#   3. groups of 10 or more signatures sharing name, ep_only and their first
#      10 elements are folded into one signature whose remainder is an
#      OrBlock of the individual tails.
#
# Modifies +sigs+ (and the signatures in it) in place and returns +sigs+.
def optimize sigs
  # replaces all duplicate names with references to one name
  # saves ~30k out of ~200k mem
  names = {}
  sigs.each { |sig| sig.name = (names[sig.name] ||= sig.name) }

  print "[.] sigs merge: #{sigs.size}"
  _optimize(sigs)
  puts " -> #{sigs.size}"

  # try to merge signatures with same name, size & ep_only
  same_shape = sigs.group_by { |sig| [sig.re.size, sig.name, sig.ep_only] }
  same_shape.each_value do |group|
    next if group.size == 1
    merged_re = _merge(group)
    next unless merged_re
    group.first.re = merged_re
    group.drop(1).each { |sig| sig.re = nil }
  end
  print "[.] sigs merge: #{sigs.size}"
  sigs.delete_if { |x| x.re.nil? }
  puts " -> #{sigs.size}"

  # 361 entries of ["VMProtect v1.25 (PolyTech)", true, "h....\xE8...."])
  same_prefix = sigs.group_by { |sig| [sig.name, sig.ep_only, sig.re[0,10].join] }
  same_prefix.each do |_key, entries|
    next if entries.size < 10
    #printf "%5d %s\n", entries.size, k
    keeper = entries.first
    prefix = keeper.re[0,10]
    infix  = entries.map { |sig| sig.re[10..-1] }

    keeper.re   = prefix + [OrBlock.new(infix)]
    keeper.size = entries.map(&:size).max

    entries.drop(1).each { |sig| sig.re = nil }
  end
  print "[.] sigs merge: #{sigs.size}"
  sigs.delete_if { |x| x.re.nil? }
  puts " -> #{sigs.size}"

  # Disabled experiment, kept for reference:
  #
  # # merge signatures with same prefix & suffix
  # # most ineffecient part :)
  # sigs.group_by{ |sig|
  #   [sig.name, sig.ep_only, sig.re.index{ |x| x.is_a?(Array)}]
  # }.values.each do |a|
  #   next if a.size == 1
  #   next unless idx = a.first.re.index{ |x| x.is_a?(Array) }
  #   a.group_by{ |sig| [sig.re[0...idx], sig.re[(idx+1)..-1]] }.each do |k,entries|
  #     # prefix |            infix            | suffix
  #     # s o m    [[b r e r o] [e w h a t]]     h e r e
  #     prefix, suffix = k
  #     infix = entries.map{ |sig| sig.re[idx] }
  #     #infix = [['f','o','o']]
  #     merged_re = prefix + infix + suffix
  #     max_size = entries.map(&:size).max
  #     entries.each{ |sig| sig.re = merged_re; sig.size = max_size }
  #   end
  # end
  # print "[.] sigs merge: #{sigs.size}"; sigs.uniq!; puts " -> #{sigs.size}"

  # stats
  # aa = []
  # 6.upto(20) do |len|
  #   sigs.group_by{ |sig| [sig.re[0,len].join, sig.name, sig.ep_only] }.each do |a,b|
  #     aa << [b.size, a[0], [b.map(&:size).min, b.map(&:size).max].join(' .. ') ] if b.size > 2
  #   end
  # end
  # aa.sort_by(&:first).each do |sz,prefix,name|
  #   printf "%5d %-50s %s\n", sz, prefix.inspect, name
  # end

  sigs
end
340
+
341
# Range of indexes at which the given equally-sized sequences differ.
#
# res - Array of sequences (signature element arrays) of identical size
#
# Returns the single contiguous Range of differing positions, or nil when
#   * the sequences are all identical (nothing to merge),
#   * they differ in more than one separate stretch, or
#   * they differ over their whole length.
# Raises RuntimeError ("diff sizes") when the sizes are not all the same.
def _diff res
  raise "diff sizes" if res.map(&:size).uniq.size != 1
  size = res.first.size

  # differs[i] is true when the sequences do not all agree at position i
  differs = (0...size).map { |i| res.map { |re| re[i] }.uniq.size != 1 }

  dstart = differs.index(true)
  return nil unless dstart # identical sequences: no difference at all

  dend = differs.rindex(true)
  return nil if differs[dstart..dend].include?(false) # more than one diff

  r = dstart..dend
  r == (0..(size-1)) ? nil : r
end
367
+
368
# Merge several signatures into one element array.
#
# The element arrays must all have the same length (otherwise a note is
# printed and nil is returned) and, according to _diff, differ in exactly
# one contiguous stretch. The result is the first signature's elements with
# that stretch replaced by an OrBlock holding every signature's version of it.
#
# Returns the merged Array, or nil when the signatures cannot be merged.
def _merge sigs
  res   = sigs.map { |sig| sig.re }
  sizes = res.map { |re| re.size }

  unless sizes.uniq.size == 1
    puts "[?] wrong sizes: #{sizes.inspect}"
    return nil
  end

  diff = _diff(res)
  return nil unless diff

  ref = res.first
  alternatives = OrBlock.new(res.map { |re| re[diff] })
  before = ref[0...diff.first]
  after  = ref[(diff.last+1)..-1]
  before + [alternatives] + after
end
384
+ end
385
+ end
386
+ end