pedump 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,47 +1,101 @@
1
+ require 'pedump/sig_parser'
2
+
1
3
  class PEdump
2
4
  class Packer < Struct.new(:name, :re, :ep_only, :size)
3
5
 
4
6
  DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))
5
7
  BIN_SIGS_FILE = File.join(DATA_ROOT, "data", "sig.bin")
6
- TEXT_SIGS_FILE = File.join(DATA_ROOT, "data", "sig.txt")
7
8
 
8
- Match = Struct.new :offset, :packer
9
# One signature hit: +offset+ is the position at which the signature matched
# and +packer+ is the Packer entry whose signature produced the hit.
class Match < Struct.new(:offset, :packer)
  # Shortcut for the name of the matched packer.
  def name; packer.name; end
end
9
14
 
10
15
  class << self
11
-
12
16
  def all
13
17
  @@all ||=
14
18
  begin
15
19
  r = unmarshal
16
20
  unless r
21
+ msg = "[?] #{self}: unmarshal failed, using slow text parsing instead"
17
22
  if PEdump.respond_to?(:logger) && PEdump.logger
18
- PEdump.logger.warn "[?] #{self}: unmarshal failed, using slow text parsing instead"
23
+ PEdump.logger.warn msg
19
24
  else
20
- STDERR.puts "[?] #{self}: unmarshal failed, using slow text parsing instead"
25
+ STDERR.puts msg
21
26
  end
22
- r = parse
27
+ r = SigParser.parse
23
28
  end
24
29
  r
25
30
  end
26
31
  end
27
32
  alias :load :all
28
33
 
34
# Class-wide default for the :deep option; of_pe_file falls back to it when
# the caller's options hash has no :deep key.
@@deep = false

# Current default value of the :deep option.
def default_deep; @@deep; end

# Replace the default value of the :deep option.
def default_deep=(value)
  @@deep = value
end
44
+
29
45
  def max_size
30
46
  @@max_size ||= all.map(&:size).max
31
47
  end
32
48
 
33
# Try to determine the packer of +data+.
#
# data:: either an IO-like object (responds to +read+ and +seek+) or a chunk
#        of raw data that is handed to +of_data+ as is.
# h::    options hash. When +data+ is IO-like and h[:ep_offset] is set, the
#        file-based detection (+of_pe_file+) is used and the whole hash is
#        passed on to it.
#        For backward compatibility with the 0.3.x signature
#        (<tt>of data, ep_offset = nil</tt>) a bare Integer is accepted and
#        treated as <tt>:ep_offset</tt>; +nil+ means "no options".
#
# Returns whatever +of_pe_file+ / +of_data+ return.
def of data, h = {}
  h = {} if h.nil?
  h = { :ep_offset => h } if h.is_a?(Integer) # legacy positional ep_offset

  if data.respond_to?(:read) && data.respond_to?(:seek) && h[:ep_offset]
    of_pe_file data, h
  else
    of_data data
  end
end
40
56
 
41
57
  # try to determine packer of FILE f, ep_offset - offset to entrypoint from start of file
42
- def of_file f, ep_offset
43
- f.seek(ep_offset)
44
- of_data f.read(max_size)
58
+ def of_pe_file f, h
59
+ h[:deep] = @@deep unless h.key?(:deep)
60
+ h[:deep] = 1 if h[:deep] == true
61
+ h[:deep] = 0 if h[:deep] == false
62
+
63
+ f.seek(h[:ep_offset]) # offset of PE EntryPoint from start of file
64
+ r = Array(of_data(f.read(max_size)))
65
+ return r if r && r.any? && h[:deep] < 2
66
+ r += scan_whole_file(f,
67
+ :limit => (h[:deep] > 0 ? nil : 1048576),
68
+ :deep => h[:deep]
69
+ ) # scan only 1st mb unless :deep
70
+ end
71
+
72
# Number of bytes read from the file per scan step.
BLOCK_SIZE = 0x10000

# Scan file +f+ block by block for packer signatures.
#
# f:: seekable IO; scanning always starts at offset 0.
# h:: options hash (not modified):
#     :limit - stop once this many bytes were covered (default: f.size)
#     :deep  - when numeric and > 1, all signatures are tried; otherwise
#              only those without the ep_only flag.
#
# Returns an Array of Match objects (file offset + signature). Per block only
# the first occurrence of each signature is reported. Consecutive blocks
# overlap by max_size+2 bytes so that a signature crossing a block boundary
# is still found; hits that fall into the overlap are therefore seen twice
# and de-duplicated before returning (relies on Match value equality).
def scan_whole_file f, h = {}
  limit   = h[:limit] || f.size
  overlap = max_size + 2
  sigs =
    if h[:deep].is_a?(Numeric) && h[:deep] > 1
      self.all
    else
      # NOTE(review): relies on the receiver responding to find_all;
      # its definition is not visible in this hunk.
      self.find_all{ |sig| !sig.ep_only }
    end

  f.seek( pos = 0 )
  buf = ''.force_encoding('binary')
  r = []
  loop do
    f.read BLOCK_SIZE, buf
    pos += buf.size
    base = f.tell - buf.size # file offset of buf[0]
    sigs.each do |sig|
      idx = buf.index(sig.re)
      r << Match.new(base + idx, sig) if idx
    end
    break if f.eof? || pos >= limit
    # overlap the read for the case when read buffer boundary breaks signature
    f.seek(-overlap, IO::SEEK_CUR)
    pos -= overlap
  end
  r.uniq
end
46
100
 
47
101
  def of_data data
@@ -66,59 +120,6 @@ class PEdump
66
120
  nil
67
121
  end
68
122
 
69
- # parse text signatures
70
- def parse fname = TEXT_SIGS_FILE
71
- sigs = {}; sig = nil
72
-
73
- File.open(fname,'r:utf-8') do |f|
74
- while line = f.gets
75
- line.strip!
76
-
77
- # XXX
78
- # "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml:
79
- # RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS
80
-
81
- case line
82
- when /^;/,/^$/
83
- next
84
- when /^\[(.+)\]$/
85
- sig = Packer.new($1.sub(/^\*\s+/,'').sub(/\s+\(h\)$/,''))
86
- when /^signature = (.+)$/
87
- sig.re = $1
88
- if sigs[sig.re]
89
- next if sigs[sig.re].name == sig.name
90
- printf "[?] dup %-40s, %s\n", sigs[sig.re].name.inspect, sig.name.inspect
91
- end
92
- sigs[sig.re] = sig
93
- when /^ep_only = (.+)$/
94
- sig.ep_only = ($1.strip.downcase == 'true')
95
- else raise line
96
- end
97
- end
98
- end
99
-
100
- sigs = sigs.values
101
- sigs.each do |sig|
102
- sig.re = Regexp.new(
103
- sig.re.split(' ').tap do |a|
104
- sig.size = a.size
105
- end.map do |x|
106
- case x
107
- when '??'
108
- '.'
109
- when /[a-f0-9]{2}/i
110
- Regexp::escape x.to_i(16).chr
111
- else raise x
112
- end
113
- end.join
114
- )
115
- if sig.name[/-+>/]
116
- a = sig.name.split(/-+>/,2).map(&:strip)
117
- sig.name = "#{a[0]} (#{a[1]})"
118
- end
119
- end
120
- sigs
121
- end
122
123
  end
123
124
  end
124
125
  end
@@ -0,0 +1,386 @@
1
+ class PEdump
2
+ module SigParser
3
+
4
+ DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))
5
+
6
+ TEXT_SIGS_FILES = [
7
+ File.join(DATA_ROOT, "data", "userdb.txt"),
8
+ File.join(DATA_ROOT, "data", "signatures.txt"),
9
+ File.join(DATA_ROOT, "data", "fs.txt")
10
+ ]
11
+
12
+ class OrBlock < Array; end
13
+
14
+ class << self
15
+
16
+ # parse text signatures
17
# Parse text signature files into an Array of Packer entries.
#
# args:: options hash (not modified):
#        :fnames   - files to read (default: TEXT_SIGS_FILES)
#        :verbose  - print progress / diagnostics to stdout
#        :raw      - return signatures whose +re+ is still an Array of
#                    per-byte elements (bytes are not regexp-escaped)
#        :optimize - run +optimize+ on the signatures before building
#                    the Regexps
#
# Recognised lines (after stripping): comments starting with '<', ';' or
# '#', blank lines, "[name=signature]" one-liners (added with ep_only=true),
# "[name]" section headers, "signature = ..." and "ep_only = ..." lines.
# Any other line raises a RuntimeError carrying that line.
#
# Signatures containing an element that cannot be converted are dropped.
#
# Returns the Array of signatures; unless :raw is given each +re+ is a
# Regexp compiled with Regexp::MULTILINE.
def parse args = {}
  fnames = args[:fnames] || TEXT_SIGS_FILES
  sigs = {}; sig = nil

  fnames.each do |fname|
    n0 = sigs.size
    File.open(fname,'r:utf-8') do |f|
      while line = f.gets
        case line.strip
        when /^[<;#]/, /^$/ # comments & blank lines
          next
        when /^\[(.+)=(.+)\]$/
          _add_sig(sigs, Packer.new($1, $2, true), args )
        when /^\[([^=]+)\]$/
          sig = Packer.new($1)
        when /^signature = (.+)$/
          sig.re = $1
          _add_sig(sigs, sig, args)
        when /^ep_only = (.+)$/
          sig.ep_only = ($1.strip.downcase == 'true')
        else raise line
        end
      end
    end
    puts "[=] #{sigs.size-n0} sigs from #{File.basename(fname)}\n\n" if args[:verbose]
  end

  # convert signature strings to arrays of regexp elements
  sigs = sigs.values
  sigs.each do |s|
    tokens = s.re.split(' ')
    s.size = tokens.size
    # nil when the signature contains an element that cannot be converted
    s.re = _re_elements(tokens, s, args)
    if s.name[/-+>/]
      a = s.name.split(/-+>/,2).map(&:strip)
      s.name = "#{a[0]} (#{a[1]})"
    end
    # NOTE(review): elements are already converted at this point ('??' has
    # become '.'), so this never pops anything; kept as is, only guarded
    # against a nil +re+ - confirm whether trailing wildcards should be cut.
    s.re.pop while s.re && s.re.last == '??'
  end
  sigs.delete_if{ |s| !s.re || s.re.index('BAD_RE') }
  return sigs if args[:raw]

  optimize sigs if args[:optimize]

  # convert re-arrays to Regexps
  sigs.each do |s|
    s.re = Regexp.new( _join(s.re), Regexp::MULTILINE )
  end

  sigs
end

# Convert the whitespace-separated +tokens+ of one signature into regexp
# source elements: "??" and half-wildcards ("4?", "?4") become '.', a
# two-digit hex value becomes that byte (regexp-escaped unless args[:raw]).
# Returns nil as soon as an unknown token is met.
def _re_elements tokens, sig, args = {}
  tokens.map do |x|
    case x
    when /\A\?\?\Z/
      '.'
    when /\A.\?/,/\?.\Z/
      puts "[?] #{x.inspect} -> \"??\" in #{sig.name}" if args[:verbose]
      '.'
    when /\A[a-f0-9]{2}\Z/i
      byte = x.to_i(16).chr
      args[:raw] ? byte : Regexp::escape(byte)
    else
      puts "[?] unknown re element: #{x.inspect} in #{sig.inspect}" if args[:verbose]
      return nil
    end
  end
end
117
+
118
+ # XXX
119
+ # "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml:
120
+ # RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS
121
+
122
# Normalise +sig+ (name and signature string) and register it in +sigs+,
# a Hash keyed by the normalised signature string.
#
# Known-bad entries are ignored. When a signature with the same bytes is
# already present, the new one is dropped; if the two names are not simply
# contained in one another, the stored name is replaced by the result of
# _merge_names (when that yields something new).
#
# Raises RuntimeError when sig.re is nil.
def _add_sig sigs, sig, args = {}
  raise "null RE: #{sig.inspect}" unless sig.re

  # bad sigs
  return if sig.re[/\A538BD833C0A30:::::/]
  return if sig.name == "Name of the Packer v1.0"
  return if sig.re == "54 68 69 73 20 70 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 6D 6F" # dos stub

  # name clean-up rules, applied in order (first occurrence only)
  [
    [/^\*\s+/,         ''],
    [/\s+\(h\)$/,      ''],
    [/version (\d)/i,  "v\\1"],
    [/Microsoft/i,     "MS"],
    [/ or /i,          " / "],
    ['RLP ',           'RLPack '],
    ['.beta',          ' beta'],
    ['(com)',          '[com]']
  ].each do |pattern, replacement|
    sig.name.sub!(pattern, replacement)
  end
  sig.name = sig.name.split(/\s*-+>\s*/).join(' -> ') # fix spaces around '->'

  # upper-case hex, ':' is an alternative wildcard char, and unspaced
  # signatures ("558BEC") are split into byte pairs
  sig.re = sig.re.strip.upcase.tr(':','?')
  sig.re = sig.re.scan(/../).join(' ') if sig.re.split.first.size > 2

  known = sigs[sig.re]
  if known
    keys = [sig, known].map{ |x| x.name.upcase.split('->').first.tr('V ','') }
    return if keys[0][keys[1]] || keys[1][keys[0]]

    merged = _merge_names(known.name, sig.name)
    if merged && merged != sig.name && merged != known.name
      puts "[.] sig name join: #{merged}" if args[:verbose]
      known.name = merged
    end
  else
    # new sig
    sigs[sig.re] = sig
  end
end
156
+
157
# Build a combined name for two signatures that share the same bytes.
#
# Only the part of each name before '->' is compared. Returns nil when the
# names do not differ in a meaningful way: no differing words once filler
# words (EXE, DLL, LZMA, ...) are ignored, or every differing word of one
# name is contained in some differing word of the other (case-insensitive;
# commas, dashes and a trailing "tm" are ignored).
#
# Otherwise the common leading and trailing words are kept once and the
# differing middle parts are joined with ' / '.
def _merge_names name1, name2
  filler = [
    'EXE','[EXE]',
    'vx.x','v?.?',
    'DLL','(DLL)','[DLL]',
    '[LZMA]','(LZMA)','LZMA',
    '-','~','(pack)','(1)','(2)',
    '19??'
  ]

  words1, words2 = [name1, name2].map{ |n| n.split('->').first.split }
  only1 = (words1 - words2) - filler # words found in name1 only
  only2 = (words2 - words1) - filler # words found in name2 only
  return if only1.empty? && only2.empty? # no different words

  # [["v1.14/v1.20"], ["v1.14,", "v1.20"]]]
  # [["EXEShield", "v0.3b/v0.3", "v0.6"], ["Shield", "v0.3b,", "v0.3"]]]
  simplify = lambda{ |w| w.downcase.delete(',-').sub(/tm$/,'') }
  [[only1, only2], [only2, only1]].each do |mine, theirs|
    covered = mine.all? do |x|
      needle = simplify.call(x)
      theirs.any?{ |y| simplify.call(y)[needle] }
    end
    return if covered
  end

  left  = name1.split
  right = name2.split

  head = []
  until left.empty? || right.empty? || left.first.upcase != right.first.upcase
    head << left.shift
    right.shift
  end

  tail = []
  until left.empty? || right.empty? || left.last.upcase != right.last.upcase
    tail.unshift left.pop
    right.pop
  end

  middle = [left, right].map{ |w| w.join(' ') }.reject{ |s| s.empty? }.join(' / ')
  (head + [middle] + tail).join(' ')
end
202
+
203
# Flatten a (possibly nested) array of regexp elements into one string.
#
# An OrBlock becomes a group of '|'-separated alternatives in parentheses,
# a plain Array is concatenated, a String is used as is; anything else
# contributes nothing. Top-level elements are joined with +sep+.
def _join a, sep=''
  pieces = a.map do |x|
    if x.is_a?(OrBlock)
      '(' + _join(x, '|') + ')'
    elsif x.is_a?(Array)
      _join x
    elsif x.is_a?(String)
      x
    end
  end
  pieces.join(sep)
end
215
+
216
# Position-wise comparison of two indexable sequences +a+ and +b+.
#
# Returns an Array of [a[i], b[i]] pairs for every index at which they
# differ (the shorter one yields nil past its end), or nil as soon as more
# than +max_cnt+ differences were collected.
def _re_diff a,b, max_cnt = 1000
  pairs = []
  longest = a.size > b.size ? a.size : b.size
  (0...longest).each do |i|
    next if a[i] == b[i]
    pairs << [a[i], b[i]]
    return nil if pairs.size > max_cnt
  end
  pairs
end
226
+
227
# Remove signatures that are covered by a more general one.
#
# Every pair of signatures (at least 6 elements each, "PseudoSigner" names
# excluded) differing in at most 6 positions is examined: when one of them
# has a wildcard ('.') or nothing at all in every differing position and is
# not the longer one, the other (more specific) signature is emptied and
# the surviving one takes the merged name if _merge_names provides one.
# Emptied signatures are finally deleted from +sigs+, which is returned.
#
# NOTE(review): the +ep_only ||=+ assignment is applied to the signature
# that is about to be removed, not to the surviving one - confirm intent.
def _optimize sigs
  min_sz   = 6
  max_diff = 6
  usable = lambda{ |s| s.re.size >= min_sz && !s.name['PseudoSigner'] }

  sigs.each_with_index do |sig1,idx|
    next unless usable.call(sig1)

    sigs[(idx+1)..-1].each do |sig2|
      next unless usable.call(sig2)

      rd = _re_diff(sig1.re, sig2.re, max_diff)
      next unless rd

      if rd.all?{ |x| x[0].nil? || x[0] == '.' } && sig2.re.size >= sig1.re.size
        # sig1 is the more general one: drop sig2
        merged = _merge_names(sig2.name, sig1.name)
        sig1.name = merged if merged
        sig2.ep_only ||= sig1.ep_only
        sig2.re = []
      elsif rd.all?{ |x| x[1].nil? || x[1] == '.' } && sig1.re.size >= sig2.re.size
        # sig2 is the more general one: drop sig1 and move on to the next sig1
        merged = _merge_names(sig2.name, sig1.name)
        sig2.name = merged if merged
        sig1.re = []
        sig1.ep_only ||= sig2.ep_only
        break
      end
    end
  end

  sigs.delete_if{ |sig| sig.re.empty? }
end
266
+
267
# Reduce the number of signatures in +sigs+ (modified in place, returned).
# Progress ("[.] sigs merge: N -> M") is printed to stdout for each step:
#
# 1. equal name strings are replaced by references to one shared String
#    (saves ~30k out of ~200k mem) and _optimize drops covered signatures;
# 2. signatures with the same length, name and ep_only flag are merged into
#    one via _merge where that is possible;
# 3. groups of 10 or more signatures with the same name, ep_only flag and
#    first 10 elements are folded into one signature whose remainder is an
#    OrBlock of all the individual remainders
#    (e.g. 361 entries of ["VMProtect v1.25 (PolyTech)", true, "h....\xE8...."]).
def optimize sigs
  # step 1: share name strings, drop covered signatures
  shared_names = {}
  sigs.each do |sig|
    sig.name = (shared_names[sig.name] ||= sig.name)
  end
  _report_merge(sigs){ _optimize(sigs) }

  # step 2: try to merge signatures with same name, size & ep_only
  by_shape = sigs.group_by{ |sig| [sig.re.size, sig.name, sig.ep_only] }
  by_shape.each_value do |group|
    next if group.size == 1
    merged_re = _merge(group)
    next unless merged_re
    group.first.re = merged_re
    group[1..-1].each{ |sig| sig.re = nil }
  end
  _report_merge(sigs){ sigs.delete_if{ |x| x.re.nil? } }

  # step 3: fold big groups sharing the same 10-element prefix
  by_prefix = sigs.group_by{ |sig| [sig.name, sig.ep_only, sig.re[0,10].join] }
  by_prefix.each_value do |entries|
    next if entries.size < 10
    keeper = entries.first
    prefix = keeper.re[0,10]
    infix  = entries.map{ |sig| sig.re[10..-1] }

    keeper.re   = prefix + [OrBlock.new(infix)]
    keeper.size = entries.map(&:size).max

    entries[1..-1].each{ |sig| sig.re = nil }
  end
  _report_merge(sigs){ sigs.delete_if{ |x| x.re.nil? } }

  sigs
end

# Print the number of signatures before and after running the given block.
def _report_merge sigs
  print "[.] sigs merge: #{sigs.size}"
  yield
  puts " -> #{sigs.size}"
end
340
+
341
+ # range of common difference between N given sigs
342
# Range of the common difference between N given sigs.
#
# res:: Array of equally sized element arrays.
#
# Returns the Range of indexes in which the arrays differ, provided there
# is exactly one such contiguous run and it does not span the whole
# length. Returns nil when the arrays are identical, differ in more than
# one run, or differ everywhere.
#
# Raises RuntimeError ("diff sizes") unless all arrays have the same size.
def _diff res
  raise "diff sizes" if res.map(&:size).uniq.size != 1
  size = res.first.size

  dstart  = nil
  dend    = size - 1 # a run that is never closed extends to the end
  prev_eq = true

  size.times do |i|
    eq = res.map{ |re| re[i] }.uniq.size == 1
    if eq != prev_eq
      if eq
        # end of current diff
        dend = i-1
      else
        # start of new diff
        return nil if dstart # return nil if it's a 2nd diff
        dstart = i
      end
    end
    prev_eq = eq
  end

  # identical inputs: there is nothing to merge on (and nil..dend would be
  # a beginless range that callers cannot take #first of)
  return nil unless dstart

  r = dstart..dend
  r == (0..(size-1)) ? nil : r
end
367
+
368
+ # merge array of signatures into one signature
369
# Merge an array of signatures into one regexp element array.
#
# All signatures must have element arrays (+re+) of the same length,
# otherwise a warning is printed and nil is returned. nil is also returned
# when _diff finds no single mergeable difference range.
#
# The result consists of the elements before the differing range (taken
# from the first signature), an OrBlock holding each signature's elements
# inside that range, and the elements after it.
def _merge sigs
  res   = sigs.map{ |sig| sig.re }
  sizes = res.map{ |re| re.size }

  unless sizes.uniq.size == 1
    puts "[?] wrong sizes: #{sizes.inspect}"
    return nil
  end

  diff = _diff res
  return nil unless diff

  ref          = res.first
  head         = ref[0...diff.first]
  alternatives = OrBlock.new(res.map{ |re| re[diff] })
  tail         = ref[(diff.last+1)..-1]
  head + [alternatives] + tail
end
384
+ end
385
+ end
386
+ end