pedump 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ require 'pedump/sig_parser'
2
+
3
+ class PEdump
4
+ class Packer < Struct.new(:name, :re, :ep_only, :size)
5
+
6
+ DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))
7
+ BIN_SIGS_FILE = File.join(DATA_ROOT, "data", "sig.bin")
8
+
9
+ class Match < Struct.new(:offset, :packer)
10
+ def name
11
+ packer.name
12
+ end
13
+ end
14
+
15
+ class << self
16
+ def all
17
+ @@all ||=
18
+ begin
19
+ r = unmarshal
20
+ unless r
21
+ msg = "[?] #{self}: unmarshal failed, using slow text parsing instead"
22
+ if PEdump.respond_to?(:logger) && PEdump.logger
23
+ PEdump.logger.warn msg
24
+ else
25
+ STDERR.puts msg
26
+ end
27
+ r = SigParser.parse
28
+ end
29
+ r
30
+ end
31
+ end
32
+ alias :load :all
33
+
34
+ # default deep-scan flag
35
+ @@deep = false
36
+
37
+ def default_deep
38
+ @@deep
39
+ end
40
+
41
+ def default_deep= value
42
+ @@deep = value
43
+ end
44
+
45
+ def max_size
46
+ @@max_size ||= all.map(&:size).max
47
+ end
48
+
49
+ def of data, h = {}
50
+ if data.respond_to?(:read) && data.respond_to?(:seek) && h[:ep_offset]
51
+ of_pe_file data, h
52
+ else
53
+ of_data data
54
+ end
55
+ end
56
+
57
+ # try to determine packer of FILE f, ep_offset - offset to entrypoint from start of file
58
+ def of_pe_file f, h
59
+ h[:deep] = @@deep unless h.key?(:deep)
60
+ h[:deep] = 1 if h[:deep] == true
61
+ h[:deep] = 0 if h[:deep] == false
62
+
63
+ f.seek(h[:ep_offset]) # offset of PE EntryPoint from start of file
64
+ r = Array(of_data(f.read(max_size)))
65
+ return r if r && r.any? && h[:deep] < 2
66
+ r += scan_whole_file(f,
67
+ :limit => (h[:deep] > 0 ? nil : 1048576),
68
+ :deep => h[:deep]
69
+ ) # scan only 1st mb unless :deep
70
+ end
71
+
72
+ BLOCK_SIZE = 0x10000
73
+
74
+ def scan_whole_file f, h = {}
75
+ h[:limit] ||= f.size
76
+ f.seek( pos = 0 )
77
+ buf = ''.force_encoding('binary')
78
+ sigs =
79
+ if h[:deep].is_a?(Numeric) && h[:deep] > 1
80
+ self.all
81
+ else
82
+ self.find_all{ |sig| !sig.ep_only }
83
+ end
84
+ r = []
85
+ while true
86
+ f.read BLOCK_SIZE, buf
87
+ pos += buf.size
88
+ sigs.each do |sig|
89
+ if idx = buf.index(sig.re)
90
+ r << Match.new(f.tell-buf.size+idx, sig)
91
+ end
92
+ end
93
+ break if f.eof? || pos >= h[:limit]
94
+ # overlap the read for the case when read buffer boundary breaks signature
95
+ f.seek -max_size-2, IO::SEEK_CUR
96
+ pos -= (max_size+2)
97
+ end
98
+ r
99
+ end
100
+
101
+ def of_data data
102
+ r = []
103
+ each do |packer|
104
+ if (idx=data.index(packer.re)) == 0
105
+ r << Match.new(idx, packer)
106
+ end
107
+ end
108
+ r.any? ? r.sort_by{ |x| -x.packer.size } : nil
109
+ end
110
+
111
+ def method_missing *args, &block
112
+ all.respond_to?(args.first) ? all.send(*args,&block) : super
113
+ end
114
+
115
+ def unmarshal
116
+ File.open(BIN_SIGS_FILE,"rb") do |f|
117
+ Marshal.load(f)
118
+ end
119
+ rescue
120
+ nil
121
+ end
122
+
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,386 @@
1
+ class PEdump
2
+ module SigParser
3
+
4
+ DATA_ROOT = File.dirname(File.dirname(File.dirname(__FILE__)))
5
+
6
+ TEXT_SIGS_FILES = [
7
+ File.join(DATA_ROOT, "data", "userdb.txt"),
8
+ File.join(DATA_ROOT, "data", "signatures.txt"),
9
+ File.join(DATA_ROOT, "data", "fs.txt")
10
+ ]
11
+
12
+ class OrBlock < Array; end
13
+
14
+ class << self
15
+
16
+ # parse text signatures
17
+ def parse args = {}
18
+ args[:fnames] ||= TEXT_SIGS_FILES
19
+ sigs = {}; sig = nil
20
+
21
+ args[:fnames].each do |fname|
22
+ n0 = sigs.size
23
+ File.open(fname,'r:utf-8') do |f|
24
+ while line = f.gets
25
+ case line.strip
26
+ when /^[<;#]/, /^$/ # comments & blank lines
27
+ next
28
+ when /^\[(.+)=(.+)\]$/
29
+ _add_sig(sigs, Packer.new($1, $2, true), args )
30
+ when /^\[([^=]+)\]$/
31
+ sig = Packer.new($1)
32
+ when /^signature = (.+)$/
33
+ sig.re = $1
34
+ _add_sig(sigs, sig, args)
35
+ when /^ep_only = (.+)$/
36
+ sig.ep_only = ($1.strip.downcase == 'true')
37
+ else raise line
38
+ end
39
+ end
40
+ end
41
+ puts "[=] #{sigs.size-n0} sigs from #{File.basename(fname)}\n\n" if args[:verbose]
42
+ end
43
+
44
+ bins = Hash.new{ |k,v| k[v] = ''.force_encoding('binary') }
45
+
46
+ # convert strings to Regexps
47
+ sigs = sigs.values
48
+ sigs.each_with_index do |sig,idx|
49
+ sig.re =
50
+ sig.re.split(' ').tap do |a|
51
+ sig.size = a.size
52
+ end.map do |x|
53
+ case x
54
+ when /\A\?\?\Z/
55
+ bins[sig] << '.'
56
+ '.'
57
+ when /\A.\?/,/\?.\Z/
58
+ puts "[?] #{x.inspect} -> \"??\" in #{sig.name}" if args[:verbose]
59
+ bins[sig] << '.'
60
+ '.'
61
+ when /\A[a-f0-9]{2}\Z/i
62
+ x = x.to_i(16).chr
63
+ bins[sig] << x
64
+ args[:raw] ? x : Regexp::escape(x)
65
+ else
66
+ puts "[?] unknown re element: #{x.inspect} in #{sig.inspect}" if args[:verbose]
67
+ "BAD_RE"
68
+ break
69
+ end
70
+ end
71
+ if sig.name[/-+>/]
72
+ a = sig.name.split(/-+>/,2).map(&:strip)
73
+ sig.name = "#{a[0]} (#{a[1]})"
74
+ end
75
+ sig.re.pop while sig.re.last == '??'
76
+ end
77
+ sigs.delete_if{ |sig| !sig.re || sig.re.index('BAD_RE') }
78
+ return sigs if args[:raw]
79
+
80
+ # require 'awesome_print'
81
+ # bins.each do |bin_sig, bin|
82
+ # next if bin.size < 5
83
+ # #next unless bin_sig.name['UPX']
84
+ #
85
+ # bin_re = Regexp.new(bin_sig.re.join, Regexp::MULTILINE)
86
+ # was = false
87
+ # sigs.each do |sig|
88
+ # next if sig.size < 5 || sig == bin_sig
89
+ # #next unless sig.name['UPX']
90
+ #
91
+ # re = Regexp.new(sig.re.join, Regexp::MULTILINE)
92
+ # if bin.index(re) == 0
93
+ # rd = _re_diff(bin_re.source, re.source)
94
+ # if rd.any? && rd.size <= 4
95
+ # #if sig.name.split.first.upcase != bin_sig.name.split.first.upcase
96
+ # puts "\n[.] #{bin_sig.name.yellow}\n#{bin_re.source.inspect.red}" unless was
97
+ # puts "[=] #{sig.name}"
98
+ # puts re.source.inspect.green
99
+ # p rd
100
+ # was = true
101
+ # #end
102
+ # end
103
+ # end
104
+ # end
105
+ # end
106
+
107
+
108
+ optimize sigs if args[:optimize]
109
+
110
+ # convert re-arrays to Regexps
111
+ sigs.each do |sig|
112
+ sig.re = Regexp.new( _join(sig.re), Regexp::MULTILINE )
113
+ end
114
+
115
+ sigs
116
+ end
117
+
118
+ # XXX
119
+ # "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml:
120
+ # RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS
121
+
122
+ def _add_sig sigs, sig, args = {}
123
+ raise "null RE: #{sig.inspect}" unless sig.re
124
+
125
+ # bad sigs
126
+ return if sig.re[/\A538BD833C0A30:::::/]
127
+ return if sig.name == "Name of the Packer v1.0"
128
+ return if sig.re == "54 68 69 73 20 70 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 6D 6F" # dos stub
129
+
130
+ sig.name.sub!(/^\*\s+/, '')
131
+ sig.name.sub!(/\s+\(h\)$/, '')
132
+ sig.name.sub!(/version (\d)/i,"v\\1")
133
+ sig.name.sub!(/Microsoft/i, "MS")
134
+ sig.name.sub!(/ or /i, " / ")
135
+ sig.name.sub! 'RLP ','RLPack '
136
+ sig.name.sub! '.beta', ' beta'
137
+ sig.name.sub! '(com)','[com]'
138
+ sig.name = sig.name.split(/\s*-+>\s*/).join(' -> ') # fix spaces around '->'
139
+
140
+ sig.re = sig.re.strip.upcase.tr(':','?')
141
+ sig.re = sig.re.scan(/../).join(' ') if sig.re.split.first.size > 2
142
+ if sigs[sig.re]
143
+ a = [sig, sigs[sig.re]].map{ |x| x.name.upcase.split('->').first.tr('V ','') }
144
+ return if a[0][a[1]] || a[1][a[0]]
145
+
146
+ new_name = _merge_names(sigs[sig.re].name, sig.name)
147
+ if new_name && new_name != sig.name && new_name != sigs[sig.re].name
148
+ puts "[.] sig name join: #{new_name}" if args[:verbose]
149
+ sigs[sig.re].name = new_name
150
+ end
151
+ else
152
+ # new sig
153
+ sigs[sig.re] = sig
154
+ end
155
+ end
156
+
157
+ def _merge_names name1, name2
158
+ a = [name1, name2].map{ |x| x.split('->').first.split }
159
+
160
+ d = [a[0]-a[1], a[1]-a[0]] # different words
161
+ d.map! do |x|
162
+ x - [
163
+ 'EXE','[EXE]',
164
+ 'vx.x','v?.?',
165
+ 'DLL','(DLL)','[DLL]',
166
+ '[LZMA]','(LZMA)','LZMA',
167
+ '-','~','(pack)','(1)','(2)',
168
+ '19??'
169
+ ]
170
+ end
171
+ return if d.all?(&:empty?) # no different words
172
+
173
+ # [["v1.14/v1.20"], ["v1.14,", "v1.20"]]]
174
+ # [["EXEShield", "v0.3b/v0.3", "v0.6"], ["Shield", "v0.3b,", "v0.3"]]]
175
+ 2.times do |i|
176
+ return if d[i].all? do |x|
177
+ x = x.downcase.delete(',-').sub(/tm$/,'')
178
+ d[1-i].any? do |y|
179
+ y = y.downcase.delete(',-').sub(/tm$/,'')
180
+ y[x]
181
+ end
182
+ end
183
+ end
184
+
185
+ a = name1.split
186
+ b = name2.split
187
+ new_name_head = []
188
+ while a.any? && b.any? && a.first.upcase == b.first.upcase
189
+ new_name_head << a.shift
190
+ b.shift
191
+ end
192
+ new_name_tail = []
193
+ while a.any? && b.any? && a.last.upcase == b.last.upcase
194
+ new_name_tail.unshift a.pop
195
+ b.pop
196
+ end
197
+ new_name = new_name_head
198
+ new_name << [a.join(' '), b.join(' ')].delete_if{|x| x.empty?}.join(' / ')
199
+ new_name += new_name_tail
200
+ new_name = new_name.join(' ')
201
+ end
202
+
203
+ def _join a, sep=''
204
+ a.map do |x|
205
+ case x
206
+ when OrBlock
207
+ '(' + _join(x, '|') + ')'
208
+ when Array
209
+ _join x
210
+ when String
211
+ x
212
+ end
213
+ end.join(sep)
214
+ end
215
+
216
+ def _re_diff a,b, max_cnt = 1000
217
+ r = []
218
+ [a,b].map(&:size).max.times.map do |i|
219
+ if a[i] != b[i]
220
+ r << [a[i],b[i]]
221
+ return nil if r.size > max_cnt
222
+ end
223
+ end
224
+ r
225
+ end
226
+
227
+ def _optimize sigs
228
+ nfound = 0
229
+ min_sz = 6
230
+ max_diff = 6
231
+ sigs.each_with_index do |sig1,idx|
232
+ #break if idx == 100
233
+ next if sig1.re.size < min_sz
234
+ next if sig1.name['PseudoSigner']
235
+
236
+ sigs[(idx+1)..-1].each do |sig2|
237
+ next if sig2.re.size < min_sz
238
+ next if sig2.name['PseudoSigner']
239
+
240
+ if rd = _re_diff(sig1.re, sig2.re, max_diff)
241
+ if rd.all?{ |x| x[0].nil? || x[0] == '.' } && sig2.re.size >= sig1.re.size
242
+ if new_name = _merge_names(sig2.name, sig1.name)
243
+ #pp ["FIRST", sig1.name, sig2.name, new_name, sig1.re.join, sig2.re.join] if new_name
244
+ sig1.name = new_name
245
+ end
246
+ sig2.ep_only ||= sig1.ep_only
247
+ sig2.re = []
248
+ elsif rd.all?{ |x| x[1].nil? || x[1] == '.' } && sig1.re.size >= sig2.re.size
249
+ if new_name = _merge_names(sig2.name, sig1.name)
250
+ #pp ["SECOND", sig1.name, sig2.name, new_name, sig1.re.join, sig2.re.join] if new_name
251
+ sig2.name = new_name
252
+ end
253
+ sig1.re = []
254
+ sig1.ep_only ||= sig2.ep_only
255
+ break
256
+ else
257
+ next
258
+ end
259
+ nfound += 1
260
+ end
261
+ end
262
+ end
263
+
264
+ sigs.delete_if{ |sig| sig.re.empty? }
265
+ end
266
+
267
+ def optimize sigs
268
+ # replaces all duplicate names with references to one name
269
+ # saves ~30k out of ~200k mem
270
+ h = {}
271
+ sigs.each do |sig|
272
+ sig.name = (h[sig.name] ||= sig.name)
273
+ end
274
+
275
+ print "[.] sigs merge: #{sigs.size}"; _optimize(sigs); puts " -> #{sigs.size}"
276
+
277
+ # try to merge signatures with same name, size & ep_only
278
+ sigs.group_by{ |sig|
279
+ [sig.re.size, sig.name, sig.ep_only]
280
+ }.values.each do |a|
281
+ next if a.size == 1
282
+ if merged_re = _merge(a)
283
+ a.first.re = merged_re
284
+ a[1..-1].each{ |sig| sig.re = nil }
285
+ end
286
+ end
287
+ print "[.] sigs merge: #{sigs.size}"; sigs.delete_if{ |x| x.re.nil? }; puts " -> #{sigs.size}"
288
+
289
+
290
+ # 361 entries of ["VMProtect v1.25 (PolyTech)", true, "h....\xE8...."])
291
+ sigs.group_by{ |sig|
292
+ [sig.name, sig.ep_only, sig.re[0,10].join]
293
+ }.each do |k,entries|
294
+ next if entries.size < 10
295
+ #printf "%5d %s\n", entries.size, k
296
+ prefix = entries.first.re[0,10]
297
+ infix = entries.map{ |sig| sig.re[10..-1] }
298
+
299
+ entries.first.re = prefix + [OrBlock.new(infix)]
300
+ entries.first.size = entries.map(&:size).max
301
+
302
+ entries[1..-1].each{ |sig| sig.re = nil }
303
+ end
304
+ print "[.] sigs merge: #{sigs.size}"; sigs.delete_if{ |x| x.re.nil? }; puts " -> #{sigs.size}"
305
+
306
+
307
+ # # merge signatures with same prefix & suffix
308
+ # # most ineffecient part :)
309
+ # sigs.group_by{ |sig|
310
+ # [sig.name, sig.ep_only, sig.re.index{ |x| x.is_a?(Array)}]
311
+ # }.values.each do |a|
312
+ # next if a.size == 1
313
+ # next unless idx = a.first.re.index{ |x| x.is_a?(Array) }
314
+ # a.group_by{ |sig| [sig.re[0...idx], sig.re[(idx+1)..-1]] }.each do |k,entries|
315
+ # # prefix | infix | suffix
316
+ # # s o m [[b r e r o] [e w h a t]] h e r e
317
+ # prefix, suffix = k
318
+ # infix = entries.map{ |sig| sig.re[idx] }
319
+ # #infix = [['f','o','o']]
320
+ # merged_re = prefix + infix + suffix
321
+ # max_size = entries.map(&:size).max
322
+ # entries.each{ |sig| sig.re = merged_re; sig.size = max_size }
323
+ # end
324
+ # end
325
+ # print "[.] sigs merge: #{sigs.size}"; sigs.uniq!; puts " -> #{sigs.size}"
326
+
327
+ # stats
328
+ # aa = []
329
+ # 6.upto(20) do |len|
330
+ # sigs.group_by{ |sig| [sig.re[0,len].join, sig.name, sig.ep_only] }.each do |a,b|
331
+ # aa << [b.size, a[0], [b.map(&:size).min, b.map(&:size).max].join(' .. ') ] if b.size > 2
332
+ # end
333
+ # end
334
+ # aa.sort_by(&:first).each do |sz,prefix,name|
335
+ # printf "%5d %-50s %s\n", sz, prefix.inspect, name
336
+ # end
337
+
338
+ sigs
339
+ end
340
+
341
+ # range of common difference between N given sigs
342
+ def _diff res
343
+ raise "diff sizes" if res.map(&:size).uniq.size != 1
344
+ size = res.first.size
345
+
346
+ dstart = nil
347
+ dend = size - 1
348
+ prev_eq = true
349
+
350
+ size.times do |i|
351
+ eq = res.map{ |re| re[i] }.uniq.size == 1
352
+ if eq != prev_eq
353
+ if eq
354
+ # end of current diff
355
+ dend = i-1
356
+ else
357
+ # start of new diff
358
+ return nil if dstart # return nil if it's a 2nd diff
359
+ dstart = i
360
+ end
361
+ end
362
+ prev_eq = eq
363
+ end
364
+ r = dstart..dend
365
+ r == (0..(size-1)) ? nil : r
366
+ end
367
+
368
+ # merge array of signatures into one signature
369
+ def _merge sigs
370
+ sizes = sigs.map(&:re).map(&:size)
371
+
372
+ if sizes.uniq.size != 1
373
+ puts "[?] wrong sizes: #{sizes.inspect}"
374
+ return nil
375
+ end
376
+
377
+ res = sigs.map(&:re)
378
+ diff = _diff res
379
+ return nil unless diff
380
+
381
+ ref = res.first
382
+ ref[0...diff.first] + [OrBlock.new(res.map{ |re| re[diff] })] + ref[(diff.last+1)..-1]
383
+ end
384
+ end
385
+ end
386
+ end