plympton 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +24 -0
- data/Rakefile +48 -0
- data/VERSION +1 -0
- data/bin/func-auto.py +431 -0
- data/bin/func.py +435 -0
- data/bin/func.py.new +440 -0
- data/bin/idascript.py +21 -0
- data/lib/plympton.rb +26 -0
- data/lib/plympton/Solver.tokens +25 -0
- data/lib/plympton/SolverLexer.rb +704 -0
- data/lib/plympton/SolverParser.rb +550 -0
- data/lib/plympton/block.rb +19 -0
- data/lib/plympton/chunk.rb +20 -0
- data/lib/plympton/disassembly.rb +105 -0
- data/lib/plympton/function.rb +31 -0
- data/lib/plympton/matrix.rb +59 -0
- data/lib/plympton/object.rb +153 -0
- data/lib/plympton/solver.g +118 -0
- data/plympton.gemspec +95 -0
- data/spec/libFontParser.64.dylib.fz +152170 -0
- data/spec/libauto.dylib.fz +127001 -0
- data/spec/plympton_spec.rb +220 -0
- data/spec/rufus-test.32bit.trace.xml +53 -0
- data/spec/rufus-test.64bit.trace.xml +50 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/steady-state.64bit.trace.xml +12070 -0
- metadata +195 -0
data/bin/func.py
ADDED
@@ -0,0 +1,435 @@
|
|
1
|
+
import yaml
|
2
|
+
from sets import Set
|
3
|
+
|
4
|
+
class IdaProgram(yaml.YAMLObject):
    '''
    YAML-serializable summary of the disassembled program.

    Collects an IdaFunction for every address yielded by Functions() over the
    supplied range and keeps them in two lists: functionList for entries whose
    isImport flag is false, importList for the others.
    '''
    yaml_tag = u'!fuzz.io/IdaProgram'

    def __init__(self, textSegmentStart, textSegmentEnd):
        '''
        Build the program summary for the given address range.

        @param textSegmentStart: Start address handed to Functions().
        @param textSegmentEnd:   End address handed to Functions().
        '''
        self.disName = GetInputFilePath()
        self.functionList = []
        self.importList = []

        # The bounds stay numeric while initialize_functions() uses them.
        self.textSegmentStart = textSegmentStart
        self.textSegmentEnd = textSegmentEnd

        #
        # Pull out all information about functions
        #
        self.initialize_functions()

        self.numFunctions = len(self.functionList)
        self.numImports = len(self.importList)
        self.numBlocks = self.iter_functions()

        # Only the serialized form of the bounds is a hex string.
        self.textSegmentStart, self.textSegmentEnd = hex(textSegmentStart), hex(textSegmentEnd)

    def initialize_functions(self):
        '''
        Create an IdaFunction per function address and file it under
        importList or functionList according to its isImport flag.
        '''
        for address in Functions(self.textSegmentStart, self.textSegmentEnd):
            function = IdaFunction(address)
            if function.isImport:
                destination = self.importList
            else:
                destination = self.functionList
            destination.append(function)

    def iter_functions(self):
        '''
        Return the sum of iter_chunks() over every entry of functionList.
        '''
        return sum(function.iter_chunks() for function in self.functionList)
|
41
|
+
|
42
|
+
#
|
43
|
+
# Create a class for a function
|
44
|
+
#
|
45
|
+
class IdaFunction(yaml.YAMLObject):
    '''
    YAML-serializable summary of one function.

    Entries for which no function structure exists, or whose flags carry
    FUNC_LIB or FUNC_STATIC, are recorded as imports (isImport is True) with
    zeroed frame information and no chunks.
    '''
    yaml_tag = u'!fuzz.io/IdaFunction'

    def __init__(self, effectiveAddress):
        '''
        @param effectiveAddress: Address of the function to describe.
        '''
        self.name = Name(effectiveAddress)
        self.argSize = 0
        self.numArgs = 0
        self.numLocalVars = 0
        self.isImport = False
        self.chunkList = []
        self.numChunks = 0
        self.cyclomaticComplexity = 0

        #
        # Get the function flags and structure
        #
        flags = GetFunctionFlags(effectiveAddress)
        funcStruct = idaapi.get_func(effectiveAddress)

        # if we're not in a "real" function, set the addresses and name manually and stop analyzing.
        if not funcStruct or flags & FUNC_LIB or flags & FUNC_STATIC:
            self.startAddress = hex(effectiveAddress)
            self.endAddress = hex(effectiveAddress)
            self.name = idaapi.get_name(effectiveAddress, effectiveAddress)
            self.savedRegSize = 0
            self.localVarSize = 0
            self.frameSize = 0
            self.retSize = 0
            self.isImport = True

            # TODO: argSize, numArgs and numLocalVars keep their zero defaults
            # for imports; fix these if possible.
            return

        #
        # So we know we're in a real function; the frame is only looked up
        # once funcStruct is known to be valid.
        #
        frameStruct = idaapi.get_frame(funcStruct)

        # startAddress stays numeric until the chunks and the complexity have
        # been computed, because both need the raw address.
        self.startAddress = funcStruct.startEA
        self.endAddress = hex(PrevAddr(funcStruct.endEA))
        self.savedRegSize = funcStruct.frregs
        self.localVarSize = funcStruct.frsize
        self.frameSize = idaapi.get_frame_size(funcStruct)
        self.retSize = idaapi.get_frame_retsize(funcStruct)

        print("Saved Reg Size %d" % self.savedRegSize)
        print("FRSIZE %d" % self.localVarSize)

        #
        # Fixup numbers for arguments and local variables
        #
        self.__init_args_and_local_vars__(funcStruct, frameStruct)

        #
        # Initialize chunks
        #
        self.collect_function_chunks()
        self.cyclomaticComplexity = self.calculate_cyclomatic_complexity(self.startAddress)
        self.startAddress = hex(self.startAddress)

    def calculate_cyclomatic_complexity (self, function_ea):
        '''Calculate the cyclomatic complexity measure for a function.

        Given the starting address of a function, it will find all
        the basic block's boundaries and edges between them and will
        return the cyclomatic complexity, defined as:

            CC = Edges - Nodes + 2
        http://www.openrce.org/articles/full_view/11

        @param function_ea: Numeric start address of the function.

        @return: len(edges) - len(boundaries) + 2
        '''

        f_start = function_ea
        f_end = FindFuncEnd(function_ea)

        # Builtin sets replace the deprecated sets.Set class.
        edges = set()
        boundaries = set((f_start,))

        # For each defined element in the function.
        for head in Heads(f_start, f_end):

            # Only instructions contribute references.
            if not isCode(GetFlags(head)):
                continue

            # Get the references made from the current instruction
            # and keep only the ones local to the function.
            refs = set(ref for ref in CodeRefsFrom(head, 0) if f_start <= ref <= f_end)
            if not refs:
                continue

            # If the flow continues also to the next (address-wise)
            # instruction, we add a reference to it.
            # For instance, a conditional jump will not branch
            # if the condition is not met, so we save that
            # reference as well.
            next_head = NextHead(head, f_end)
            if isFlow(GetFlags(next_head)):
                refs.add(next_head)

            # Update the boundaries found so far.
            boundaries.update(refs)

            # For each of the references found, an edge is created.
            for ref in refs:
                # If the flow could also come from the address
                # previous to the destination of the branching
                # an edge is created.
                if isFlow(GetFlags(ref)):
                    edges.add((PrevHead(ref, f_start), ref))
                edges.add((head, ref))

        return len(edges) - len(boundaries) + 2

    def __init_args_and_local_vars__ (self, funcStruct, frameStruct):
        '''
        Calculate the total size of arguments, # of arguments and # of local variables. Update the internal class member
        variables appropriately. Taken directly from paimei

        Leaves everything untouched when frameStruct is false; otherwise
        localVarSize is recomputed from zero and argSize, numArgs and
        numLocalVars are updated.

        @param funcStruct:  Function structure (not read by this method).
        @param frameStruct: Frame structure whose members are walked.
        '''

        if not frameStruct:
            return

        # keyed by member start offset, so each offset is counted once
        args = {}
        local_vars = {}

        # Members whose running offset exceeds the frame size are counted as
        # arguments, everything else as a local variable.
        argument_boundary = self.frameSize
        frame_offset = frameStruct.get_member(0).soff
        self.localVarSize = 0

        member_count = frameStruct.memqty
        for i in range(0, member_count):
            member = frameStruct.get_member(i)
            end_offset = member.soff

            # The span of a member runs to the start of the next one; the
            # last member uses its own end offset instead.
            if i == member_count - 1:
                begin_offset = member.eoff
            else:
                begin_offset = frameStruct.get_member(i+1).soff

            frame_offset += (begin_offset - end_offset)

            # grab the name of the current local variable or argument.
            name = idaapi.get_member_name(member.id)
            if name is None:
                continue

            if frame_offset > argument_boundary:
                args[end_offset] = name
                self.argSize = self.argSize + idaapi.get_member_size(member)
            elif not name.startswith(" "):
                # a name starting with a space is ignored as it is either the
                # stack saved ebp or eip (a crude check, kept from the original).
                local_vars[end_offset] = name
                self.localVarSize = self.localVarSize + idaapi.get_member_size(member)

        self.numArgs = len(args)
        self.numLocalVars = len(local_vars)

    def iter_chunks(self):
        '''
        Return the total number of blocks held by the chunks in chunkList.
        '''
        return sum(len(chunk.blockList) for chunk in self.chunkList)

    def collect_function_chunks(self):
        '''
        Build an IdaChunk for every function chunk (including the main one) of the current function and append it
        to chunkList, then record the count in numChunks. Ripped from idb2reml (Ero Carerra). Modified slightly by
        Roger Seagle.

        Must run while startAddress is still numeric, because it is passed to idaapi.get_func().

        @rtype:  None
        @return: None
        '''

        #
        # Loop through all chunks for a function
        #
        iterator = idaapi.func_tail_iterator_t(idaapi.get_func(self.startAddress))
        status = iterator.main()

        while status:
            self.chunkList.append(IdaChunk(iterator.chunk()))
            status = iterator.next()

        # numChunks was previously left at its initial 0 for every function.
        self.numChunks = len(self.chunkList)
|
246
|
+
|
247
|
+
#
|
248
|
+
# Create a class for a function chunk
|
249
|
+
#
|
250
|
+
class IdaChunk(yaml.YAMLObject):
    '''
    YAML-serializable description of one function chunk, split into IdaBlock
    entries stored in blockList.
    '''
    yaml_tag = u'!fuzz.io/IdaChunk'

    def __init__(self, chunk):
        '''
        @param chunk: Object exposing numeric startEA and endEA attributes
                      (IdaFunction passes the value of func_tail_iterator_t.chunk()).
        '''
        self.startEA = chunk.startEA
        self.endEA = chunk.endEA
        self.blockList = []
        self.numBlocks = 0

        # address at which the block currently being assembled begins
        block_start = self.startEA

        #
        # NOTE(review): a block is only emitted at a ret instruction, at a
        # branch target or at a branching instruction. Instructions that follow
        # the last such point (code mixed with data and no ret, or code that
        # just calls exit) end up in no block; the original author flagged
        # this as a possible bug.
        #

        #
        # Break down the chunk into blocks
        #
        for effectiveAddress in Heads(self.startEA, self.endEA):

            #
            # Ignore Head if data
            #
            if not isCode(GetFlags(effectiveAddress)):
                continue

            #
            # Get the list of places branched to and from
            #
            branchesTo = self._branches_to(effectiveAddress)
            branchesFrom = self._branches_from(effectiveAddress)

            # ensure that next_ea references code and not data. The walk goes
            # forward (it used to step backwards, which could only return to
            # the current instruction or earlier) and stops at BADADDR so it
            # cannot spin when no further head exists.
            next_ea = NextNotTail(effectiveAddress)
            while next_ea != idaapi.BADADDR and not isCode(GetFlags(next_ea)):
                next_ea = NextNotTail(next_ea)

            # if the current instruction is a ret instruction, end the current node at ea.
            if idaapi.is_ret_insn(effectiveAddress):
                self.blockList.append(IdaBlock(block_start, effectiveAddress, branchesTo, branchesFrom))
                block_start = next_ea

            # if something branches to the current instruction, close the running block and start a new one at ea.
            # NOTE(review): the closed block ends at ea and the new block
            # starts at ea, so this instruction belongs to both - confirm
            # that this is intended.
            elif branchesTo and block_start != effectiveAddress:
                self.blockList.append(IdaBlock(block_start, effectiveAddress, branchesTo, branchesFrom))
                block_start = effectiveAddress

            # if there is a branch from the current instruction, end the current node at ea
            # and start a new block at the next ea.
            elif branchesFrom:
                self.blockList.append(IdaBlock(block_start, effectiveAddress, branchesTo, branchesFrom))
                block_start = next_ea

        #
        # Calculate the number of blocks
        #
        self.numBlocks = len(self.blockList)

        #
        # Convert addresses to hex
        #
        self.startEA = hex(self.startEA)
        self.endEA = hex(self.endEA)

    ####################################################################################################################
    def _branches_from (self, ea):
        '''
        Enumerate and return the list of branches from the supplied address. Call instructions yield an empty list,
        and so does an instruction whose only reference is the next logical instruction.
        Part of the reason why we even need this function is that the "flow" argument to CodeRefsFrom does not appear
        to be functional.

        @type  ea: DWORD
        @param ea: Effective address of instruction to enumerate jumps from.

        @rtype:  List
        @return: List of branches from the specified address (raw values as produced by CodeRefsFrom).
        '''

        if idaapi.is_call_insn(ea):
            return []

        xrefs = list(CodeRefsFrom(ea, 1))

        # if the only xref from ea is next ea, then return nothing.
        if len(xrefs) == 1 and xrefs[0] == NextNotTail(ea):
            return []

        return xrefs


    ####################################################################################################################
    def _branches_to (self, ea):
        '''
        Enumerate and return the list of branches to the supplied address, *excluding* the previous logical instruction
        and any reference that comes from a call instruction.
        Part of the reason why we even need this function is that the "flow" argument to CodeRefsTo does not appear to
        be functional.

        @type  ea: DWORD
        @param ea: Effective address of instruction to enumerate jumps to.

        @rtype:  List
        @return: List of branches to the specified address, each already converted with hex().
        '''

        prev_ea = PrevNotTail(ea)

        # step back further until the previous head is an instruction
        prev_code_ea = prev_ea
        while not isCode(GetFlags(prev_code_ea)):
            prev_code_ea = PrevNotTail(prev_code_ea)

        excluded = [prev_ea, prev_code_ea]
        return [hex(xref)
                for xref in CodeRefsTo(ea, 1)
                if not idaapi.is_call_insn(xref) and xref not in excluded]
|
380
|
+
|
381
|
+
#
|
382
|
+
# Create a class for a basic block
|
383
|
+
#
|
384
|
+
class IdaBlock(yaml.YAMLObject):
    '''
    YAML-serializable record of one block: its bounds, the branch lists that
    were supplied for it and the number of instructions it spans.
    '''
    yaml_tag = u'!fuzz.io/IdaBlock'

    def __init__(self, effectiveAddressStart, effectiveAddressEnd, branchesTo, branchesFrom):
        '''
        @param effectiveAddressStart: Numeric address of the first head of the block.
        @param effectiveAddressEnd:   Numeric address of the last head of the block.
        @param branchesTo:            Stored unchanged as branchTo.
        @param branchesFrom:          Sequence of numeric addresses; stored as hex strings in branchFrom.
        '''
        self.startEA = hex(effectiveAddressStart)
        self.endEA = hex(effectiveAddressEnd)
        self.branchTo = branchesTo

        #
        # Convert branches to hex addresses
        #
        self.branchFrom = [hex(target) for target in branchesFrom]

        #
        # Get the number of instructions in the block; the end address is
        # bumped by one so that the head at effectiveAddressEnd is included.
        #
        instruction_count = 0
        for head in Heads(effectiveAddressStart, effectiveAddressEnd + 1):
            if isCode(GetFlags(head)):
                instruction_count += 1
        self.numInstructions = instruction_count
|
405
|
+
|
406
|
+
#
# Ask where to save the results
#
infoFilename = AskFile(1, "*.fz", "Please select/create a file to save static disassembly information:")

if not infoFilename:
    # AskFile handed back nothing (presumably the dialog was cancelled), so
    # there is no file to write; stop here instead of failing inside open().
    print("No output file selected; static disassembly information was not saved.")
else:
    #
    # Open the file
    #
    infoFile = open(infoFilename, 'w')
    try:
        #
        # Get the start and end of the text section
        #
        # NOTE(review): func.py.new resolves the value returned by SegByName
        # through SegByBase before using it as an address - confirm which
        # form matches the IDAPython version this script targets.
        textSegmentStart = SegByName("__text")
        textSegmentEnd = SegEnd(textSegmentStart)

        #
        # Pull out all the information
        #
        disassembledProgram = IdaProgram(textSegmentStart, textSegmentEnd)

        #
        # Dump the disassembled program info in a portable format
        #
        yaml.dump(disassembledProgram, infoFile, default_flow_style=False)
    finally:
        #
        # Be nice: close the file even when the analysis or the dump fails
        #
        infoFile.close()
|
data/bin/func.py.new
ADDED
@@ -0,0 +1,440 @@
|
|
1
|
+
import yaml
|
2
|
+
#from sets import Set
|
3
|
+
|
4
|
+
class IdaProgram(yaml.YAMLObject):
    '''
    YAML-serializable summary of the disassembled program.

    Collects an IdaFunction for every address yielded by Functions() over the
    supplied range and keeps them in two lists: functionList for entries whose
    isImport flag is false, importList for the others.
    '''
    yaml_tag = u'!fuzz.io/IdaProgram'

    def __init__(self, textSegmentStart, textSegmentEnd):
        '''
        Build the program summary for the given address range.

        @param textSegmentStart: Start address handed to Functions().
        @param textSegmentEnd:   End address handed to Functions().
        '''
        self.disName = GetInputFilePath()
        self.functionList = []
        self.importList = []

        # The bounds stay numeric while initialize_functions() uses them.
        self.textSegmentStart = textSegmentStart
        self.textSegmentEnd = textSegmentEnd

        #
        # Pull out all information about functions
        #
        self.initialize_functions()

        self.numFunctions = len(self.functionList)
        self.numImports = len(self.importList)
        self.numBlocks = self.iter_functions()

        # Only the serialized form of the bounds is a hex string.
        self.textSegmentStart, self.textSegmentEnd = hex(textSegmentStart), hex(textSegmentEnd)

    def initialize_functions(self):
        '''
        Create an IdaFunction per function address and file it under
        importList or functionList according to its isImport flag.
        '''
        for address in Functions(self.textSegmentStart, self.textSegmentEnd):
            function = IdaFunction(address)
            if function.isImport:
                destination = self.importList
            else:
                destination = self.functionList
            destination.append(function)

    def iter_functions(self):
        '''
        Return the sum of iter_chunks() over every entry of functionList.
        '''
        return sum(function.iter_chunks() for function in self.functionList)
|
41
|
+
|
42
|
+
#
|
43
|
+
# Create a class for a function
|
44
|
+
#
|
45
|
+
class IdaFunction(yaml.YAMLObject):
    '''
    YAML-serializable summary of one function.

    Entries for which no function structure exists, or whose flags carry
    FUNC_LIB or FUNC_STATIC, are recorded as imports (isImport is True) with
    zeroed frame information and no chunks.
    '''
    yaml_tag = u'!fuzz.io/IdaFunction'

    def __init__(self, effectiveAddress):
        '''
        @param effectiveAddress: Address of the function to describe.
        '''
        # (an earlier revision used Name(effectiveAddress) here)
        self.name = idaapi.get_func_name(effectiveAddress)
        self.argSize = 0
        self.numArgs = 0
        self.numLocalVars = 0
        self.isImport = False
        self.chunkList = []
        self.numChunks = 0
        self.cyclomaticComplexity = 0

        #
        # Get the function flags and structure
        #
        flags = GetFunctionFlags(effectiveAddress)
        funcStruct = idaapi.get_func(effectiveAddress)

        # if we're not in a "real" function, set the addresses and name manually and stop analyzing.
        if not funcStruct or flags & FUNC_LIB or flags & FUNC_STATIC:
            self.startAddress = hex(effectiveAddress)
            self.endAddress = hex(effectiveAddress)
            self.name = idaapi.get_name(effectiveAddress, effectiveAddress)
            self.savedRegSize = 0
            self.localVarSize = 0
            self.frameSize = 0
            self.retSize = 0
            self.isImport = True

            # TODO: argSize, numArgs and numLocalVars keep their zero defaults
            # for imports; fix these if possible.
            return

        #
        # So we know we're in a real function; the frame is only looked up
        # once funcStruct is known to be valid.
        #
        frameStruct = idaapi.get_frame(funcStruct)

        # startAddress stays numeric until the chunks and the complexity have
        # been computed, because both need the raw address.
        self.startAddress = funcStruct.startEA
        self.endAddress = hex(PrevAddr(funcStruct.endEA))
        self.savedRegSize = funcStruct.frregs
        self.localVarSize = funcStruct.frsize
        self.frameSize = idaapi.get_frame_size(funcStruct)
        self.retSize = idaapi.get_frame_retsize(funcStruct)

        print("Saved Reg Size %d" % self.savedRegSize)
        print("FRSIZE %d" % self.localVarSize)

        #
        # Fixup numbers for arguments and local variables
        #
        self.__init_args_and_local_vars__(funcStruct, frameStruct)

        #
        # Initialize chunks
        #
        self.collect_function_chunks()
        self.cyclomaticComplexity = self.calculate_cyclomatic_complexity(self.startAddress)
        self.startAddress = hex(self.startAddress)

    def calculate_cyclomatic_complexity (self, function_ea):
        '''Calculate the cyclomatic complexity measure for a function.

        Given the starting address of a function, it will find all
        the basic block's boundaries and edges between them and will
        return the cyclomatic complexity, defined as:

            CC = Edges - Nodes + 2
        http://www.openrce.org/articles/full_view/11

        @param function_ea: Numeric start address of the function.

        @return: len(edges) - len(boundaries) + 2
        '''

        f_start = function_ea
        f_end = FindFuncEnd(function_ea)

        edges = set()
        boundaries = set((f_start,))

        # For each defined element in the function.
        for head in Heads(f_start, f_end):

            # Only instructions contribute references.
            if not isCode(GetFlags(head)):
                continue

            # Get the references made from the current instruction
            # and keep only the ones local to the function.
            refs = set(ref for ref in CodeRefsFrom(head, 0) if f_start <= ref <= f_end)
            if not refs:
                continue

            # If the flow continues also to the next (address-wise)
            # instruction, we add a reference to it.
            # For instance, a conditional jump will not branch
            # if the condition is not met, so we save that
            # reference as well.
            next_head = NextHead(head, f_end)
            if isFlow(GetFlags(next_head)):
                refs.add(next_head)

            # Update the boundaries found so far. Builtin sets spell this
            # update(); union_update() only exists on the old sets.Set class.
            boundaries.update(refs)

            # For each of the references found, an edge is created.
            for ref in refs:
                # If the flow could also come from the address
                # previous to the destination of the branching
                # an edge is created.
                if isFlow(GetFlags(ref)):
                    edges.add((PrevHead(ref, f_start), ref))
                edges.add((head, ref))

        return len(edges) - len(boundaries) + 2

    def __init_args_and_local_vars__ (self, funcStruct, frameStruct):
        '''
        Calculate the total size of arguments, # of arguments and # of local variables. Update the internal class member
        variables appropriately. Taken directly from paimei

        Leaves everything untouched when frameStruct is false; otherwise
        localVarSize is recomputed from zero and argSize, numArgs and
        numLocalVars are updated.

        @param funcStruct:  Function structure (not read by this method).
        @param frameStruct: Frame structure whose members are walked.
        '''

        if not frameStruct:
            return

        # keyed by member start offset, so each offset is counted once
        args = {}
        local_vars = {}

        # Members whose running offset exceeds the frame size are counted as
        # arguments, everything else as a local variable.
        argument_boundary = self.frameSize
        frame_offset = frameStruct.get_member(0).soff
        self.localVarSize = 0

        member_count = frameStruct.memqty
        for i in range(0, member_count):
            member = frameStruct.get_member(i)
            end_offset = member.soff

            # The span of a member runs to the start of the next one; the
            # last member uses its own end offset instead.
            if i == member_count - 1:
                begin_offset = member.eoff
            else:
                begin_offset = frameStruct.get_member(i+1).soff

            frame_offset += (begin_offset - end_offset)

            # grab the name of the current local variable or argument.
            # NOTE(review): the lookup is keyed by the member's start offset
            # here, while an earlier revision passed member.id - confirm which
            # argument get_member_name() expects.
            name = idaapi.get_member_name(member.soff)
            if name is None:
                continue

            if frame_offset > argument_boundary:
                args[end_offset] = name
                self.argSize = self.argSize + idaapi.get_member_size(member)
            elif not name.startswith(" "):
                # a name starting with a space is ignored as it is either the
                # stack saved ebp or eip (a crude check, kept from the original).
                local_vars[end_offset] = name
                self.localVarSize = self.localVarSize + idaapi.get_member_size(member)

        self.numArgs = len(args)
        self.numLocalVars = len(local_vars)

    def iter_chunks(self):
        '''
        Return the total number of blocks held by the chunks in chunkList.
        '''
        return sum(len(chunk.blockList) for chunk in self.chunkList)

    def collect_function_chunks(self):
        '''
        Build an IdaChunk for every function chunk (including the main one) of the current function and append it
        to chunkList, then record the count in numChunks. Ripped from idb2reml (Ero Carerra). Modified slightly by
        Roger Seagle.

        Must run while startAddress is still numeric, because it is passed to idaapi.get_func().

        @rtype:  None
        @return: None
        '''

        #
        # Loop through all chunks for a function
        #
        iterator = idaapi.func_tail_iterator_t(idaapi.get_func(self.startAddress))
        status = iterator.main()

        while status:
            self.chunkList.append(IdaChunk(iterator.chunk()))
            status = iterator.next()

        # numChunks was previously left at its initial 0 for every function.
        self.numChunks = len(self.chunkList)
|
248
|
+
|
249
|
+
#
|
250
|
+
# Create a class for a function chunk
|
251
|
+
#
|
252
|
+
class IdaChunk(yaml.YAMLObject):
    '''
    YAML-serializable description of one function chunk, split into IdaBlock
    entries stored in blockList.
    '''
    yaml_tag = u'!fuzz.io/IdaChunk'

    def __init__(self, chunk):
        '''
        @param chunk: Object exposing numeric startEA and endEA attributes
                      (IdaFunction passes the value of func_tail_iterator_t.chunk()).
        '''
        self.startEA = chunk.startEA
        self.endEA = chunk.endEA
        self.blockList = []
        self.numBlocks = 0

        # address at which the block currently being assembled begins
        block_start = self.startEA

        #
        # NOTE(review): a block is only emitted at a ret instruction, at a
        # branch target or at a branching instruction. Instructions that follow
        # the last such point (code mixed with data and no ret, or code that
        # just calls exit) end up in no block; the original author flagged
        # this as a possible bug.
        #

        #
        # Break down the chunk into blocks
        #
        for effectiveAddress in Heads(self.startEA, self.endEA):

            #
            # Ignore Head if data
            #
            if not isCode(GetFlags(effectiveAddress)):
                continue

            #
            # Get the list of places branched to and from. These two lookups
            # had been commented out although every branch below reads both
            # names, which raised NameError on the first instruction.
            #
            branchesTo = self._branches_to(effectiveAddress)
            branchesFrom = self._branches_from(effectiveAddress)

            # ensure that next_ea references code and not data. The walk goes
            # forward (it used to step backwards, which could only return to
            # the current instruction or earlier) and stops at BADADDR so it
            # cannot spin when no further head exists.
            next_ea = NextNotTail(effectiveAddress)
            while next_ea != idaapi.BADADDR and not isCode(GetFlags(next_ea)):
                next_ea = NextNotTail(next_ea)

            # if the current instruction is a ret instruction, end the current node at ea.
            if idaapi.is_ret_insn(effectiveAddress):
                self.blockList.append(IdaBlock(block_start, effectiveAddress, branchesTo, branchesFrom))
                block_start = next_ea

            # if something branches to the current instruction, close the running block and start a new one at ea.
            # NOTE(review): the closed block ends at ea and the new block
            # starts at ea, so this instruction belongs to both - confirm
            # that this is intended.
            elif branchesTo and block_start != effectiveAddress:
                self.blockList.append(IdaBlock(block_start, effectiveAddress, branchesTo, branchesFrom))
                block_start = effectiveAddress

            # if there is a branch from the current instruction, end the current node at ea
            # and start a new block at the next ea.
            elif branchesFrom:
                self.blockList.append(IdaBlock(block_start, effectiveAddress, branchesTo, branchesFrom))
                block_start = next_ea

        #
        # Calculate the number of blocks
        #
        self.numBlocks = len(self.blockList)

        #
        # Convert addresses to hex
        #
        self.startEA = hex(self.startEA)
        self.endEA = hex(self.endEA)

    ####################################################################################################################
    def _branches_from (self, ea):
        '''
        Enumerate and return the list of branches from the supplied address. Call instructions yield an empty list,
        and so does an instruction whose only reference is the next logical instruction.
        Part of the reason why we even need this function is that the "flow" argument to CodeRefsFrom does not appear
        to be functional.

        @type  ea: DWORD
        @param ea: Effective address of instruction to enumerate jumps from.

        @rtype:  List
        @return: List of branches from the specified address (raw values as produced by CodeRefsFrom).
        '''

        if idaapi.is_call_insn(ea):
            return []

        xrefs = list(CodeRefsFrom(ea, 1))

        # if the only xref from ea is next ea, then return nothing.
        if len(xrefs) == 1 and xrefs[0] == NextNotTail(ea):
            return []

        return xrefs


    ####################################################################################################################
    def _branches_to (self, ea):
        '''
        Enumerate and return the list of branches to the supplied address, *excluding* the previous logical instruction
        and any reference that comes from a call instruction.
        Part of the reason why we even need this function is that the "flow" argument to CodeRefsTo does not appear to
        be functional.

        @type  ea: DWORD
        @param ea: Effective address of instruction to enumerate jumps to.

        @rtype:  List
        @return: List of branches to the specified address, each already converted with hex().
        '''

        prev_ea = PrevNotTail(ea)
        prev_code_ea = prev_ea

        # Debug trace of the previous head, emitted before it is walked back
        # onto code (despite its wording it does not signal an error).
        print("Error is: %s" % prev_code_ea)

        # step back further until the previous head is an instruction
        while not isCode(GetFlags(prev_code_ea)):
            prev_code_ea = PrevNotTail(prev_code_ea)

        excluded = [prev_ea, prev_code_ea]
        return [hex(xref)
                for xref in CodeRefsTo(ea, 1)
                if not idaapi.is_call_insn(xref) and xref not in excluded]
|
384
|
+
|
385
|
+
#
|
386
|
+
# Create a class for a basic block
|
387
|
+
#
|
388
|
+
class IdaBlock(yaml.YAMLObject):
    '''
    YAML-serializable record of one block: its bounds, the branch lists that
    were supplied for it and the number of instructions it spans.
    '''
    yaml_tag = u'!fuzz.io/IdaBlock'

    def __init__(self, effectiveAddressStart, effectiveAddressEnd, branchesTo, branchesFrom):
        '''
        @param effectiveAddressStart: Numeric address of the first head of the block.
        @param effectiveAddressEnd:   Numeric address of the last head of the block.
        @param branchesTo:            Stored unchanged as branchTo.
        @param branchesFrom:          Sequence of numeric addresses; stored as hex strings in branchFrom.
        '''
        self.startEA = hex(effectiveAddressStart)
        self.endEA = hex(effectiveAddressEnd)
        self.branchTo = branchesTo

        #
        # Convert branches to hex addresses
        #
        self.branchFrom = [hex(target) for target in branchesFrom]

        #
        # Get the number of instructions in the block; the end address is
        # bumped by one so that the head at effectiveAddressEnd is included.
        #
        instruction_count = 0
        for head in Heads(effectiveAddressStart, effectiveAddressEnd + 1):
            if isCode(GetFlags(head)):
                instruction_count += 1
        self.numInstructions = instruction_count
|
409
|
+
|
410
|
+
#
# Ask where to save the results
#
infoFilename = AskFile(1, "*.fz", "Please select/create a file to save static disassembly information:")

if not infoFilename:
    # AskFile handed back nothing (presumably the dialog was cancelled), so
    # there is no file to write; stop here instead of failing inside open().
    print("No output file selected; static disassembly information was not saved.")
else:
    #
    # Open the file
    #
    infoFile = open(infoFilename, 'w')
    try:
        #
        # Get the start and end of the text section: the value returned by
        # SegByName is resolved through SegByBase before it is used as the
        # segment start address.
        #
        textSegmentSelector = SegByName("__text")
        textSegmentStart = SegByBase(textSegmentSelector)
        textSegmentEnd = SegEnd(textSegmentStart)

        #
        # Pull out all the information
        #
        disassembledProgram = IdaProgram(textSegmentStart, textSegmentEnd)

        #
        # Dump the disassembled program info in a portable format
        #
        yaml.dump(disassembledProgram, infoFile, default_flow_style=False)
    finally:
        #
        # Be nice: close the file even when the analysis or the dump fails
        #
        infoFile.close()
|