Opcodes 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/module/Arch.h ADDED
@@ -0,0 +1,37 @@
1
+ /* Arch.h
2
+ * Copyright 2010 Thoughtgang <http://www.thoughtgang.org>
3
+ * Written by TG Community Developers <community@thoughtgang.org>
4
+ * Released under the GNU Public License, version 3.
5
+ * See http://www.gnu.org/licenses/gpl.txt for details.
6
+ */
7
+
8
+ #ifndef OPCODES_EXT_ARCH_H
9
+ #define OPCODES_EXT_ARCH_H
10
+
11
+ #include <bfd.h>
12
+ #include <dis-asm.h>
13
+
14
+ /* Disassembler definitions for supported architectures */
15
+ typedef struct {
16
+ const char * name; /* unique name for disassembler */
17
+ enum bfd_architecture arch; /* architecture from bfd.h */
18
+ unsigned long mach; /* machine from bfd.h or 0 */
19
+ disassembler_ftype fn; /* print_insn fn from dis-asm.h */
20
+ } Opcodes_disasm_def;
21
+
22
+ /* disassembler iterator callback function. Returns 0 if iteration should
23
+ * halt; 1 otherwise. */
24
+ typedef int (*OPCODES_DISASM_ITER_FN) ( const Opcodes_disasm_def *, void * );
25
+
26
+ /* iterate over all available disassemblers, invoking 'fn' on each */
27
+ void Opcodes_disasm_iter( OPCODES_DISASM_ITER_FN fn, void * arg );
28
+
29
+ /* return the disassembler definition for 'name', or the invalid
30
+ * definition. */
31
+ const Opcodes_disasm_def * Opcodes_disasm_for_name( const char * name );
32
+
33
+ /* return an invalid disassembler definition with safe values for
34
+ * name and fn, and an architecture of bfd_arch_unknown. */
35
+ const Opcodes_disasm_def * Opcodes_disasm_invalid( void );
36
+
37
+ #endif
data/module/Opcodes.c ADDED
@@ -0,0 +1,473 @@
1
+ /* Opcodes.c
2
+ * Copyright 2010 Thoughtgang <http://www.thoughtgang.org>
3
+ * Written by TG Community Developers <community@thoughtgang.org>
4
+ * Released under the GNU Public License, version 3.
5
+ * See http://www.gnu.org/licenses/gpl.txt for details.
6
+ */
7
+
8
+ #include <dis-asm.h>
9
+ #include <ruby.h>
10
+
11
+ #include "ruby_compat.h"
12
+
13
+ #include "Opcodes.h"
14
+ #include "Arch.h"
15
+
16
+ #ifdef RUBY_18
17
+ #define IVAR(attr) attr
18
+ #else
19
+ #define IVAR(attr) "@" attr
20
+ #endif
21
+
22
+ static VALUE modOpcodes;
23
+ static VALUE clsDisasm;
24
+
25
+ static VALUE str_to_sym( const char * str ) {
26
+ VALUE var = rb_str_new_cstr(str);
27
+ return rb_funcall(var, rb_intern("to_sym"), 0);
28
+ }
29
+
30
+ static int generic_print_address_wrapper(bfd_vma vma, disassemble_info *info ) {
31
+ generic_print_address(vma, info);
32
+ return 1;
33
+ }
34
+
35
+ /* ---------------------------------------------------------------------- */
36
+ struct disasm_def_for_bfd {
37
+ bfd * abfd;
38
+ disassembler_ftype fn;
39
+ };
40
+
41
+ static int is_def_for_bfd( const Opcodes_disasm_def * def, void * arg ) {
42
+ struct disasm_def_for_bfd * out = (struct disasm_def_for_bfd *) arg;
43
+ if ( def->arch == out->abfd->arch_info->arch &&
44
+ def->arch == out->abfd->arch_info->mach ) {
45
+ out->fn = def->fn;
46
+ return 0;
47
+ }
48
+
49
+ return 1; /* not found; continue */
50
+ }
51
+
52
+ /* return disassembler fn for BFD */
53
+ static disassembler_ftype fn_for_bfd( bfd * abfd ) {
54
+ struct disasm_def_for_bfd arg = { abfd, NULL };
55
+ const Opcodes_disasm_def * invalid = Opcodes_disasm_invalid();
56
+
57
+ /* initialize to generic_print_address_wrapper */
58
+ arg.fn = invalid->fn;
59
+
60
+ Opcodes_disasm_iter( is_def_for_bfd, &arg );
61
+
62
+ return arg.fn;
63
+ }
64
+
65
+ /* configure disassemble_info for specified architecture */
66
+ static void config_disasm_arch( struct disassemble_info * info, VALUE str ) {
67
+ const Opcodes_disasm_def * def;
68
+ const char * name = rb_string_value_cstr(&str);
69
+
70
+ def = Opcodes_disasm_for_name( name );
71
+ info->application_data = def->fn;
72
+ info->arch = def->arch;
73
+ info->mach = def->mach;
74
+ }
75
+
76
+ static int fill_disasm_def_array( const Opcodes_disasm_def * def, void * arg ) {
77
+ VALUE * ary = (VALUE *) arg;
78
+ const Opcodes_disasm_def * invalid = Opcodes_disasm_invalid();
79
+
80
+ /* do not report the invalid architecture */
81
+ if ( def != invalid ) {
82
+ rb_ary_push( *ary, rb_str_new_cstr(def->name) );
83
+ }
84
+
85
+ return 1;
86
+ }
87
+
88
+ /* fill array with available disassemblers */
89
+ static void get_available_disassemblers( VALUE * ary ) {
90
+ Opcodes_disasm_iter( fill_disasm_def_array, ary );
91
+ }
92
+
93
+ /* ---------------------------------------------------------------------- */
94
+ /* convert instruction type code to string */
95
+ static const char * insn_type_to_str( enum dis_insn_type t ) {
96
+ const char *s;
97
+ switch (t) {
98
+ case dis_noninsn: s = "Invalid"; break;
99
+ case dis_nonbranch: s = "Not branch"; break;
100
+ case dis_branch: s = "Unconditional branch"; break;
101
+ case dis_condbranch: s = "Conditional branch"; break;
102
+ case dis_jsr: s = "Jump to subroutine"; break;
103
+ case dis_condjsr: s = "Conditional jump to subroutine"; break;
104
+ case dis_dref: s = "Data reference"; break;
105
+ case dis_dref2: s = "Two data references"; break;
106
+ }
107
+ return s;
108
+ }
109
+
110
+ /* ---------------------------------------------------------------------- */
111
+ /* Disassembler class */
112
+
113
+ /* libopcodes callback */
114
+ /* this appends the tokens emitted by libopcodes to the insn array */
115
+ static int disasm_fprintf( void * stream, const char * format, ... ) {
116
+ char buf[DISASM_MAX_STR];
117
+ int rv;
118
+ VALUE ary = (VALUE) stream;
119
+
120
+ va_list args;
121
+ va_start (args, format);
122
+ rv = vsnprintf( buf, DISASM_MAX_STR - 1, format, args );
123
+ rb_ary_push( ary, rb_str_new_cstr(buf) );
124
+ va_end (args);
125
+
126
+ return rv;
127
+ }
128
+
129
+ /* fill instruction info hash based on disassemble_info */
130
+ static VALUE disasm_insn_info( struct disassemble_info * info ) {
131
+ VALUE hash = rb_hash_new();
132
+
133
+ if (! info->insn_info_valid ) {
134
+ return hash;
135
+ }
136
+
137
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_INFO_DELAY),
138
+ INT2NUM((int) info->branch_delay_insns) );
139
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_INFO_DATA_SZ),
140
+ INT2NUM((int) info->data_size) );
141
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_INFO_TYPE),
142
+ rb_str_new_cstr(insn_type_to_str(info->insn_type)) );
143
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_INFO_TGT),
144
+ INT2NUM(info->target) );
145
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_INFO_TGT2),
146
+ INT2NUM(info->target2) );
147
+
148
+ return hash;
149
+ }
150
+
151
+ /* disassemble a single instruction at address, returning a ruby hash. */
152
+ static VALUE disasm_insn( struct disassemble_info * info, bfd_vma vma,
153
+ unsigned int * length ) {
154
+ disassembler_ftype fn;
155
+ int size;
156
+ VALUE ary = rb_ary_new();
157
+ VALUE hash = rb_hash_new();
158
+
159
+ if ( vma < info->buffer_vma ) {
160
+ /* assume small VMAs are offsets into buffer */
161
+ vma += info->buffer_vma;
162
+ }
163
+
164
+ if ( vma >= info->buffer_vma + info->buffer_length) {
165
+ rb_raise(rb_eArgError, "VMA %d exceeds buffer length",
166
+ (int) vma);
167
+ }
168
+
169
+ /* prepare info for insn disassmbly */
170
+ info->insn_info_valid = 0;
171
+ info->stream = (void *) ary;
172
+
173
+ /* invoke disassembly */
174
+ fn = (disassembler_ftype) info->application_data;
175
+ size = fn( vma, info );
176
+
177
+ /* increase # bytes disassembled */
178
+ if ( length ) {
179
+ *length += size;
180
+ }
181
+
182
+ /* fill output hash */
183
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_VMA), INT2NUM(vma) );
184
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_SIZE), INT2NUM(size) );
185
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_INFO), disasm_insn_info(info) );
186
+ rb_hash_aset( hash, str_to_sym(DIS_INSN_INSN), rb_ary_dup(ary) );
187
+
188
+ return hash;
189
+ }
190
+
191
+ /* support for different target types */
192
+ struct disasm_target {
193
+ unsigned char * buf;
194
+ unsigned int buf_len;
195
+ asection * sec;
196
+ asymbol * sym;
197
+ bfd * abfd;
198
+ unsigned int ruby_manages_buf;
199
+ };
200
+
201
+ /* fill disassemble_info struct based on contents of target struct */
202
+ static void config_libopcodes_for_target( struct disassemble_info * info,
203
+ struct disasm_target * tgt ) {
204
+ info->buffer_vma = info->buffer_length = 0;
205
+ info->buffer = NULL;
206
+
207
+
208
+ if ( tgt->sec ) {
209
+ info->buffer_vma = tgt->sec->vma;
210
+ info->buffer_length = tgt->sec->size;
211
+ info->buffer = tgt->buf;
212
+
213
+ } else if ( tgt->sym ) {
214
+ unsigned int vma_off;
215
+ symbol_info sym;
216
+ asection * sec = tgt->sym->section;
217
+
218
+ bfd_symbol_info(tgt->sym, &sym);
219
+
220
+ if (! sym.value || (sym.value > sec->vma + sec->size ) ) {
221
+ rb_raise(rb_eRuntimeError, "Invalid symbol value 0x%X",
222
+ ((unsigned long) sym.value));
223
+ }
224
+
225
+ vma_off = sym.value - sec->vma;
226
+
227
+ /* disassembly buffer is set to start of symbol in section */
228
+ info->buffer_vma = sym.value;
229
+ info->buffer = &tgt->buf[vma_off];
230
+ info->buffer_length = sec->size - vma_off;
231
+
232
+ } else if ( tgt->buf ) {
233
+ /* entire buffer is loaded at offset 9 */
234
+ info->buffer_length = tgt->buf_len;
235
+ info->buffer = tgt->buf;
236
+ }
237
+
238
+ /* get disassembler function */
239
+ if ( tgt->abfd && (! info->application_data ||
240
+ info->application_data == generic_print_address_wrapper) ) {
241
+ info->application_data = fn_for_bfd( tgt->abfd );
242
+ }
243
+
244
+ }
245
+
246
+ /* fill target struct based on ruby inout value */
247
+ static void load_target( VALUE tgt, struct disasm_target * dest ) {
248
+
249
+ memset( dest, 0, sizeof(struct disasm_target) );
250
+
251
+ if ( Qtrue == rb_obj_is_kind_of( tgt, rb_cString) ) {
252
+ /* string of bytes */
253
+ dest->buf = (unsigned char *) RSTRING_PTR(tgt);
254
+ dest->buf_len = RSTRING_LEN(tgt);
255
+ dest->ruby_manages_buf = 1;
256
+
257
+ } else if ( Qtrue == rb_obj_is_kind_of( tgt, rb_cArray) ) {
258
+ /* array of bytes */
259
+ int i;
260
+ dest->buf_len = RARRAY_LEN(tgt);
261
+ dest->buf = calloc(dest->buf_len, 1);
262
+ for( i=0; i < dest->buf_len; i++ ) {
263
+ VALUE val = rb_ary_entry( tgt, i );
264
+ dest->buf[i] = (unsigned char) NUM2UINT(val);
265
+ }
266
+
267
+ } else if ( Qtrue == rb_obj_is_kind_of( tgt, rb_cIO) ) {
268
+ /* IO (file) object */
269
+ VALUE str = rb_funcall( tgt, rb_intern("read"), 0 );
270
+ dest->buf = (unsigned char*) RSTRING_PTR(str);
271
+ dest->buf_len = RSTRING_LEN(str);
272
+ dest->ruby_manages_buf = 1;
273
+
274
+ } else if ( Qtrue == rb_obj_is_kind_of( tgt,
275
+ path2class("Bfd::Section") ) ) {
276
+ /* BFD Section */
277
+ Data_Get_Struct(tgt, asection, dest->sec);
278
+ if ( dest->sec ) {
279
+ dest->abfd = dest->sec->owner;
280
+ /* load section contents */
281
+ bfd_malloc_and_get_section( dest->abfd, dest->sec,
282
+ &dest->buf );
283
+ }
284
+
285
+ } else if ( Qtrue == rb_obj_is_kind_of( tgt,
286
+ path2class("Bfd::Symbol") ) ) {
287
+ /* BFD Symbol */
288
+ Data_Get_Struct(tgt, asymbol, dest->sym);
289
+ if ( dest->sym ) {
290
+ dest->abfd = dest->sym->the_bfd;
291
+ /* load contents of section containing symbol */
292
+ bfd_malloc_and_get_section( dest->abfd,
293
+ dest->sym->section,
294
+ &dest->buf );
295
+ }
296
+
297
+ } else {
298
+ rb_raise(rb_eArgError,
299
+ "Expecting IO, String, Bfd::Target,or Bfd::Section");
300
+ }
301
+ }
302
+
303
+ /* free any memory allocated when loading target */
304
+ static void unload_target( struct disasm_target * tgt ) {
305
+ if ( tgt->buf && ! tgt->ruby_manages_buf ) {
306
+ free(tgt->buf);
307
+ }
308
+ }
309
+
310
+ /* shared code for loading a target, configuring libopcodes, and getting
311
+ * options */
312
+ static void disasm_init( struct disassemble_info * info,
313
+ struct disasm_target * target, bfd_vma * vma,
314
+ VALUE class, VALUE tgt, VALUE hash ) {
315
+ const char *opts;
316
+ bfd_vma vma_arg;
317
+ VALUE var;
318
+
319
+ load_target( tgt, target );
320
+ config_libopcodes_for_target( info, target );
321
+
322
+ /* override vma if caller requested it */
323
+ vma_arg = rb_hash_lookup2(hash, str_to_sym(DIS_ARG_BUFVMA), Qnil);
324
+ if ( vma_arg != Qnil ) {
325
+ info->buffer_vma = NUM2UINT(vma_arg);
326
+ }
327
+
328
+ /* disassembly options: offset/vma to disasm, vma of buffer */
329
+ /* vma to disassemble */
330
+ vma_arg = rb_hash_lookup2(hash, str_to_sym(DIS_ARG_VMA), Qnil);
331
+ *vma = (vma_arg == Qnil) ? info->buffer_vma : NUM2UINT(vma_arg);
332
+
333
+ /* libopcodes disassembler options */
334
+ var = rb_iv_get(class, IVAR(DIS_ATTR_OPTIONS));
335
+ if ( var != Qnil ) {
336
+ info->disassembler_options = StringValueCStr( var );
337
+ }
338
+ }
339
+
340
+ /* disassemble a single instruction */
341
+ static VALUE cls_disasm_single(VALUE class, VALUE tgt, VALUE hash) {
342
+ struct disassemble_info * info;
343
+ struct disasm_target target;
344
+ bfd_vma vma;
345
+ VALUE result;
346
+
347
+ Data_Get_Struct(class, struct disassemble_info, info);
348
+ if (! info ) {
349
+ rb_raise( rb_eRuntimeError, "Invalid disassemble_info" );
350
+ }
351
+
352
+ disasm_init( info, &target, &vma, class, tgt, hash );
353
+
354
+ result = disasm_insn( info, vma, NULL );
355
+
356
+ unload_target(&target);
357
+
358
+ return result;
359
+ }
360
+
361
+ /* disassemble a buffer */
362
+ static VALUE cls_disasm_dis(VALUE class, VALUE tgt, VALUE hash) {
363
+ struct disassemble_info * info;
364
+ struct disasm_target target;
365
+ unsigned int pos, length;
366
+ bfd_vma vma;
367
+ VALUE ary;
368
+
369
+ Data_Get_Struct(class, struct disassemble_info, info);
370
+ if (! info ) {
371
+ rb_raise( rb_eRuntimeError, "Invalid disassemble_info" );
372
+ }
373
+
374
+ disasm_init( info, &target, &vma, class, tgt, hash );
375
+
376
+ /* length to disassemble to */
377
+ length = rb_hash_lookup2(hash, str_to_sym(DIS_ARG_LENGTH), Qnil);
378
+ length = (length == Qnil) ? info->buffer_length : NUM2UINT(length);
379
+
380
+ /* number of bytes disassembled */
381
+ ary = rb_ary_new();
382
+ for ( pos = 0; pos < length; ) {
383
+ /* yes, pos is modified by disasm_insn. deal. */
384
+ rb_ary_push(ary, disasm_insn( info, vma + pos, &pos ));
385
+ }
386
+
387
+ unload_target(&target);
388
+
389
+ return ary;
390
+ }
391
+
392
+ /* return an array of supported architectures */
393
+ static VALUE cls_disasm_arch(VALUE class) {
394
+ VALUE ary = rb_ary_new();
395
+ get_available_disassemblers( &ary );
396
+ return ary;
397
+ }
398
+
399
+
400
+ /* instantiate a new Disassembler object */
401
+ static VALUE cls_disasm_new(VALUE class, VALUE hash) {
402
+ struct disassemble_info * info;
403
+ VALUE instance, var;
404
+ VALUE argv[1] = {Qnil};
405
+
406
+ /* prepare disassemble_info struct */
407
+ instance = Data_Make_Struct(class, struct disassemble_info, 0, free,
408
+ info);
409
+ var = rb_ary_new();
410
+ init_disassemble_info(info, (void *) var, disasm_fprintf );
411
+ rb_obj_call_init(instance, 0, argv);
412
+
413
+
414
+ /* libopcodes disassembler options string */
415
+ var = rb_hash_lookup2(hash, str_to_sym(DIS_ARG_OPTS),
416
+ rb_str_new_cstr(""));
417
+ rb_iv_set(instance, IVAR(DIS_ATTR_OPTIONS), var);
418
+
419
+ /* -- Get Disassembler Function (print-insn-*) */
420
+ /* default to hex dump */
421
+ info->application_data = generic_print_address_wrapper;
422
+ info->disassembler_options = NULL;
423
+
424
+ /* configure arch based on BFD, if provided */
425
+ var = rb_hash_lookup(hash, str_to_sym(DIS_ARG_BFD));
426
+ if ( var != Qnil) {
427
+ if ( Qtrue == rb_obj_is_kind_of( var,
428
+ path2class("Bfd::Target") ) ) {
429
+ bfd * abfd;
430
+ /* nasty nasty! touching other peoples' privates! */
431
+ Data_Get_Struct(var, bfd, abfd);
432
+ info->application_data = disassembler(abfd);
433
+ info->arch = abfd->arch_info->arch;
434
+ info->mach = abfd->arch_info->mach;
435
+ } else {
436
+ rb_raise(rb_eArgError, "Invalid :bfd argument");
437
+ }
438
+ }
439
+
440
+ /* configure architecture manually, if provided */
441
+ var = rb_hash_lookup(hash, str_to_sym(DIS_ARG_ARCH));
442
+ if ( var != Qnil) {
443
+ config_disasm_arch(info, var);
444
+ }
445
+
446
+ return instance;
447
+ }
448
+
449
+
450
+ static void init_disasm_class( VALUE modOpcodes ) {
451
+ clsDisasm = rb_define_class_under(modOpcodes, DIS_CLASS_NAME,
452
+ rb_cObject);
453
+ /* class methods */
454
+ rb_define_singleton_method(clsDisasm, "ext_new", cls_disasm_new, 1);
455
+ rb_define_singleton_method(clsDisasm, DIS_METHOD_ARCH, cls_disasm_arch,
456
+ 0);
457
+
458
+ /* instance attributes */
459
+ rb_define_attr(clsDisasm, DIS_ATTR_OPTIONS, 1, 1);
460
+
461
+ /* instance methods */
462
+ rb_define_method(clsDisasm, DIS_FN_DIS_DIS, cls_disasm_dis, 2);
463
+ rb_define_method(clsDisasm, DIS_FN_DIS_INSN, cls_disasm_single, 2);
464
+ }
465
+
466
+ /* ---------------------------------------------------------------------- */
467
+ /* Opcodes Module */
468
+
469
+ void Init_OpcodesExt() {
470
+ modOpcodes = rb_define_module(OPCODES_MODULE_NAME);
471
+
472
+ init_disasm_class(modOpcodes);
473
+ }