Opdis 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +2 -0
- data/LICENSE +674 -0
- data/LICENSE.README +8 -0
- data/README +101 -0
- data/examples/array_linear.rb +23 -0
- data/examples/bfd_entry.rb +24 -0
- data/examples/bfd_section.rb +24 -0
- data/examples/bfd_symbol.rb +27 -0
- data/examples/buf_linear.rb +25 -0
- data/examples/decoder.rb +45 -0
- data/examples/file_linear.rb +31 -0
- data/examples/libopcodes_options.rb +11 -0
- data/examples/resolver.rb +191 -0
- data/examples/supported_architectures.rb +11 -0
- data/examples/visited_handler.rb +61 -0
- data/examples/x86_decoder.rb +46 -0
- data/lib/Opdis.rb +123 -0
- data/module/Arch.c +364 -0
- data/module/Arch.h +37 -0
- data/module/Callbacks.c +266 -0
- data/module/Callbacks.h +43 -0
- data/module/Model.c +1275 -0
- data/module/Model.h +230 -0
- data/module/Opdis.c +850 -0
- data/module/Opdis.h +89 -0
- data/module/extconf.rb +126 -0
- data/module/rdoc_input/Callbacks.rb +143 -0
- data/module/rdoc_input/Model.rb +636 -0
- data/module/rdoc_input/Opdis.rb +253 -0
- data/module/ruby_compat.c +72 -0
- data/module/ruby_compat.h +25 -0
- data/tests/ut_opdis.rb +30 -0
- data/tests/ut_opdis_bfd.rb +556 -0
- metadata +109 -0
data/module/Opdis.h
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
/* Opdis.h
|
2
|
+
* Copyright 2010 Thoughtgang <http://www.thoughtgang.org>
|
3
|
+
* Written by TG Community Developers <community@thoughtgang.org>
|
4
|
+
* Released under the GNU Public License, version 3.
|
5
|
+
* See http://www.gnu.org/licenses/gpl.txt for details.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef OPDIS_RB_OPDIS_H
|
9
|
+
#define OPDIS_RB_OPDIS_H
|
10
|
+
|
11
|
+
/* method names */
|
12
|
+
#define DIS_METHOD_DISASM "ext_disassemble"
|
13
|
+
#define DIS_METHOD_usage "ext_usage"
|
14
|
+
|
15
|
+
/* attribute names */
|
16
|
+
#define DIS_ATTR_DECODER "insn_decoder"
|
17
|
+
#define DIS_ATTR_HANDLER "addr_tracker"
|
18
|
+
#define DIS_ATTR_RESOLVER "resolver"
|
19
|
+
#define DIS_ATTR_DEBUG "debug"
|
20
|
+
#define DIS_ATTR_SYNTAX "syntax"
|
21
|
+
#define DIS_ATTR_ARCH "arch"
|
22
|
+
#define DIS_ATTR_OPTS "opcodes_options"
|
23
|
+
|
24
|
+
/* argument (hash) names */
|
25
|
+
#define DIS_ARG_DECODER DIS_ATTR_DECODER
|
26
|
+
#define DIS_ARG_HANDLER DIS_ATTR_HANDLER
|
27
|
+
#define DIS_ARG_RESOLVER DIS_ATTR_RESOLVER
|
28
|
+
#define DIS_ARG_SYNTAX "syntax"
|
29
|
+
#define DIS_ARG_DEBUG "debug"
|
30
|
+
#define DIS_ARG_OPTIONS "options"
|
31
|
+
#define DIS_ARG_STRATEGY "strategy"
|
32
|
+
#define DIS_ARG_ARCH "arch"
|
33
|
+
#define DIS_ARG_VMA "vma"
|
34
|
+
#define DIS_ARG_OFFSET "offset"
|
35
|
+
#define DIS_ARG_LEN "length"
|
36
|
+
#define DIS_ARG_BUFVMA "buffer_vma"
|
37
|
+
|
38
|
+
/* constants */
|
39
|
+
#define DIS_ERR_BOUNDS_NAME "ERROR_BOUNDS"
|
40
|
+
#define DIS_ERR_BOUNDS "Bounds exceeded"
|
41
|
+
#define DIS_ERR_INVALID_NAME "ERROR_INVALID_INSN"
|
42
|
+
#define DIS_ERR_INVALID "Invalid instruction"
|
43
|
+
#define DIS_ERR_DECODE_NAME "ERROR_DECODE_INSN"
|
44
|
+
#define DIS_ERR_DECODE "Decoder error"
|
45
|
+
#define DIS_ERR_BFD_NAME "ERROR_BFD"
|
46
|
+
#define DIS_ERR_BFD "Bfd error"
|
47
|
+
#define DIS_ERR_MAX_NAME "ERROR_MAX_ITEMS"
|
48
|
+
#define DIS_ERR_MAX "Max insn items error"
|
49
|
+
#define DIS_ERR_UNK "Unknown error"
|
50
|
+
|
51
|
+
#define DIS_STRAT_SINGLE_NAME "STRATEGY_SINGLE"
|
52
|
+
#define DIS_STRAT_SINGLE "single-instruction"
|
53
|
+
#define DIS_STRAT_LINEAR_NAME "STRATEGY_LINEAR"
|
54
|
+
#define DIS_STRAT_LINEAR "linear"
|
55
|
+
#define DIS_STRAT_CFLOW_NAME "STRATEGY_CFLOW"
|
56
|
+
#define DIS_STRAT_CFLOW "control-flow"
|
57
|
+
#define DIS_STRAT_SYMBOL_NAME "STRATEGY_SYMBOL"
|
58
|
+
#define DIS_STRAT_SYMBOL "bfd-symbol"
|
59
|
+
#define DIS_STRAT_SECTION_NAME "STRATEGY_SECTION"
|
60
|
+
#define DIS_STRAT_SECTION "bfd-section"
|
61
|
+
#define DIS_STRAT_ENTRY_NAME "STRATEGY_ENTRY"
|
62
|
+
#define DIS_STRAT_ENTRY "bfd-entry"
|
63
|
+
|
64
|
+
#define DIS_CONST_STRATEGIES "STRATEGIES"
|
65
|
+
|
66
|
+
#define DIS_CONST_ARCHES "architectures"
|
67
|
+
|
68
|
+
#define DIS_SYNTAX_ATT "att"
|
69
|
+
#define DIS_SYNTAX_INTEL "intel"
|
70
|
+
|
71
|
+
#define DIS_CONST_SYNTAXES "SYNTAXES"
|
72
|
+
|
73
|
+
/* Output */
|
74
|
+
|
75
|
+
#define OUT_ATTR_ERRORS "errors"
|
76
|
+
#define OUT_METHOD_CONTAIN "containing"
|
77
|
+
|
78
|
+
/* BFD */
|
79
|
+
#define BFD_TGT_PATH "Bfd::Target"
|
80
|
+
#define BFD_SEC_PATH "Bfd::Section"
|
81
|
+
#define BFD_SYM_PATH "Bfd::Symbol"
|
82
|
+
|
83
|
+
#define OPDIS_MODULE_NAME "Opdis"
|
84
|
+
#define OPDIS_DISASM_CLASS_NAME "Disassembler"
|
85
|
+
#define OPDIS_OUTPUT_CLASS_NAME "Disassembly"
|
86
|
+
|
87
|
+
void Init_OpdisExt();
|
88
|
+
|
89
|
+
#endif
|
data/module/extconf.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
|
2
|
+
# Opdis Ruby extension config file
|
3
|
+
# Copyright 2010 Thoughtgang <http://www.thoughtgang.org>
|
4
|
+
# Options:
|
5
|
+
# --with-bfd-dir=path_to_binutils_install_base (/usr)
|
6
|
+
# --with-bfd-include=path_to_bfd.h (/usr/include)
|
7
|
+
# --with-bfd-lib=path_to_libbfd.so (/usr/lib)
|
8
|
+
# --with-opcodes-dir=path_to_binutils_install_base (/usr)
|
9
|
+
# --with-opcodes-include=path_to_dis-asm.h (/usr/include)
|
10
|
+
# --with-opcodes-lib=path_to_libopcodes.so (/usr/lib)
|
11
|
+
# --with-opdis-dir=path_to_opdis_install_base (/usr/local)
|
12
|
+
# --with-opdis-include=path_to_opdis_include_dir (/usr/local/include)
|
13
|
+
# --with-opdis-lib=path_to_libopdis.so (/usr/local/lib)
|
14
|
+
# --with-opdis=path_to_opdis_source_tree
|
15
|
+
# --with-objdump=path_to_objdump_binary (objdump)
|
16
|
+
# See README for more info.
|
17
|
+
|
18
|
+
require 'mkmf'
|
19
|
+
|
20
|
+
def require_header(name)
|
21
|
+
have_header(name) or raise "Missing header file #{name}"
|
22
|
+
end
|
23
|
+
|
24
|
+
def require_library(name, func)
|
25
|
+
have_library(name, func) or raise "Missing library #{name}"
|
26
|
+
end
|
27
|
+
|
28
|
+
def require_opdis_header(name, opdis_base)
|
29
|
+
return require_header(name) if not opdis_base
|
30
|
+
|
31
|
+
dirs = [opdis_base, opdis_base + "/opdis", opdis_base + "/include"]
|
32
|
+
find_header(name, *dirs) or raise "#{name} not found in #{dirs}"
|
33
|
+
end
|
34
|
+
|
35
|
+
def require_opdis_library(name, func, opdis_base)
|
36
|
+
return require_library(name, func) if not opdis_base
|
37
|
+
|
38
|
+
dirs = [opdis_base, opdis_base + "/lib", opdis_base + '/dist',
|
39
|
+
opdis_base + '/dist/.libs']
|
40
|
+
find_library(name, func, *dirs) or raise "#{name} not found in #{dirs}"
|
41
|
+
end
|
42
|
+
|
43
|
+
# ----------------------------------------------------------------------
|
44
|
+
# BFD
|
45
|
+
|
46
|
+
# allow user to specify specific binutils distro
|
47
|
+
dir_config('binutils')
|
48
|
+
|
49
|
+
require_header('bfd.h')
|
50
|
+
require_library('bfd', 'bfd_init')
|
51
|
+
|
52
|
+
# ----------------------------------------------------------------------
|
53
|
+
# OPCODES
|
54
|
+
|
55
|
+
# allow user to override libopcodes
|
56
|
+
dir_config('opcodes')
|
57
|
+
|
58
|
+
require_header('dis-asm.h')
|
59
|
+
require_library('opcodes', 'init_disassemble_info')
|
60
|
+
|
61
|
+
# ----------------------------------------------------------------------
|
62
|
+
# OPDIS
|
63
|
+
|
64
|
+
dir_config('opdis')
|
65
|
+
|
66
|
+
# allow pointing to source code repo
|
67
|
+
opdis_base=with_config('opdis')
|
68
|
+
|
69
|
+
require_opdis_header('opdis/opdis.h', opdis_base)
|
70
|
+
require_opdis_header('opdis/model.h', opdis_base)
|
71
|
+
require_opdis_header('opdis/metadata.h', opdis_base)
|
72
|
+
require_opdis_library('opdis', 'opdis_init', opdis_base)
|
73
|
+
|
74
|
+
# ----------------------------------------------------------------------
|
75
|
+
# Architectures supported by binutils
|
76
|
+
# These have to be specified on the command line, as binutils does not
|
77
|
+
# provide any clue as to which architectures it has been compiled for
|
78
|
+
# on the local machine.
|
79
|
+
# NOTE: These were compiled from the list of architectures in bfd.h .
|
80
|
+
|
81
|
+
ARCH= %w[ m32c alpha arc arm avr bfin cr16 cris crx d10v d30v
|
82
|
+
dlx h8300 h8500 hppa i370 i386 i860 i960 ia64 ip2k fr30
|
83
|
+
lm32 m32r m68k m88k maxq mt microblaze msp430 ns32k mcore
|
84
|
+
mep mips mmix mn10200 mn10300 openrisc or32 pdp11 pj
|
85
|
+
powerpc rs6000 s390 score sh sparc spu tic30 tic4x tic54x
|
86
|
+
tic80 v850 w65 xstormy16 xc16x xtensa z80 z8k vax frv
|
87
|
+
moxie iq2000 m32c ]
|
88
|
+
|
89
|
+
# Define all architecture options
|
90
|
+
ARCH.each { |a| with_config( "ARCH_#{a.upcase}" ) }
|
91
|
+
|
92
|
+
# ----------------------------------------------------------------------
|
93
|
+
# Detect architectures supported locally
|
94
|
+
|
95
|
+
SEEN_ARCH = []
|
96
|
+
def handle_bfd_arch( line )
|
97
|
+
arch = line.strip
|
98
|
+
if ARCH.include?(arch) and not SEEN_ARCH.include?(arch)
|
99
|
+
puts "Adding architecture '#{arch}'"
|
100
|
+
SEEN_ARCH << arch
|
101
|
+
$CPPFLAGS += " -DARCH_#{arch.upcase}"
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# allow user to override the objdump binary using --with-objdump=path
|
106
|
+
objdump_bin = with_config('objdump', 'objdump')
|
107
|
+
|
108
|
+
# use objdump -i to get supported architectures
|
109
|
+
# Ask objdump which architectures the local binutils supports and register
# each recognised one via handle_bfd_arch.
# Backticks raise a SystemCallError (e.g. Errno::ENOENT) when the binary
# cannot be executed; treat that as empty output so that the i386 default
# applied after this step is still reached instead of aborting the build.
objdump_output = begin
  `#{objdump_bin} -i`
rescue SystemCallError
  ''
end
objdump_output.split("\n").each { |line| handle_bfd_arch(line) }
|
110
|
+
|
111
|
+
# default to i386 if objdump failed to run or no architectures were found
|
112
|
+
$CPPFLAGS += " -DARCH_I386" if SEEN_ARCH.length == 0
|
113
|
+
|
114
|
+
# ----------------------------------------------------------------------
|
115
|
+
# Compatibility flags
|
116
|
+
|
117
|
+
# Select the C compatibility macro for the running interpreter.
# The patterns are anchored at the start of the version string and the dots
# are escaped, so only genuine 1.8.x / 1.9.x interpreters match; an
# unanchored /1.8/ would also match versions such as "2.1.8".
if RUBY_VERSION =~ /\A1\.8\./
  $CPPFLAGS += " -DRUBY_18"
elsif RUBY_VERSION =~ /\A1\.9\./
  $CPPFLAGS += " -DRUBY_19"
end
|
122
|
+
|
123
|
+
# ----------------------------------------------------------------------
|
124
|
+
# Makefile
|
125
|
+
|
126
|
+
create_makefile('OpdisExt')
|
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
|
2
|
+
# :title: Opdis::Callbacks
|
3
|
+
=begin rdoc
|
4
|
+
=Opdis Callbacks
|
5
|
+
<i>Copyright 2010 Thoughtgang <http://www.thoughtgang.org></i>
|
6
|
+
|
7
|
+
= Opdis Callback Routines
|
8
|
+
|
9
|
+
== Summary
|
10
|
+
|
11
|
+
libopdis uses callback functions to override its default behavior. The
|
12
|
+
OpdisExt extension provides stubs that allow Ruby objects to be used
|
13
|
+
for these callbacks.
|
14
|
+
|
15
|
+
== Contact
|
16
|
+
Support:: community@thoughtgang.org
|
17
|
+
Project:: http://rubyforge.org/projects/opdis/
|
18
|
+
=end
|
19
|
+
|
20
|
+
module Opdis
|
21
|
+
|
22
|
+
# ----------------------------------------------------------------------
|
23
|
+
=begin rdoc
|
24
|
+
An object responsible for filling an Opdis::Instruction object based on the
|
25
|
+
output of libopcodes.
|
26
|
+
=end
|
27
|
+
class InstructionDecoder
|
28
|
+
|
29
|
+
=begin rdoc
|
30
|
+
Fill an Opdis::Instruction object based on the information supplied by
|
31
|
+
libopcodes.
|
32
|
+
|
33
|
+
The <i>insn</i> argument is an Opdis::Instruction object to be filled by this
|
34
|
+
method. Depending on the InstructionDecoder object, it may have been
|
35
|
+
partially filled by a previous Decoder (e.g. the GenericDecoder and one of the
|
36
|
+
X86Decoders).
|
37
|
+
|
38
|
+
The <i>hash</i> argument contains the output of libopcodes and has the following
|
39
|
+
members:
|
40
|
+
|
41
|
+
:vma:: The virtual memory address of the instruction.
|
42
|
+
:offset:: The offset of the instruction into the target buffer.
|
43
|
+
:size:: The size of the instruction in bytes.
|
44
|
+
:buffer:: An array containing the bytes in the instruction.
|
45
|
+
:items:: An array of the instruction strings generated by libopcodes.
|
46
|
+
:raw_insn:: The complete instruction string (ASCII) as generated by libopcodes.
|
47
|
+
:branch_delay:: The number of instructions that will execute before the branch
|
48
|
+
takes effect.
|
49
|
+
This is not always set by libopcodes.
|
50
|
+
:data_size:: The size of the data reference in the instruction.
|
51
|
+
This is not always set by libopcodes.
|
52
|
+
:type:: The type of the instruction (e.g. non-branch, branch type, etc).
|
53
|
+
This is not always set by libopcodes.
|
54
|
+
:target:: The target address of a branch or dereference.
|
55
|
+
This is not always set by libopcodes.
|
56
|
+
:target2:: The second target address of a branch or dereference.
|
57
|
+
This is not always set by libopcodes.
|
58
|
+
|
59
|
+
InstructionDecoder#decode will invoke opdis_default_decoder to fill
|
60
|
+
architecture-independent members such as Instruction#vma and Instruction#ascii.
|
61
|
+
|
62
|
+
This method must return success or failure. Failure will result in an
|
63
|
+
error message being added to Disassembly.errors.
|
64
|
+
=end
|
65
|
+
def decode( insn, hash )
|
66
|
+
true
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
# ----------------------------------------------------------------------
|
72
|
+
=begin rdoc
|
73
|
+
A decoder for disassembled AT&T syntax x86 instructions.
|
74
|
+
=end
|
75
|
+
class X86Decoder < InstructionDecoder
|
76
|
+
|
77
|
+
=begin rdoc
|
78
|
+
See InstructionDecoder#decode.
|
79
|
+
This will invoke opdis default x86 decoder with AT&T syntax.
|
80
|
+
=end
|
81
|
+
def decode( insn, hash )
|
82
|
+
true
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
86
|
+
|
87
|
+
# ----------------------------------------------------------------------
|
88
|
+
=begin rdoc
|
89
|
+
A decoder for disassembled Intel syntax x86 instructions.
|
90
|
+
=end
|
91
|
+
class X86IntelDecoder < InstructionDecoder
|
92
|
+
|
93
|
+
=begin rdoc
|
94
|
+
See InstructionDecoder#decode.
|
95
|
+
This will invoke opdis default x86 decoder with Intel syntax.
|
96
|
+
=end
|
97
|
+
def decode( insn, hash )
|
98
|
+
true
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
102
|
+
|
103
|
+
# ----------------------------------------------------------------------
|
104
|
+
=begin rdoc
|
105
|
+
An object for determining whether an address has been encountered.
|
106
|
+
|
107
|
+
This is used to prevent endless loops in control-flow analysis.
|
108
|
+
=end
|
109
|
+
class VisitedAddressTracker
|
110
|
+
|
111
|
+
=begin rdoc
|
112
|
+
Return <i>true</i> if the address at Instruction.vma has been encountered,
|
113
|
+
false otherwise.
|
114
|
+
|
115
|
+
VisitedAddressTracker#visited? will invoke the Opdis default visited address
|
116
|
+
handler, which uses an AVL tree to store visited addresses.
|
117
|
+
=end
|
118
|
+
def visited?( insn )
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
122
|
+
|
123
|
+
# ----------------------------------------------------------------------
|
124
|
+
=begin rdoc
|
125
|
+
An object for determining the VMA referred to by the target operand of a
|
126
|
+
branch instruction, if possible.
|
127
|
+
=end
|
128
|
+
class AddressResolver
|
129
|
+
|
130
|
+
=begin rdoc
|
131
|
+
Return the VMA (e.g. the contents of a register, or the contents pointed
|
132
|
+
to by an address expression) for the target operand of the instruction.
|
133
|
+
Return <i>nil</i> if there is no target operand, or the operand VMA cannot
|
134
|
+
be determined.
|
135
|
+
|
136
|
+
AddressResolver#resolve will invoke the default Opdis resolver, which returns
|
137
|
+
<i>nil</i> regardless of operand value.
|
138
|
+
=end
|
139
|
+
def resolve( insn )
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,636 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
|
2
|
+
# :title: Opdis::DataModel
|
3
|
+
=begin rdoc
|
4
|
+
=Opdis Data Model
|
5
|
+
<i>Copyright 2010 Thoughtgang <http://www.thoughtgang.org></i>
|
6
|
+
|
7
|
+
= Opdis Data Model
|
8
|
+
|
9
|
+
== Summary
|
10
|
+
|
11
|
+
The Opdis data model provides a representation for Instruction and Opdis
|
12
|
+
objects that supports more sophisticated analyses than simple string
|
13
|
+
compares. The original strings produced by libopcodes are also made
|
14
|
+
available for 'raw' processing.
|
15
|
+
|
16
|
+
==Contact
|
17
|
+
Support:: community@thoughtgang.org
|
18
|
+
Project:: http://rubyforge.org/projects/opdis/
|
19
|
+
=end
|
20
|
+
|
21
|
+
|
22
|
+
module Opdis
|
23
|
+
|
24
|
+
=begin rdoc
|
25
|
+
A disassembled instruction.
|
26
|
+
|
27
|
+
The Instruction object contains information which may not be made available by
|
28
|
+
the decoder. The libopcodes disassembler generates a list of ASCII strings
|
29
|
+
which the decoder converts to Instruction objects. The generic decoder
|
30
|
+
(see InstructionDecoder) fills the <i>status</i>, <i>vma</i>, <i>size</i>,
|
31
|
+
<i>bytes</i>, and <i>ascii</i> members.
|
32
|
+
|
33
|
+
Use the <i>status</i> member to determine how much information the decoder
|
34
|
+
was able to produce. DECODE_BASIC indicates that the generic decoder has
|
35
|
+
filled the fields mentioned above. DECODE_MNEMONIC and DECODE_OPERANDS
|
36
|
+
indicate that the decoder has successfully identified the mnemonic and
|
37
|
+
the operands of the instruction, respectively; this means that the
|
38
|
+
<i>prefixes</i>, <i>mnemonic</i>, <i>operands</i>, <i>src</i>, <i>dest</i>,
|
39
|
+
and <i>target</i> members have been filled, though the Operand objects in
|
40
|
+
<i>operands</i> will only have their <i>ascii</i> member filled.
|
41
|
+
DECODE_MNEMONIC_FLAGS and DECODE_OPERAND_FLAGS indicate that instruction
|
42
|
+
and operand metadata have been filled.
|
43
|
+
=end
|
44
|
+
class Instruction
|
45
|
+
|
46
|
+
=begin rdoc
|
47
|
+
The decoding status of an instruction. This will be DECODE_INVALID or any
|
48
|
+
combination of DECODE_BASIC, DECODE_MNEMONIC, DECODE_OPERANDS,
|
49
|
+
DECODE_MNEMONIC_FLAGS, and DECODE_OPERAND_FLAGS -- depending on how much
|
50
|
+
work the instruction decoder performed successfully.
|
51
|
+
=end
|
52
|
+
attr_accessor :status
|
53
|
+
|
54
|
+
=begin rdoc
|
55
|
+
Virtual Memory Address. The in-memory address of the instruction.
|
56
|
+
=end
|
57
|
+
attr_reader :vma
|
58
|
+
|
59
|
+
=begin rdoc
|
60
|
+
The size of the instruction in bytes.
|
61
|
+
=end
|
62
|
+
attr_reader :size
|
63
|
+
|
64
|
+
=begin rdoc
|
65
|
+
An array containing the bytes of the instruction.
|
66
|
+
=end
|
67
|
+
attr_reader :bytes
|
68
|
+
|
69
|
+
=begin rdoc
|
70
|
+
The target operand of a branch instruction, or <i>nil</i>.
|
71
|
+
=end
|
72
|
+
attr_reader :target
|
73
|
+
|
74
|
+
=begin rdoc
|
75
|
+
The destination (write) operand of an instruction, or <i>nil</i>.
|
76
|
+
=end
|
77
|
+
attr_reader :dest
|
78
|
+
|
79
|
+
=begin rdoc
|
80
|
+
The first source (read) operand of an instruction, or <i>nil</i>.
|
81
|
+
=end
|
82
|
+
attr_reader :src
|
83
|
+
|
84
|
+
=begin rdoc
|
85
|
+
The category or high-level type of the instruction. This will be one of
|
86
|
+
CAT_CFLOW, CAT_STACK, CAT_LOADSTORE, CAT_TEST, CAT_MATH, CAT_BIT, CAT_IO,
|
87
|
+
CAT_TRAP, CAT_PRIV, CAT_NOP, or <i>nil</i> if the instruction category is
|
88
|
+
unknown.
|
89
|
+
=end
|
90
|
+
attr_accessor :category
|
91
|
+
|
92
|
+
=begin rdoc
|
93
|
+
The category-specific flags for the instruction. This is generally used
|
94
|
+
to encode a specific instruction type, e.g. FLG_JMP for an unconditional
|
95
|
+
jump instruction, or FLG_POP for a stack pop instruction.
|
96
|
+
=end
|
97
|
+
attr_reader :flags
|
98
|
+
|
99
|
+
=begin rdoc
|
100
|
+
The Instruction Set Architecture of the instruction. This is a subset of the
|
101
|
+
full CPU architecture ISA.
|
102
|
+
ISA_GEN, ISA_FPU, ISA_GPU, ISA_SIMD, ISA_VM
|
103
|
+
=end
|
104
|
+
attr_accessor :isa
|
105
|
+
|
106
|
+
=begin rdoc
|
107
|
+
Array of instruction prefix strings.
|
108
|
+
=end
|
109
|
+
attr_reader :prefixes
|
110
|
+
|
111
|
+
=begin rdoc
|
112
|
+
Array of Opdis::Operand objects.
|
113
|
+
=end
|
114
|
+
attr_reader :operands
|
115
|
+
|
116
|
+
=begin rdoc
|
117
|
+
Instruction mnemonic string.
|
118
|
+
=end
|
119
|
+
attr_accessor :mnemonic
|
120
|
+
|
121
|
+
=begin rdoc
|
122
|
+
Comment string generated by libopcodes.
|
123
|
+
=end
|
124
|
+
attr_accessor :comment
|
125
|
+
|
126
|
+
=begin rdoc
|
127
|
+
Invalid instruction.
|
128
|
+
=end
|
129
|
+
DECODE_INVALID='invalid'
|
130
|
+
=begin rdoc
|
131
|
+
Basic instruction decoding has been performed. This means that the <i>vma</i>,
|
132
|
+
<i>size</i>, <i>bytes</i>, and <i>ascii</i> members are valid.
|
133
|
+
=end
|
134
|
+
DECODE_BASIC='basic'
|
135
|
+
=begin rdoc
|
136
|
+
Instruction mnemonic has been decoded. This means that the <i>prefixes</i> and
|
137
|
+
<i>mnemonic</i> members are valid.
|
138
|
+
=end
|
139
|
+
DECODE_MNEMONIC='mnemonic'
|
140
|
+
=begin rdoc
|
141
|
+
Basic operand decoding has been performed. This means that the <i>src</i>,
|
142
|
+
<i>dest</i>, and <i>target</i> members are valid. The <i>operands</i> member
|
143
|
+
has been filled with Operand objects whose <i>ascii</i> member is valid.
|
144
|
+
=end
|
145
|
+
DECODE_OPERANDS='operands'
|
146
|
+
=begin rdoc
|
147
|
+
Instruction metadata has been decoded. This means that the Instruction
|
148
|
+
<i>isa</i>, <i>category</i>, and <i>flags</i> members are valid.
|
149
|
+
=end
|
150
|
+
DECODE_MNEMONIC_FLAGS='mnemonic flags'
|
151
|
+
=begin rdoc
|
152
|
+
Operand metadata has been decoded. This means that the <i>operands</i> field
|
153
|
+
is filled with objects derived from Operand, whose members are all valid.
|
154
|
+
=end
|
155
|
+
DECODE_OPERAND_FLAGS='operand flags'
|
156
|
+
|
157
|
+
=begin rdoc
|
158
|
+
General-purpose instruction set. The default ISA for all instructions.
|
159
|
+
=end
|
160
|
+
ISA_GEN='general'
|
161
|
+
=begin rdoc
|
162
|
+
Floating-point instruction set.
|
163
|
+
=end
|
164
|
+
ISA_FPU='fpu'
|
165
|
+
=begin rdoc
|
166
|
+
Graphics card instruction set.
|
167
|
+
=end
|
168
|
+
ISA_GPU='gpu'
|
169
|
+
=begin rdoc
|
170
|
+
SIMD (single instruction, multiple data) instruction set. Examples include
|
171
|
+
MMX, SSE, SSE2, SSE3, Altivec, and 3DNow! instructions.
|
172
|
+
=end
|
173
|
+
ISA_SIMD='simd'
|
174
|
+
=begin rdoc
|
175
|
+
Virtual Machine or virtualization (hypervisor) extensions.
|
176
|
+
=end
|
177
|
+
ISA_VM='vm'
|
178
|
+
|
179
|
+
=begin rdoc
|
180
|
+
Unrecognized instruction.
|
181
|
+
=end
|
182
|
+
CAT_UNKNOWN='unknown'
|
183
|
+
=begin rdoc
|
184
|
+
Control flow (jump, call, return) instruction.
|
185
|
+
=end
|
186
|
+
CAT_CFLOW='control-flow'
|
187
|
+
=begin rdoc
|
188
|
+
Stack manipulation (push, pop) instruction.
|
189
|
+
=end
|
190
|
+
CAT_STACK='stack'
|
191
|
+
=begin rdoc
|
192
|
+
Load/store (move) instruction.
|
193
|
+
=end
|
194
|
+
CAT_LOADSTORE='load/store'
|
195
|
+
=begin rdoc
|
196
|
+
Test or compare instruction.
|
197
|
+
=end
|
198
|
+
CAT_TEST='test'
|
199
|
+
=begin rdoc
|
200
|
+
Mathematical (add, sub, mul, etc) instruction.
|
201
|
+
=end
|
202
|
+
CAT_MATH='mathematic'
|
203
|
+
=begin rdoc
|
204
|
+
Logical (and, or, xor, not, etc) instruction.
|
205
|
+
=end
|
206
|
+
CAT_BIT='bitwise'
|
207
|
+
=begin rdoc
|
208
|
+
Input/output (i.e. port read/write) instruction.
|
209
|
+
=end
|
210
|
+
CAT_IO='i/o'
|
211
|
+
=begin rdoc
|
212
|
+
Trap or interrupt instruction.
|
213
|
+
=end
|
214
|
+
CAT_TRAP='trap'
|
215
|
+
=begin rdoc
|
216
|
+
Privileged (ring0) or system management instruction.
|
217
|
+
=end
|
218
|
+
CAT_PRIV='privileged'
|
219
|
+
=begin rdoc
|
220
|
+
No-operation instruction.
|
221
|
+
=end
|
222
|
+
CAT_NOP='no-op'
|
223
|
+
|
224
|
+
=begin rdoc
|
225
|
+
Call a procedure.
|
226
|
+
=end
|
227
|
+
FLG_CALL='call'
|
228
|
+
=begin rdoc
|
229
|
+
Conditionally call a procedure.
|
230
|
+
=end
|
231
|
+
FLG_CALLCC='conditional call'
|
232
|
+
=begin rdoc
|
233
|
+
Jump to an address.
|
234
|
+
=end
|
235
|
+
FLG_JMP='jump'
|
236
|
+
=begin rdoc
|
237
|
+
Conditionally jump to an address.
|
238
|
+
=end
|
239
|
+
FLG_JMPCC='conditional jump'
|
240
|
+
=begin rdoc
|
241
|
+
Return from a procedure.
|
242
|
+
=end
|
243
|
+
FLG_RET='return'
|
244
|
+
=begin rdoc
|
245
|
+
Push onto the stack.
|
246
|
+
=end
|
247
|
+
FLG_PUSH='push'
|
248
|
+
=begin rdoc
|
249
|
+
Pop from the stack.
|
250
|
+
=end
|
251
|
+
FLG_POP='pop'
|
252
|
+
=begin rdoc
|
253
|
+
Enter a stack frame.
|
254
|
+
=end
|
255
|
+
FLG_FRAME='enter frame'
|
256
|
+
=begin rdoc
|
257
|
+
Leave a stack frame.
|
258
|
+
=end
|
259
|
+
FLG_UNFRAME='leave frame'
|
260
|
+
=begin rdoc
|
261
|
+
Logical AND operation.
|
262
|
+
=end
|
263
|
+
FLG_AND='bitwise and'
|
264
|
+
=begin rdoc
|
265
|
+
Logical OR operation.
|
266
|
+
=end
|
267
|
+
FLG_OR='bitwise or'
|
268
|
+
=begin rdoc
|
269
|
+
Logical XOR operation.
|
270
|
+
=end
|
271
|
+
FLG_XOR='bitwise xor'
|
272
|
+
=begin rdoc
|
273
|
+
Logical NOT operation.
|
274
|
+
=end
|
275
|
+
FLG_NOT='bitwise not'
|
276
|
+
=begin rdoc
|
277
|
+
Logical (no carry) shift left.
|
278
|
+
=end
|
279
|
+
FLG_LSL='logical shift left'
|
280
|
+
=begin rdoc
|
281
|
+
Logical (no carry) shift right.
|
282
|
+
=end
|
283
|
+
FLG_LSR='logical shift right'
|
284
|
+
=begin rdoc
|
285
|
+
Arithmetic (with carry) shift left.
|
286
|
+
=end
|
287
|
+
FLG_ASL='arithmetic shift left'
|
288
|
+
=begin rdoc
|
289
|
+
Arithmetic (with carry) shift right.
|
290
|
+
=end
|
291
|
+
FLG_ASR='arithmetic shift right'
|
292
|
+
=begin rdoc
|
293
|
+
Logical (no carry) rotate left.
|
294
|
+
=end
|
295
|
+
FLG_ROL='rotate left'
|
296
|
+
=begin rdoc
|
297
|
+
Logical (no carry) rotate right.
|
298
|
+
=end
|
299
|
+
FLG_ROR='rotate right'
|
300
|
+
=begin rdoc
|
301
|
+
Arithmetic (with carry) rotate left.
|
302
|
+
=end
|
303
|
+
FLG_RCL='rotate carry left'
|
304
|
+
=begin rdoc
|
305
|
+
Arithmetic (with carry) rotate right.
|
306
|
+
=end
|
307
|
+
FLG_RCR='rotate carry right'
|
308
|
+
=begin rdoc
|
309
|
+
Read from I/O port.
|
310
|
+
=end
|
311
|
+
FLG_IN='input from port'
|
312
|
+
=begin rdoc
|
313
|
+
Write to I/O port.
|
314
|
+
=end
|
315
|
+
FLG_OUT='output to port'
|
316
|
+
|
317
|
+
=begin rdoc
|
318
|
+
Returns true if the instruction is a branch (is a CALL, CALLCC, JMP, or
|
319
|
+
JMPCC). This is only reliable if <i>status</i> includes DECODE_MNEMONIC_FLAGS.
|
320
|
+
=end
|
321
|
+
def branch?
|
322
|
+
end
|
323
|
+
|
324
|
+
=begin rdoc
|
325
|
+
Returns true if execution falls through to the next instruction. This is true
|
326
|
+
in all cases except JMP and RET. This is only reliable if <i>status</i>
|
327
|
+
includes DECODE_MNEMONIC_FLAGS.
|
328
|
+
=end
|
329
|
+
def fallthrough?
|
330
|
+
end
|
331
|
+
|
332
|
+
=begin rdoc
|
333
|
+
Returns the <i>ascii</i> field of the instruction.
|
334
|
+
=end
|
335
|
+
def to_s
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
339
|
+
=begin rdoc
|
340
|
+
An instruction operand. A properly-decoded instruction will have Operand
|
341
|
+
subclasses in its <i>operands</i> array; the Operand base class is used only
|
342
|
+
when the operand strings have been identified by the decoder but not
|
343
|
+
processed.
|
344
|
+
=end
|
345
|
+
class Operand
|
346
|
+
|
347
|
+
=begin rdoc
|
348
|
+
Metadata containing additional information about the operand
|
349
|
+
=end
|
350
|
+
attr_accessor :flags
|
351
|
+
|
352
|
+
=begin rdoc
|
353
|
+
The size in bytes of the data referenced by the instruction, or <i>nil</i> if
|
354
|
+
not known.
|
355
|
+
=end
|
356
|
+
attr_accessor :data_size
|
357
|
+
|
358
|
+
=begin rdoc
|
359
|
+
The string for the operand returned by libopcodes.
|
360
|
+
=end
|
361
|
+
attr_accessor :ascii
|
362
|
+
|
363
|
+
=begin rdoc
|
364
|
+
Operand is read by instruction.
|
365
|
+
=end
|
366
|
+
FLG_R='r'
|
367
|
+
=begin rdoc
|
368
|
+
Operand is written to by instruction.
|
369
|
+
=end
|
370
|
+
FLG_W='w'
|
371
|
+
=begin rdoc
|
372
|
+
Operand is executed (jumped to) by instruction.
|
373
|
+
=end
|
374
|
+
FLG_X='x'
|
375
|
+
=begin rdoc
|
376
|
+
=end
|
377
|
+
FLG_SIGNED='signed'
|
378
|
+
=begin rdoc
|
379
|
+
=end
|
380
|
+
FLG_ADDR='address'
|
381
|
+
=begin rdoc
|
382
|
+
=end
|
383
|
+
FLG_INDIRECT='indirect address'
|
384
|
+
|
385
|
+
=begin rdoc
|
386
|
+
Returns the <i>ascii</i> field.
|
387
|
+
=end
|
388
|
+
def to_s
|
389
|
+
end
|
390
|
+
end
|
391
|
+
|
392
|
+
=begin rdoc
|
393
|
+
A numeric value explicitly encoded in the bytes of the instruction.
|
394
|
+
=end
|
395
|
+
class ImmediateOperand < Operand
|
396
|
+
|
397
|
+
=begin rdoc
|
398
|
+
The immediate value, interpreted as signed or unsigned based on whether
|
399
|
+
Operand#flags includes FLG_SIGNED.
|
400
|
+
=end
|
401
|
+
attr_accessor :value
|
402
|
+
|
403
|
+
=begin rdoc
|
404
|
+
The immediate value interpreted as a signed integer.
|
405
|
+
=end
|
406
|
+
attr_accessor :signed
|
407
|
+
|
408
|
+
=begin rdoc
|
409
|
+
The immediate value interpreted as an unsigned integer.
|
410
|
+
=end
|
411
|
+
attr_accessor :unsigned
|
412
|
+
|
413
|
+
=begin rdoc
|
414
|
+
The immediate value interpreted as a VMA.
|
415
|
+
=end
|
416
|
+
attr_accessor :vma
|
417
|
+
end
|
418
|
+
|
419
|
+
=begin rdoc
|
420
|
+
An address expression such as an Intel Effective Address. This generally
|
421
|
+
takes the form of
|
422
|
+
|
423
|
+
base + (index * scale) + disp
|
424
|
+
|
425
|
+
where <b>base</b> and <b>index</b> are registers, <b>scale</b> is a power of
|
426
|
+
two, <b>disp</b> is an immediate value, and <b>*</b> is a "shift operation"
|
427
|
+
(generally arithmetic shift left).
|
428
|
+
=end
|
429
|
+
class AddressExpressionOperand < Operand
|
430
|
+
|
431
|
+
=begin rdoc
|
432
|
+
The shift algorithm used by the expression. This is one of SHIFT_ASL,
|
433
|
+
SHIFT_LSL, SHIFT_LSR, SHIFT_ROR, or SHIFT_RRX. The default is SHIFT_ASL;
|
434
|
+
the other algorithms apply to the ARM architecture.
|
435
|
+
=end
|
436
|
+
attr_accessor :shift
|
437
|
+
|
438
|
+
=begin rdoc
|
439
|
+
The scale factor of the expression. This must be a power of two; the default
|
440
|
+
scale value is <b>1</b>.
|
441
|
+
=end
|
442
|
+
attr_accessor :scale
|
443
|
+
|
444
|
+
=begin rdoc
|
445
|
+
A Register object for the <b>index</b> value of the expression, or <i>nil</i> if
|
446
|
+
the expression does not use an index value.
|
447
|
+
=end
|
448
|
+
attr_accessor :index
|
449
|
+
|
450
|
+
=begin rdoc
|
451
|
+
A Register object for the <b>base</b> value of the expression, or <i>nil</i> if
|
452
|
+
the expression does not use a base value.
|
453
|
+
=end
|
454
|
+
attr_accessor :base
|
455
|
+
|
456
|
+
=begin rdoc
|
457
|
+
A numeric value or an AbsoluteAddress object for the displacement of the
|
458
|
+
expression, or <i>nil</i> if the expression does not use a displacement.
|
459
|
+
=end
|
460
|
+
attr_accessor :displacement
|
461
|
+
|
462
|
+
=begin rdoc
|
463
|
+
Logical (no carry) shift left.
|
464
|
+
=end
|
465
|
+
SHIFT_LSL='lsl'
|
466
|
+
=begin rdoc
|
467
|
+
Logical (no carry) shift right.
|
468
|
+
=end
|
469
|
+
SHIFT_LSR='lsr'
|
470
|
+
=begin rdoc
|
471
|
+
Arithmetic (carry) shift left.
|
472
|
+
=end
|
473
|
+
SHIFT_ASL='asl'
|
474
|
+
=begin rdoc
|
475
|
+
Logical (no carry) rotate right.
|
476
|
+
=end
|
477
|
+
SHIFT_ROR='ror'
|
478
|
+
=begin rdoc
|
479
|
+
=end
|
480
|
+
SHIFT_RRX='rrx'
|
481
|
+
end
|
482
|
+
|
483
|
+
=begin rdoc
|
484
|
+
A segmented address consisting of a base (segment) register and a displacement
|
485
|
+
or offset. In Intel notation, these take the form <b>segment:offset</b>.
|
486
|
+
=end
|
487
|
+
class AbsoluteAddress
|
488
|
+
|
489
|
+
=begin rdoc
|
490
|
+
A Register object for the segment or base of the absolute address.
|
491
|
+
=end
|
492
|
+
attr_accessor :segment
|
493
|
+
|
494
|
+
=begin rdoc
|
495
|
+
An immediate value for the offset or displacement of the absolute address.
|
496
|
+
=end
|
497
|
+
attr_accessor :offset
|
498
|
+
end
|
499
|
+
|
500
|
+
=begin rdoc
|
501
|
+
An AbsoluteAddress operand.
|
502
|
+
|
503
|
+
See AbsoluteAddress class.
|
504
|
+
=end
|
505
|
+
class AbsoluteAddressOperand < Operand
|
506
|
+
|
507
|
+
attr_accessor :segment, :offset
|
508
|
+
|
509
|
+
end
|
510
|
+
|
511
|
+
=begin rdoc
|
512
|
+
A CPU register.
|
513
|
+
=end
|
514
|
+
class Register
|
515
|
+
|
516
|
+
=begin rdoc
|
517
|
+
A numeric ID for the register. Registers with the same ID but different names
|
518
|
+
are aliases of each other.
|
519
|
+
=end
|
520
|
+
attr_reader :id
|
521
|
+
|
522
|
+
=begin rdoc
|
523
|
+
The size of the register in bytes.
|
524
|
+
=end
|
525
|
+
attr_reader :size
|
526
|
+
|
527
|
+
=begin rdoc
|
528
|
+
The name or mnemonic for the register.
|
529
|
+
=end
|
530
|
+
attr_reader :name
|
531
|
+
|
532
|
+
=begin rdoc
|
533
|
+
Metadata describing the purpose or general use of the register.
|
534
|
+
=end
|
535
|
+
attr_accessor :purpose
|
536
|
+
|
537
|
+
=begin rdoc
|
538
|
+
A general-purpose register.
|
539
|
+
=end
|
540
|
+
FLG_GEN='general purpose'
|
541
|
+
=begin rdoc
|
542
|
+
A floating-point register.
|
543
|
+
=end
|
544
|
+
FLG_FPU='fpu'
|
545
|
+
=begin rdoc
|
546
|
+
A register on the graphics card.
|
547
|
+
=end
|
548
|
+
FLG_GPU='gpu'
|
549
|
+
=begin rdoc
|
550
|
+
An SIMD register.
|
551
|
+
=end
|
552
|
+
FLG_SIMD='simd'
|
553
|
+
=begin rdoc
|
554
|
+
A system register for task management.
|
555
|
+
=end
|
556
|
+
FLG_TASK='task mgt'
|
557
|
+
=begin rdoc
|
558
|
+
A system register for memory management.
|
559
|
+
=end
|
560
|
+
FLG_MEM='memory mgt'
|
561
|
+
=begin rdoc
|
562
|
+
A system register providing debugger support.
|
563
|
+
=end
|
564
|
+
FLG_DBG='debug'
|
565
|
+
=begin rdoc
|
566
|
+
The program counter or instruction pointer.
|
567
|
+
=end
|
568
|
+
FLG_PC='pc'
|
569
|
+
=begin rdoc
|
570
|
+
The flags or condition code register.
|
571
|
+
=end
|
572
|
+
FLG_FLAGS='flags'
|
573
|
+
=begin rdoc
|
574
|
+
The stack pointer.
|
575
|
+
=end
|
576
|
+
FLG_STACK='stack'
|
577
|
+
=begin rdoc
|
578
|
+
The frame pointer.
|
579
|
+
=end
|
580
|
+
FLG_FRAME='stack frame'
|
581
|
+
=begin rdoc
|
582
|
+
A memory segment register.
|
583
|
+
=end
|
584
|
+
FLG_SEG='segment'
|
585
|
+
=begin rdoc
|
586
|
+
The (virtual) zero register.
|
587
|
+
=end
|
588
|
+
FLG_Z='zero'
|
589
|
+
=begin rdoc
|
590
|
+
A register used for incoming arguments inside a procedure.
|
591
|
+
=end
|
592
|
+
FLG_IN='args in'
|
593
|
+
=begin rdoc
|
594
|
+
A register used for outgoing arguments in a procedure call.
|
595
|
+
=end
|
596
|
+
FLG_OUT='args out'
|
597
|
+
=begin rdoc
|
598
|
+
A register used for local variables inside a procedure.
|
599
|
+
=end
|
600
|
+
FLG_LOCALS='locals'
|
601
|
+
=begin rdoc
|
602
|
+
A register used for a return value from a procedure call.
|
603
|
+
=end
|
604
|
+
FLG_RET='return'
|
605
|
+
end
|
606
|
+
|
607
|
+
=begin rdoc
|
608
|
+
A register operand.
|
609
|
+
|
610
|
+
See Register class.
|
611
|
+
=end
|
612
|
+
class RegisterOperand < Operand
|
613
|
+
|
614
|
+
attr_reader :id, :size,:name
|
615
|
+
attr_accessor :purpose
|
616
|
+
|
617
|
+
FLG_GEN='general purpose'
|
618
|
+
FLG_FPU='fpu'
|
619
|
+
FLG_GPU='gpu'
|
620
|
+
FLG_SIMD='simd'
|
621
|
+
FLG_TASK='task mgt'
|
622
|
+
FLG_MEM='memory mgt'
|
623
|
+
FLG_DBG='debug'
|
624
|
+
FLG_PC='pc'
|
625
|
+
FLG_FLAGS='flags'
|
626
|
+
FLG_STACK='stack'
|
627
|
+
FLG_FRAME='stack frame'
|
628
|
+
FLG_SEG='segment'
|
629
|
+
FLG_Z='zero'
|
630
|
+
FLG_IN='args in'
|
631
|
+
FLG_OUT='args out'
|
632
|
+
FLG_LOCALS='locals'
|
633
|
+
FLG_RET='return'
|
634
|
+
end
|
635
|
+
|
636
|
+
end
|