Opdis 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2 -0
- data/LICENSE +674 -0
- data/LICENSE.README +8 -0
- data/README +101 -0
- data/examples/array_linear.rb +23 -0
- data/examples/bfd_entry.rb +24 -0
- data/examples/bfd_section.rb +24 -0
- data/examples/bfd_symbol.rb +27 -0
- data/examples/buf_linear.rb +25 -0
- data/examples/decoder.rb +45 -0
- data/examples/file_linear.rb +31 -0
- data/examples/libopcodes_options.rb +11 -0
- data/examples/resolver.rb +191 -0
- data/examples/supported_architectures.rb +11 -0
- data/examples/visited_handler.rb +61 -0
- data/examples/x86_decoder.rb +46 -0
- data/lib/Opdis.rb +123 -0
- data/module/Arch.c +364 -0
- data/module/Arch.h +37 -0
- data/module/Callbacks.c +266 -0
- data/module/Callbacks.h +43 -0
- data/module/Model.c +1275 -0
- data/module/Model.h +230 -0
- data/module/Opdis.c +850 -0
- data/module/Opdis.h +89 -0
- data/module/extconf.rb +126 -0
- data/module/rdoc_input/Callbacks.rb +143 -0
- data/module/rdoc_input/Model.rb +636 -0
- data/module/rdoc_input/Opdis.rb +253 -0
- data/module/ruby_compat.c +72 -0
- data/module/ruby_compat.h +25 -0
- data/tests/ut_opdis.rb +30 -0
- data/tests/ut_opdis_bfd.rb +556 -0
- metadata +109 -0
data/module/Opdis.h
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
/* Opdis.h
|
2
|
+
* Copyright 2010 Thoughtgang <http://www.thoughtgang.org>
|
3
|
+
* Written by TG Community Developers <community@thoughtgang.org>
|
4
|
+
* Released under the GNU Public License, version 3.
|
5
|
+
* See http://www.gnu.org/licenses/gpl.txt for details.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef OPDIS_RB_OPDIS_H
|
9
|
+
#define OPDIS_RB_OPDIS_H
|
10
|
+
|
11
|
+
/* method names */
|
12
|
+
#define DIS_METHOD_DISASM "ext_disassemble"
|
13
|
+
#define DIS_METHOD_usage "ext_usage"
|
14
|
+
|
15
|
+
/* attribute names */
|
16
|
+
#define DIS_ATTR_DECODER "insn_decoder"
|
17
|
+
#define DIS_ATTR_HANDLER "addr_tracker"
|
18
|
+
#define DIS_ATTR_RESOLVER "resolver"
|
19
|
+
#define DIS_ATTR_DEBUG "debug"
|
20
|
+
#define DIS_ATTR_SYNTAX "syntax"
|
21
|
+
#define DIS_ATTR_ARCH "arch"
|
22
|
+
#define DIS_ATTR_OPTS "opcodes_options"
|
23
|
+
|
24
|
+
/* argument (hash) names */
|
25
|
+
#define DIS_ARG_DECODER DIS_ATTR_DECODER
|
26
|
+
#define DIS_ARG_HANDLER DIS_ATTR_HANDLER
|
27
|
+
#define DIS_ARG_RESOLVER DIS_ATTR_RESOLVER
|
28
|
+
#define DIS_ARG_SYNTAX "syntax"
|
29
|
+
#define DIS_ARG_DEBUG "debug"
|
30
|
+
#define DIS_ARG_OPTIONS "options"
|
31
|
+
#define DIS_ARG_STRATEGY "strategy"
|
32
|
+
#define DIS_ARG_ARCH "arch"
|
33
|
+
#define DIS_ARG_VMA "vma"
|
34
|
+
#define DIS_ARG_OFFSET "offset"
|
35
|
+
#define DIS_ARG_LEN "length"
|
36
|
+
#define DIS_ARG_BUFVMA "buffer_vma"
|
37
|
+
|
38
|
+
/* constants */
|
39
|
+
#define DIS_ERR_BOUNDS_NAME "ERROR_BOUNDS"
|
40
|
+
#define DIS_ERR_BOUNDS "Bounds exceeded"
|
41
|
+
#define DIS_ERR_INVALID_NAME "ERROR_INVALID_INSN"
|
42
|
+
#define DIS_ERR_INVALID "Invalid instruction"
|
43
|
+
#define DIS_ERR_DECODE_NAME "ERROR_DECODE_INSN"
|
44
|
+
#define DIS_ERR_DECODE "Decoder error"
|
45
|
+
#define DIS_ERR_BFD_NAME "ERROR_BFD"
|
46
|
+
#define DIS_ERR_BFD "Bfd error"
|
47
|
+
#define DIS_ERR_MAX_NAME "ERROR_MAX_ITEMS"
|
48
|
+
#define DIS_ERR_MAX "Max insn items error"
|
49
|
+
#define DIS_ERR_UNK "Unknown error"
|
50
|
+
|
51
|
+
#define DIS_STRAT_SINGLE_NAME "STRATEGY_SINGLE"
|
52
|
+
#define DIS_STRAT_SINGLE "single-instruction"
|
53
|
+
#define DIS_STRAT_LINEAR_NAME "STRATEGY_LINEAR"
|
54
|
+
#define DIS_STRAT_LINEAR "linear"
|
55
|
+
#define DIS_STRAT_CFLOW_NAME "STRATEGY_CFLOW"
|
56
|
+
#define DIS_STRAT_CFLOW "control-flow"
|
57
|
+
#define DIS_STRAT_SYMBOL_NAME "STRATEGY_SYMBOL"
|
58
|
+
#define DIS_STRAT_SYMBOL "bfd-symbol"
|
59
|
+
#define DIS_STRAT_SECTION_NAME "STRATEGY_SECTION"
|
60
|
+
#define DIS_STRAT_SECTION "bfd-section"
|
61
|
+
#define DIS_STRAT_ENTRY_NAME "STRATEGY_ENTRY"
|
62
|
+
#define DIS_STRAT_ENTRY "bfd-entry"
|
63
|
+
|
64
|
+
#define DIS_CONST_STRATEGIES "STRATEGIES"
|
65
|
+
|
66
|
+
#define DIS_CONST_ARCHES "architectures"
|
67
|
+
|
68
|
+
#define DIS_SYNTAX_ATT "att"
|
69
|
+
#define DIS_SYNTAX_INTEL "intel"
|
70
|
+
|
71
|
+
#define DIS_CONST_SYNTAXES "SYNTAXES"
|
72
|
+
|
73
|
+
/* Output */
|
74
|
+
|
75
|
+
#define OUT_ATTR_ERRORS "errors"
|
76
|
+
#define OUT_METHOD_CONTAIN "containing"
|
77
|
+
|
78
|
+
/* BFD */
|
79
|
+
#define BFD_TGT_PATH "Bfd::Target"
|
80
|
+
#define BFD_SEC_PATH "Bfd::Section"
|
81
|
+
#define BFD_SYM_PATH "Bfd::Symbol"
|
82
|
+
|
83
|
+
#define OPDIS_MODULE_NAME "Opdis"
|
84
|
+
#define OPDIS_DISASM_CLASS_NAME "Disassembler"
|
85
|
+
#define OPDIS_OUTPUT_CLASS_NAME "Disassembly"
|
86
|
+
|
87
|
+
/* Initialization entry point for the OpdisExt extension.
 * Declared with (void) so that this is a true prototype: in C an empty
 * parameter list leaves the arguments unspecified and unchecked. */
void Init_OpdisExt(void);
|
88
|
+
|
89
|
+
#endif
|
data/module/extconf.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
|
2
|
+
# Opdis Ruby extension config file
|
3
|
+
# Copyright 2010 Thoughtgang <http://www.thoughtgang.org>
|
4
|
+
# Options:
|
5
|
+
# --with-binutils-dir=path_to_binutils_install_base (/usr)
|
6
|
+
# --with-binutils-include=path_to_bfd.h (/usr/include)
|
7
|
+
# --with-binutils-lib=path_to_libbfd.so (/usr/lib)
|
8
|
+
# --with-opcodes-dir=path_to_binutils_install_base (/usr)
|
9
|
+
# --with-opcodes-include=path_to_dis-asm.h (/usr/include)
|
10
|
+
# --with-opcodes-lib=path_to_libopcodes.so (/usr/lib)
|
11
|
+
# --with-opdis-dir=path_to_opdis_install_base (/usr/local)
|
12
|
+
# --with-opdis-include=path_to_opdis_include_dir (/usr/local/include)
|
13
|
+
# --with-opdis-lib=path_to_libopdis.so (/usr/local/lib)
|
14
|
+
# --with-opdis=path_to_opdis_source_tree
|
15
|
+
# --with-objdump=path_to_objdump_binary (objdump)
|
16
|
+
# See README for more info.
|
17
|
+
|
18
|
+
require 'mkmf'
|
19
|
+
|
20
|
+
def require_header(name)
|
21
|
+
have_header(name) or raise "Missing header file #{name}"
|
22
|
+
end
|
23
|
+
|
24
|
+
def require_library(name, func)
|
25
|
+
have_library(name, func) or raise "Missing library #{name}"
|
26
|
+
end
|
27
|
+
|
28
|
+
def require_opdis_header(name, opdis_base)
|
29
|
+
return require_header(name) if not opdis_base
|
30
|
+
|
31
|
+
dirs = [opdis_base, opdis_base + "/opdis", opdis_base + "/include"]
|
32
|
+
find_header(name, *dirs) or raise "#{name} not found in #{dirs}"
|
33
|
+
end
|
34
|
+
|
35
|
+
def require_opdis_library(name, func, opdis_base)
|
36
|
+
return require_library(name, func) if not opdis_base
|
37
|
+
|
38
|
+
dirs = [opdis_base, opdis_base + "/lib", opdis_base + '/dist',
|
39
|
+
opdis_base + '/dist/.libs']
|
40
|
+
find_library(name, func, *dirs) or raise "#{name} not found in #{dirs}"
|
41
|
+
end
|
42
|
+
|
43
|
+
# ----------------------------------------------------------------------
|
44
|
+
# BFD
|
45
|
+
|
46
|
+
# allow user to specify specific binutils distro
|
47
|
+
dir_config('binutils')
|
48
|
+
|
49
|
+
require_header('bfd.h')
|
50
|
+
require_library('bfd', 'bfd_init')
|
51
|
+
|
52
|
+
# ----------------------------------------------------------------------
|
53
|
+
# OPCODES
|
54
|
+
|
55
|
+
# allow user to override libopcodes
|
56
|
+
dir_config('opcodes')
|
57
|
+
|
58
|
+
require_header('dis-asm.h')
|
59
|
+
require_library('opcodes', 'init_disassemble_info')
|
60
|
+
|
61
|
+
# ----------------------------------------------------------------------
|
62
|
+
# OPDIS
|
63
|
+
|
64
|
+
dir_config('opdis')
|
65
|
+
|
66
|
+
# allow pointing to source code repo
|
67
|
+
opdis_base=with_config('opdis')
|
68
|
+
|
69
|
+
require_opdis_header('opdis/opdis.h', opdis_base)
|
70
|
+
require_opdis_header('opdis/model.h', opdis_base)
|
71
|
+
require_opdis_header('opdis/metadata.h', opdis_base)
|
72
|
+
require_opdis_library('opdis', 'opdis_init', opdis_base)
|
73
|
+
|
74
|
+
# ----------------------------------------------------------------------
|
75
|
+
# Architectures supported by binutils
|
76
|
+
# These have to be specified on the command line, as binutils does not
|
77
|
+
# provide any clue as to which architectures it has been compiled for
|
78
|
+
# on the local machine.
|
79
|
+
# NOTE: These were compiled from the list of architectures in bfd.h .
|
80
|
+
|
81
|
+
ARCH= %w[ m32c alpha arc arm avr bfin cr16 cris crx d10v d30v
|
82
|
+
dlx h8300 h8500 hppa i370 i386 i860 i960 ia64 ip2k fr30
|
83
|
+
lm32 m32r m68k m88k maxq mt microblaze msp430 ns32k mcore
|
84
|
+
mep mips mmix mn10200 mn10300 openrisc or32 pdp11 pj
|
85
|
+
powerpc rs6000 s390 score sh sparc spu tic30 tic4x tic54x
|
86
|
+
tic80 v850 w65 xstormy16 xc16x xtensa z80 z8k vax frv
|
87
|
+
moxie iq2000 m32c ]
|
88
|
+
|
89
|
+
# Define all architecture options
|
90
|
+
ARCH.each { |a| with_config( "ARCH_#{a.upcase}" ) }
|
91
|
+
|
92
|
+
# ----------------------------------------------------------------------
|
93
|
+
# Detect architectures supported locally
|
94
|
+
|
95
|
+
SEEN_ARCH = []
|
96
|
+
def handle_bfd_arch( line )
|
97
|
+
arch = line.strip
|
98
|
+
if ARCH.include?(arch) and not SEEN_ARCH.include?(arch)
|
99
|
+
puts "Adding architecture '#{arch}'"
|
100
|
+
SEEN_ARCH << arch
|
101
|
+
$CPPFLAGS += " -DARCH_#{arch.upcase}"
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# allow user to override the objdump binary using --with-objdump=path
|
106
|
+
objdump_bin = with_config('objdump', 'objdump')
|
107
|
+
|
108
|
+
# use objdump -i to get supported architectures
|
109
|
+
`#{objdump_bin} -i`.split("\n").each { |line| handle_bfd_arch(line) }
|
110
|
+
|
111
|
+
# default to i386 if objdump failed to run or no architectures were found
|
112
|
+
$CPPFLAGS += " -DARCH_I386" if SEEN_ARCH.length == 0
|
113
|
+
|
114
|
+
# ----------------------------------------------------------------------
|
115
|
+
# Compatibility flags
|
116
|
+
|
117
|
+
# Define a compatibility macro for the C sources based on the running
# interpreter. The patterns are anchored at the start of the version
# string and the dots are escaped, so only genuine 1.8.x / 1.9.x
# interpreters match (an unanchored /1.8/ also matches e.g. "2.1.8").
if RUBY_VERSION =~ /\A1\.8\./ then
  $CPPFLAGS += " -DRUBY_18"
elsif RUBY_VERSION =~ /\A1\.9\./ then
  $CPPFLAGS += " -DRUBY_19"
end
|
122
|
+
|
123
|
+
# ----------------------------------------------------------------------
|
124
|
+
# Makefile
|
125
|
+
|
126
|
+
create_makefile('OpdisExt')
|
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
|
2
|
+
# :title: Opdis::Callbacks
|
3
|
+
=begin rdoc
|
4
|
+
=Opdis Callbacks
|
5
|
+
<i>Copyright 2010 Thoughtgang <http://www.thoughtgang.org></i>
|
6
|
+
|
7
|
+
= Opdis Callback Routines
|
8
|
+
|
9
|
+
== Summary
|
10
|
+
|
11
|
+
libopdis uses callback functions to override its default behavior. The
|
12
|
+
OpdisExt extension provides stubs that allow Ruby objects to be used
|
13
|
+
for these callbacks.
|
14
|
+
|
15
|
+
== Contact
|
16
|
+
Support:: community@thoughtgang.org
|
17
|
+
Project:: http://rubyforge.org/projects/opdis/
|
18
|
+
=end
|
19
|
+
|
20
|
+
module Opdis
|
21
|
+
|
22
|
+
# ----------------------------------------------------------------------
|
23
|
+
=begin rdoc
|
24
|
+
An object responsible for filling an Opdis::Instruction object based on the
|
25
|
+
output of libopcodes.
|
26
|
+
=end
|
27
|
+
class InstructionDecoder
|
28
|
+
|
29
|
+
=begin rdoc
|
30
|
+
Fill an Opdis::Instruction object based on the information supplied by
|
31
|
+
libopcodes.
|
32
|
+
|
33
|
+
The <i>insn</i> argument is an Opdis::Instruction object to be filled by this
|
34
|
+
method. Depending on the InstructionDecoder object, it may have been
|
35
|
+
partially filled by a previous Decoder (e.g. the GenericDecoder and one of the
|
36
|
+
X86Decoders).
|
37
|
+
|
38
|
+
The <i>hash</i> argument contains the output of libopcodes and has the following
|
39
|
+
members:
|
40
|
+
|
41
|
+
:vma:: The virtual memory address of the instruction.
|
42
|
+
:offset:: The offset of the instruction into the target buffer.
|
43
|
+
:size:: The size of the instruction in bytes.
|
44
|
+
:buffer:: An array containing the bytes in the instruction.
|
45
|
+
:items:: An array of the instruction strings generated by libopcodes.
|
46
|
+
:raw_insn:: The complete instruction string (ASCII) as generated by libopcodes.
|
47
|
+
:branch_delay:: The number of instructions that will execute before the branch
|
48
|
+
takes effect.
|
49
|
+
This is not always set by libopcodes.
|
50
|
+
:data_size:: The size of the data reference in the instruction.
|
51
|
+
This is not always set by libopcodes.
|
52
|
+
:type:: The type of the instruction (e.g. non-branch, branch type, etc).
|
53
|
+
This is not always set by libopcodes.
|
54
|
+
:target:: The target address of a branch or dereference.
|
55
|
+
This is not always set by libopcodes.
|
56
|
+
:target2:: The second target address of a branch or dereference.
|
57
|
+
This is not always set by libopcodes.
|
58
|
+
|
59
|
+
InstructionDecoder#decode will invoke opdis_default_decoder to fill
|
60
|
+
architecture-independent members such as Instruction#vma and Instruction#ascii.
|
61
|
+
|
62
|
+
This method must return success or failure. Failure will result in an
|
63
|
+
error message being added to Disassembly.errors.
|
64
|
+
=end
|
65
|
+
def decode( insn, hash )
|
66
|
+
true
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
# ----------------------------------------------------------------------
|
72
|
+
=begin rdoc
|
73
|
+
A decoder for disassembled AT&T syntax x86 instructions.
|
74
|
+
=end
|
75
|
+
class X86Decoder < InstructionDecoder
|
76
|
+
|
77
|
+
=begin rdoc
|
78
|
+
See InstructionDecoder#decode.
|
79
|
+
This will invoke opdis default x86 decoder with AT&T syntax.
|
80
|
+
=end
|
81
|
+
def decode( insn, hash )
|
82
|
+
true
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
86
|
+
|
87
|
+
# ----------------------------------------------------------------------
|
88
|
+
=begin rdoc
|
89
|
+
A decoder for disassembled Intel syntax x86 instructions.
|
90
|
+
=end
|
91
|
+
class X86IntelDecoder < InstructionDecoder
|
92
|
+
|
93
|
+
=begin rdoc
|
94
|
+
See InstructionDecoder#decode.
|
95
|
+
This will invoke opdis default x86 decoder with Intel syntax.
|
96
|
+
=end
|
97
|
+
def decode( insn, hash )
|
98
|
+
true
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
102
|
+
|
103
|
+
# ----------------------------------------------------------------------
|
104
|
+
=begin rdoc
|
105
|
+
An object for determining whether an address has been encountered.
|
106
|
+
|
107
|
+
This is used to prevent endless loops in control-flow analysis.
|
108
|
+
=end
|
109
|
+
class VisitedAddressTracker
|
110
|
+
|
111
|
+
=begin rdoc
|
112
|
+
Return <i>true</i> if the address at Instruction.vma has been encountered,
|
113
|
+
false otherwise.
|
114
|
+
|
115
|
+
VisitedAddressTracker#visited? will invoke the Opdis default visited address
|
116
|
+
handler, which uses an AVL tree to store visited addresses.
|
117
|
+
=end
|
118
|
+
def visited?( insn )
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
122
|
+
|
123
|
+
# ----------------------------------------------------------------------
|
124
|
+
=begin rdoc
|
125
|
+
An object for determining the VMA referred to by the target operand of a
|
126
|
+
branch instruction, if possible.
|
127
|
+
=end
|
128
|
+
class AddressResolver
|
129
|
+
|
130
|
+
=begin rdoc
|
131
|
+
Return the VMA (e.g. the contents of a register, or the contents pointed
|
132
|
+
to by an address expression) for the target operand of the instruction.
|
133
|
+
Return <i>nil</i> if there is no target operand, or the operand VMA cannot
|
134
|
+
be determined.
|
135
|
+
|
136
|
+
AddressResolver#resolve will invoke the default Opdis resolver, which returns
|
137
|
+
<i>nil</i> regardless of operand value.
|
138
|
+
=end
|
139
|
+
def resolve( insn )
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,636 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
|
2
|
+
# :title: Opdis::DataModel
|
3
|
+
=begin rdoc
|
4
|
+
=Opdis Data Model
|
5
|
+
<i>Copyright 2010 Thoughtgang <http://www.thoughtgang.org></i>
|
6
|
+
|
7
|
+
= Opdis Data Model
|
8
|
+
|
9
|
+
== Summary
|
10
|
+
|
11
|
+
The Opdis data model provides a representation for Instruction and Opdis
|
12
|
+
objects that supports more sophisticated analyses than simple string
|
13
|
+
compares. The original strings produced by libopcodes are also made
|
14
|
+
available for 'raw' processing.
|
15
|
+
|
16
|
+
==Contact
|
17
|
+
Support:: community@thoughtgang.org
|
18
|
+
Project:: http://rubyforge.org/projects/opdis/
|
19
|
+
=end
|
20
|
+
|
21
|
+
|
22
|
+
module Opdis
|
23
|
+
|
24
|
+
=begin rdoc
|
25
|
+
A disassembled instruction.
|
26
|
+
|
27
|
+
The Instruction object contains information which may not be made available by
|
28
|
+
the decoder. The libopcodes disassembler generates a list of ASCII strings
|
29
|
+
which the decoder converts to Instruction objects. The generic decoder
|
30
|
+
(see InstructionDecoder) fills the <i>status</i>, <i>vma</i>, <i>size</i>,
|
31
|
+
<i>bytes</i>, and <i>ascii</i> members.
|
32
|
+
|
33
|
+
Use the <i>status</i> member to determine how much information the decoder
|
34
|
+
was able to produce. DECODE_BASIC indicates that the generic decoder has
|
35
|
+
filled the fields mentioned above. DECODE_MNEMONIC and DECODE_OPERANDS
|
36
|
+
indicate that the decoder has successfully identified the mnemonic and
|
37
|
+
the operands of the instruction, respectively; this means that the
|
38
|
+
<i>prefixes</i>, <i>mnemonic</i>, <i>operands</i>, <i>src</i>, <i>dest</i>,
|
39
|
+
and <i>target</i> members have been filled, though the Operand objects in
|
40
|
+
<i>operands</i> will only have their <i>ascii</i> member filled.
|
41
|
+
DECODE_MNEMONIC_FLAGS and DECODE_OPERAND_FLAGS indicate that instruction
|
42
|
+
and operand metadata have been filled.
|
43
|
+
=end
|
44
|
+
class Instruction
|
45
|
+
|
46
|
+
=begin rdoc
|
47
|
+
The decoding status of an instruction. This will be DECODE_INVALID or any
|
48
|
+
combination of DECODE_BASIC, DECODE_MNEMONIC, DECODE_OPERANDS,
|
49
|
+
DECODE_MNEMONIC_FLAGS, and DECODE_OPERAND_FLAGS -- depending on how much
|
50
|
+
work the instruction decoder performed successfully.
|
51
|
+
=end
|
52
|
+
attr_accessor :status
|
53
|
+
|
54
|
+
=begin rdoc
|
55
|
+
Virtual Memory Address. The in-memory address of the instruction.
|
56
|
+
=end
|
57
|
+
attr_reader :vma
|
58
|
+
|
59
|
+
=begin rdoc
|
60
|
+
The size of the instruction in bytes.
|
61
|
+
=end
|
62
|
+
attr_reader :size
|
63
|
+
|
64
|
+
=begin rdoc
|
65
|
+
An array containing the bytes of the instruction.
|
66
|
+
=end
|
67
|
+
attr_reader :bytes
|
68
|
+
|
69
|
+
=begin rdoc
|
70
|
+
The target operand of a branch instruction, or <i>nil</i>.
|
71
|
+
=end
|
72
|
+
attr_reader :target
|
73
|
+
|
74
|
+
=begin rdoc
|
75
|
+
The destination (write) operand of an instruction, or <i>nil</i>.
|
76
|
+
=end
|
77
|
+
attr_reader :dest
|
78
|
+
|
79
|
+
=begin rdoc
|
80
|
+
The first source (read) operand of an instruction, or <i>nil</i>.
|
81
|
+
=end
|
82
|
+
attr_reader :src
|
83
|
+
|
84
|
+
=begin rdoc
|
85
|
+
The category or high-level type of the instruction. This will be one of
|
86
|
+
CAT_CFLOW, CAT_STACK, CAT_LOADSTORE, CAT_TEST, CAT_MATH, CAT_BIT, CAT_IO,
|
87
|
+
CAT_TRAP, CAT_PRIV, CAT_NOP, or <i>nil</i> if the instruction category is
|
88
|
+
unknown.
|
89
|
+
=end
|
90
|
+
attr_accessor :category
|
91
|
+
|
92
|
+
=begin rdoc
|
93
|
+
The category-specific flags for the instruction. This is generally used
|
94
|
+
to encode a specific instruction type, e.g. FLG_JMP for an unconditional
|
95
|
+
jump instruction, or FLG_POP for a stack pop instruction.
|
96
|
+
=end
|
97
|
+
attr_reader :flags
|
98
|
+
|
99
|
+
=begin rdoc
|
100
|
+
The Instruction Set Architecture of the instruction. This is a subset of the
|
101
|
+
full CPU architecture ISA.
|
102
|
+
ISA_GEN, ISA_FPU, ISA_GPU, ISA_SIMD, ISA_VM
|
103
|
+
=end
|
104
|
+
attr_accessor :isa
|
105
|
+
|
106
|
+
=begin rdoc
|
107
|
+
Array of instruction prefix strings.
|
108
|
+
=end
|
109
|
+
attr_reader :prefixes
|
110
|
+
|
111
|
+
=begin rdoc
|
112
|
+
Array of Opdis::Operand objects.
|
113
|
+
=end
|
114
|
+
attr_reader :operands
|
115
|
+
|
116
|
+
=begin rdoc
|
117
|
+
Instruction mnemonic string.
|
118
|
+
=end
|
119
|
+
attr_accessor :mnemonic
|
120
|
+
|
121
|
+
=begin rdoc
|
122
|
+
Comment string generated by libopcodes.
|
123
|
+
=end
|
124
|
+
attr_accessor :comment
|
125
|
+
|
126
|
+
=begin rdoc
|
127
|
+
Invalid instruction.
|
128
|
+
=end
|
129
|
+
DECODE_INVALID='invalid'
|
130
|
+
=begin rdoc
|
131
|
+
Basic instruction decoding has been performed. This means that the <i>vma</i>,
|
132
|
+
<i>size</i>, <i>bytes</i>, and <i>ascii</i> members are valid.
|
133
|
+
=end
|
134
|
+
DECODE_BASIC='basic'
|
135
|
+
=begin rdoc
|
136
|
+
Instruction mnemonic has been decoded. This means that the <i>prefixes</i> and
|
137
|
+
<i>mnemonic</i> members are valid.
|
138
|
+
=end
|
139
|
+
DECODE_MNEMONIC='mnemonic'
|
140
|
+
=begin rdoc
|
141
|
+
Basic operand decoding has been performed. This means that the <i>src</i>,
|
142
|
+
<i>dest</i>, and <i>target</i> members are valid. The <i>operands</i> array
|
143
|
+
has been filled with Operand objects whose <i>ascii</i> member is valid.
|
144
|
+
=end
|
145
|
+
DECODE_OPERANDS='operands'
|
146
|
+
=begin rdoc
|
147
|
+
Instruction metadata has been decoded. This means that the Instruction
|
148
|
+
<i>isa</i>, <i>category</i>, and <i>flags</i> members are valid.
|
149
|
+
=end
|
150
|
+
DECODE_MNEMONIC_FLAGS='mnemonic flags'
|
151
|
+
=begin rdoc
|
152
|
+
Operand metadata has been decoded. This means that the <i>operands</i> field
|
153
|
+
is filled with objects derived from Operand, whose members are all valid.
|
154
|
+
=end
|
155
|
+
DECODE_OPERAND_FLAGS='operand flags'
|
156
|
+
|
157
|
+
=begin rdoc
|
158
|
+
General-purpose instruction set. The default ISA for all instructions.
|
159
|
+
=end
|
160
|
+
ISA_GEN='general'
|
161
|
+
=begin rdoc
|
162
|
+
Floating-point instruction set.
|
163
|
+
=end
|
164
|
+
ISA_FPU='fpu'
|
165
|
+
=begin rdoc
|
166
|
+
Graphics card instruction set.
|
167
|
+
=end
|
168
|
+
ISA_GPU='gpu'
|
169
|
+
=begin rdoc
|
170
|
+
SIMD (single instruction, multiple data) instruction set. Examples include
|
171
|
+
MMX, SSE, SSE2, SSE3, Altivec, and 3DNow! instructions.
|
172
|
+
=end
|
173
|
+
ISA_SIMD='simd'
|
174
|
+
=begin rdoc
|
175
|
+
Virtual Machine or virtualization (hypervisor) extensions.
|
176
|
+
=end
|
177
|
+
ISA_VM='vm'
|
178
|
+
|
179
|
+
=begin rdoc
|
180
|
+
Unrecognized instruction.
|
181
|
+
=end
|
182
|
+
CAT_UNKNOWN='unknown'
|
183
|
+
=begin rdoc
|
184
|
+
Control flow (jump, call, return) instruction.
|
185
|
+
=end
|
186
|
+
CAT_CFLOW='control-flow'
|
187
|
+
=begin rdoc
|
188
|
+
Stack manipulation (push, pop) instruction.
|
189
|
+
=end
|
190
|
+
CAT_STACK='stack'
|
191
|
+
=begin rdoc
|
192
|
+
Load/store (move) instruction.
|
193
|
+
=end
|
194
|
+
CAT_LOADSTORE='load/store'
|
195
|
+
=begin rdoc
|
196
|
+
Test or compare instruction.
|
197
|
+
=end
|
198
|
+
CAT_TEST='test'
|
199
|
+
=begin rdoc
|
200
|
+
Mathematical (add, sub, mul, etc) instruction.
|
201
|
+
=end
|
202
|
+
CAT_MATH='mathematic'
|
203
|
+
=begin rdoc
|
204
|
+
Logical (and, or, xor, not, etc) instruction.
|
205
|
+
=end
|
206
|
+
CAT_BIT='bitwise'
|
207
|
+
=begin rdoc
|
208
|
+
Input/output (i.e. port read/write) instruction.
|
209
|
+
=end
|
210
|
+
CAT_IO='i/o'
|
211
|
+
=begin rdoc
|
212
|
+
Trap or interrupt instruction.
|
213
|
+
=end
|
214
|
+
CAT_TRAP='trap'
|
215
|
+
=begin rdoc
|
216
|
+
Privileged (ring0) or system management instruction.
|
217
|
+
=end
|
218
|
+
CAT_PRIV='privileged'
|
219
|
+
=begin rdoc
|
220
|
+
No-operation instruction.
|
221
|
+
=end
|
222
|
+
CAT_NOP='no-op'
|
223
|
+
|
224
|
+
=begin rdoc
|
225
|
+
Call a procedure.
|
226
|
+
=end
|
227
|
+
FLG_CALL='call'
|
228
|
+
=begin rdoc
|
229
|
+
Conditionally call a procedure.
|
230
|
+
=end
|
231
|
+
FLG_CALLCC='conditional call'
|
232
|
+
=begin rdoc
|
233
|
+
Jump to an address.
|
234
|
+
=end
|
235
|
+
FLG_JMP='jump'
|
236
|
+
=begin rdoc
|
237
|
+
Conditionally jump to an address.
|
238
|
+
=end
|
239
|
+
FLG_JMPCC='conditional jump'
|
240
|
+
=begin rdoc
|
241
|
+
Return from a procedure.
|
242
|
+
=end
|
243
|
+
FLG_RET='return'
|
244
|
+
=begin rdoc
|
245
|
+
Push onto the stack.
|
246
|
+
=end
|
247
|
+
FLG_PUSH='push'
|
248
|
+
=begin rdoc
|
249
|
+
Pop from the stack.
|
250
|
+
=end
|
251
|
+
FLG_POP='pop'
|
252
|
+
=begin rdoc
|
253
|
+
Enter a stack frame.
|
254
|
+
=end
|
255
|
+
FLG_FRAME='enter frame'
|
256
|
+
=begin rdoc
|
257
|
+
Leave a stack frame.
|
258
|
+
=end
|
259
|
+
FLG_UNFRAME='leave frame'
|
260
|
+
=begin rdoc
|
261
|
+
Logical AND operation.
|
262
|
+
=end
|
263
|
+
FLG_AND='bitwise and'
|
264
|
+
=begin rdoc
|
265
|
+
Logical OR operation.
|
266
|
+
=end
|
267
|
+
FLG_OR='bitwise or'
|
268
|
+
=begin rdoc
|
269
|
+
Logical XOR operation.
|
270
|
+
=end
|
271
|
+
FLG_XOR='bitwise xor'
|
272
|
+
=begin rdoc
|
273
|
+
Logical NOT operation.
|
274
|
+
=end
|
275
|
+
FLG_NOT='bitwise not'
|
276
|
+
=begin rdoc
|
277
|
+
Logical (no carry) shift left.
|
278
|
+
=end
|
279
|
+
FLG_LSL='logical shift left'
|
280
|
+
=begin rdoc
|
281
|
+
Logical (no carry) shift right.
|
282
|
+
=end
|
283
|
+
FLG_LSR='logical shift right'
|
284
|
+
=begin rdoc
|
285
|
+
Arithmetic (with carry) shift left.
|
286
|
+
=end
|
287
|
+
FLG_ASL='arithmetic shift left'
|
288
|
+
=begin rdoc
|
289
|
+
Arithmetic (with carry) shift right.
|
290
|
+
=end
|
291
|
+
FLG_ASR='arithmetic shift right'
|
292
|
+
=begin rdoc
|
293
|
+
Logical (no carry) rotate left.
|
294
|
+
=end
|
295
|
+
FLG_ROL='rotate left'
|
296
|
+
=begin rdoc
|
297
|
+
Logical (no carry) rotate right.
|
298
|
+
=end
|
299
|
+
FLG_ROR='rotate right'
|
300
|
+
=begin rdoc
|
301
|
+
Arithmetic (with carry) rotate left.
|
302
|
+
=end
|
303
|
+
FLG_RCL='rotate carry left'
|
304
|
+
=begin rdoc
|
305
|
+
Arithmetic (with carry) rotate right.
|
306
|
+
=end
|
307
|
+
FLG_RCR='rotate carry right'
|
308
|
+
=begin rdoc
|
309
|
+
Read from I/O port.
|
310
|
+
=end
|
311
|
+
FLG_IN='input from port'
|
312
|
+
=begin rdoc
|
313
|
+
Write to I/O port.
|
314
|
+
=end
|
315
|
+
FLG_OUT='output to port'
|
316
|
+
|
317
|
+
=begin rdoc
|
318
|
+
Returns true if the instruction is a branch (is a CALL, CALLCC, JMP, or
|
319
|
+
JMPCC). This is only reliable if <i>status</i> includes DECODE_MNEMONIC_FLAGS.
|
320
|
+
=end
|
321
|
+
def branch?
|
322
|
+
end
|
323
|
+
|
324
|
+
=begin rdoc
|
325
|
+
Returns true if execution falls through to the next instruction. This is true
|
326
|
+
in all cases except JMP and RET. This is only reliable if <i>status</i>
|
327
|
+
includes DECODE_MNEMONIC_FLAGS.
|
328
|
+
=end
|
329
|
+
def fallthrough?
|
330
|
+
end
|
331
|
+
|
332
|
+
=begin rdoc
|
333
|
+
Returns the <i>ascii</i> field of the instruction.
|
334
|
+
=end
|
335
|
+
def to_s
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
339
|
+
=begin rdoc
|
340
|
+
An instruction operand. A properly-decoded instruction will have Operand
|
341
|
+
subclasses in its <i>operands</i> array; the Operand base class is used only
|
342
|
+
when the operand strings have been identified by the decoder but not
|
343
|
+
processed.
|
344
|
+
=end
|
345
|
+
class Operand
|
346
|
+
|
347
|
+
=begin rdoc
|
348
|
+
Metadata containing additional information about the operand
|
349
|
+
=end
|
350
|
+
attr_accessor :flags
|
351
|
+
|
352
|
+
=begin rdoc
|
353
|
+
The size in bytes of the data referenced by the instruction, or <i>nil</i> if
|
354
|
+
not known.
|
355
|
+
=end
|
356
|
+
attr_accessor :data_size
|
357
|
+
|
358
|
+
=begin rdoc
|
359
|
+
The string for the operand returned by libopcodes.
|
360
|
+
=end
|
361
|
+
attr_accessor :ascii
|
362
|
+
|
363
|
+
=begin rdoc
|
364
|
+
Operand is read by instruction.
|
365
|
+
=end
|
366
|
+
FLG_R='r'
|
367
|
+
=begin rdoc
|
368
|
+
Operand is written to by instruction.
|
369
|
+
=end
|
370
|
+
FLG_W='w'
|
371
|
+
=begin rdoc
|
372
|
+
Operand is executed (jumped to) by instruction.
|
373
|
+
=end
|
374
|
+
FLG_X='x'
|
375
|
+
=begin rdoc
|
376
|
+
=end
|
377
|
+
FLG_SIGNED='signed'
|
378
|
+
=begin rdoc
|
379
|
+
=end
|
380
|
+
FLG_ADDR='address'
|
381
|
+
=begin rdoc
|
382
|
+
=end
|
383
|
+
FLG_INDIRECT='indirect address'
|
384
|
+
|
385
|
+
=begin rdoc
|
386
|
+
Returns the <i>ascii</i> field.
|
387
|
+
=end
|
388
|
+
def to_s
|
389
|
+
end
|
390
|
+
end
|
391
|
+
|
392
|
+
=begin rdoc
|
393
|
+
A numeric value explicitly encoded in the bytes of the instruction.
|
394
|
+
=end
|
395
|
+
class ImmediateOperand < Operand
|
396
|
+
|
397
|
+
=begin rdoc
|
398
|
+
The immediate value, interpreted as signed or unsigned based on whether
|
399
|
+
Operand#flags includes FLG_SIGNED.
|
400
|
+
=end
|
401
|
+
attr_accessor :value
|
402
|
+
|
403
|
+
=begin rdoc
|
404
|
+
The immediate value interpreted as a signed integer.
|
405
|
+
=end
|
406
|
+
attr_accessor :signed
|
407
|
+
|
408
|
+
=begin rdoc
|
409
|
+
The immediate value interpreted as an unsigned integer.
|
410
|
+
=end
|
411
|
+
attr_accessor :unsigned
|
412
|
+
|
413
|
+
=begin rdoc
|
414
|
+
The immediate value interpreted as a VMA.
|
415
|
+
=end
|
416
|
+
attr_accessor :vma
|
417
|
+
end
|
418
|
+
|
419
|
+
=begin rdoc
|
420
|
+
An address expression such as an Intel Effective Address. This generally
|
421
|
+
takes the form of
|
422
|
+
|
423
|
+
base + (index * scale) + disp
|
424
|
+
|
425
|
+
where <b>base</b> and <b>index</b> are registers, <b>scale</b> is a power of
|
426
|
+
two, <b>disp</b> is an immediate value, and <b>*</b> is a "shift operation"
|
427
|
+
(generally arithmetic shift left).
|
428
|
+
=end
|
429
|
+
class AddressExpressionOperand < Operand
|
430
|
+
|
431
|
+
=begin rdoc
|
432
|
+
The shift algorithm used by the expression. This is one of SHIFT_ASL,
|
433
|
+
SHIFT_LSL, SHIFT_LSR, SHIFT_ROR, or SHIFT_RRX. The default is SHIFT_ASL;
|
434
|
+
the other algorithms apply to the ARM architecture.
|
435
|
+
=end
|
436
|
+
attr_accessor :shift
|
437
|
+
|
438
|
+
=begin rdoc
|
439
|
+
The scale factor of the expression. This must be a power of two; the default
|
440
|
+
scale value is <b>1</b>.
|
441
|
+
=end
|
442
|
+
attr_accessor :scale
|
443
|
+
|
444
|
+
=begin rdoc
|
445
|
+
A Register object for the <b>index</b> value of the expression, or <i>nil</i> if
|
446
|
+
the expression does not use an index value.
|
447
|
+
=end
|
448
|
+
attr_accessor :index
|
449
|
+
|
450
|
+
=begin rdoc
|
451
|
+
A Register object for the <b>base</b> value of the expression, or <i>nil</i> if
|
452
|
+
the expression does not use a base value.
|
453
|
+
=end
|
454
|
+
attr_accessor :base
|
455
|
+
|
456
|
+
=begin rdoc
|
457
|
+
A numeric value or an AbsoluteAddress object for the displacement of the
|
458
|
+
expression, or <i>nil</i> if the expression does not use a displacement.
|
459
|
+
=end
|
460
|
+
attr_accessor :displacement
|
461
|
+
|
462
|
+
=begin rdoc
|
463
|
+
Logical (no carry) shift left.
|
464
|
+
=end
|
465
|
+
SHIFT_LSL='lsl'
|
466
|
+
=begin rdoc
|
467
|
+
Logical (no carry) shift right.
|
468
|
+
=end
|
469
|
+
SHIFT_LSR='lsr'
|
470
|
+
=begin rdoc
|
471
|
+
Arithmetic (carry) shift left.
|
472
|
+
=end
|
473
|
+
SHIFT_ASL='asl'
|
474
|
+
=begin rdoc
|
475
|
+
Logical (no carry) rotate right.
|
476
|
+
=end
|
477
|
+
SHIFT_ROR='ror'
|
478
|
+
=begin rdoc
|
479
|
+
=end
|
480
|
+
SHIFT_RRX='rrx'
|
481
|
+
end
|
482
|
+
|
483
|
+
=begin rdoc
|
484
|
+
A segmented address consisting of a base (segment) register and a displacement
|
485
|
+
or offset. In Intel notation, these take the form <b>segment:offset</b>.
|
486
|
+
=end
|
487
|
+
class AbsoluteAddress
|
488
|
+
|
489
|
+
=begin rdoc
|
490
|
+
A Register object for the segment or base of the absolute address.
|
491
|
+
=end
|
492
|
+
attr_accessor :segment
|
493
|
+
|
494
|
+
=begin rdoc
|
495
|
+
An immediate value for the offset or displacement of the absolute address.
|
496
|
+
=end
|
497
|
+
attr_accessor :offset
|
498
|
+
end
|
499
|
+
|
500
|
+
=begin rdoc
|
501
|
+
An AbsoluteAddress operand.
|
502
|
+
|
503
|
+
See AbsoluteAddress class.
|
504
|
+
=end
|
505
|
+
class AbsoluteAddressOperand < Operand
|
506
|
+
|
507
|
+
attr_accessor :segment, :offset
|
508
|
+
|
509
|
+
end
|
510
|
+
|
511
|
+
=begin rdoc
|
512
|
+
A CPU register.
|
513
|
+
=end
|
514
|
+
class Register
|
515
|
+
|
516
|
+
=begin rdoc
|
517
|
+
A numeric ID for the register. Registers with the same ID but different names
|
518
|
+
are aliases of each other.
|
519
|
+
=end
|
520
|
+
attr_reader :id
|
521
|
+
|
522
|
+
=begin rdoc
|
523
|
+
The size of the register in bytes.
|
524
|
+
=end
|
525
|
+
attr_reader :size
|
526
|
+
|
527
|
+
=begin rdoc
|
528
|
+
The name or mnemonic for the register.
|
529
|
+
=end
|
530
|
+
attr_reader :name
|
531
|
+
|
532
|
+
=begin rdoc
|
533
|
+
Metadata describing the purpose or general use of the register.
|
534
|
+
=end
|
535
|
+
attr_accessor :purpose
|
536
|
+
|
537
|
+
=begin rdoc
|
538
|
+
A general-purpose register.
|
539
|
+
=end
|
540
|
+
FLG_GEN='general purpose'
|
541
|
+
=begin rdoc
|
542
|
+
A floating-point register.
|
543
|
+
=end
|
544
|
+
FLG_FPU='fpu'
|
545
|
+
=begin rdoc
|
546
|
+
A register on the graphics card.
|
547
|
+
=end
|
548
|
+
FLG_GPU='gpu'
|
549
|
+
=begin rdoc
|
550
|
+
An SIMD register.
|
551
|
+
=end
|
552
|
+
FLG_SIMD='simd'
|
553
|
+
=begin rdoc
|
554
|
+
A system register for task management.
|
555
|
+
=end
|
556
|
+
FLG_TASK='task mgt'
|
557
|
+
=begin rdoc
|
558
|
+
A system register for memory management.
|
559
|
+
=end
|
560
|
+
FLG_MEM='memory mgt'
|
561
|
+
=begin rdoc
|
562
|
+
A system register providing debugger support.
|
563
|
+
=end
|
564
|
+
FLG_DBG='debug'
|
565
|
+
=begin rdoc
|
566
|
+
The program counter or instruction pointer.
|
567
|
+
=end
|
568
|
+
FLG_PC='pc'
|
569
|
+
=begin rdoc
|
570
|
+
The flags or condition code register.
|
571
|
+
=end
|
572
|
+
FLG_FLAGS='flags'
|
573
|
+
=begin rdoc
|
574
|
+
The stack pointer.
|
575
|
+
=end
|
576
|
+
FLG_STACK='stack'
|
577
|
+
=begin rdoc
|
578
|
+
The frame pointer.
|
579
|
+
=end
|
580
|
+
FLG_FRAME='stack frame'
|
581
|
+
=begin rdoc
|
582
|
+
A memory segment register.
|
583
|
+
=end
|
584
|
+
FLG_SEG='segment'
|
585
|
+
=begin rdoc
|
586
|
+
The (virtual) zero register.
|
587
|
+
=end
|
588
|
+
FLG_Z='zero'
|
589
|
+
=begin rdoc
|
590
|
+
A register used for incoming arguments inside a procedure.
|
591
|
+
=end
|
592
|
+
FLG_IN='args in'
|
593
|
+
=begin rdoc
|
594
|
+
A register used for outgoing arguments in a procedure call.
|
595
|
+
=end
|
596
|
+
FLG_OUT='args out'
|
597
|
+
=begin rdoc
|
598
|
+
A register used for local variables inside a procedure.
|
599
|
+
=end
|
600
|
+
FLG_LOCALS='locals'
|
601
|
+
=begin rdoc
|
602
|
+
A register used for a return value from a procedure call.
|
603
|
+
=end
|
604
|
+
FLG_RET='return'
|
605
|
+
end
|
606
|
+
|
607
|
+
=begin rdoc
|
608
|
+
A register operand.
|
609
|
+
|
610
|
+
See Register class.
|
611
|
+
=end
|
612
|
+
class RegisterOperand < Operand
|
613
|
+
|
614
|
+
attr_reader :id, :size,:name
|
615
|
+
attr_accessor :purpose
|
616
|
+
|
617
|
+
FLG_GEN='general purpose'
|
618
|
+
FLG_FPU='fpu'
|
619
|
+
FLG_GPU='gpu'
|
620
|
+
FLG_SIMD='simd'
|
621
|
+
FLG_TASK='task mgt'
|
622
|
+
FLG_MEM='memory mgt'
|
623
|
+
FLG_DBG='debug'
|
624
|
+
FLG_PC='pc'
|
625
|
+
FLG_FLAGS='flags'
|
626
|
+
FLG_STACK='stack'
|
627
|
+
FLG_FRAME='stack frame'
|
628
|
+
FLG_SEG='segment'
|
629
|
+
FLG_Z='zero'
|
630
|
+
FLG_IN='args in'
|
631
|
+
FLG_OUT='args out'
|
632
|
+
FLG_LOCALS='locals'
|
633
|
+
FLG_RET='return'
|
634
|
+
end
|
635
|
+
|
636
|
+
end
|