rallhook 0.7.5 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +2 -0
- data/CHANGELOG +2 -0
- data/README +0 -2
- data/Rakefile +1 -1
- data/TODO +0 -1
- data/ext/rallhook_base/deps/distorm/config.h +170 -0
- data/ext/rallhook_base/deps/distorm/distorm.h +401 -0
- data/ext/rallhook_base/deps/distorm/mnemonics.c +258 -0
- data/ext/rallhook_base/deps/distorm/mnemonics.h +200 -0
- data/ext/rallhook_base/deps/distorm/src/decoder.c +548 -0
- data/ext/rallhook_base/deps/distorm/src/decoder.h +18 -0
- data/ext/rallhook_base/deps/distorm/src/distorm.c +375 -0
- data/ext/rallhook_base/deps/distorm/src/instructions.c +490 -0
- data/ext/rallhook_base/deps/distorm/src/instructions.h +445 -0
- data/ext/rallhook_base/deps/distorm/src/insts.c +4851 -0
- data/ext/rallhook_base/deps/distorm/src/insts.h +36 -0
- data/ext/rallhook_base/deps/distorm/src/operands.c +1270 -0
- data/ext/rallhook_base/deps/distorm/src/operands.h +38 -0
- data/ext/rallhook_base/deps/distorm/src/prefix.c +380 -0
- data/ext/rallhook_base/deps/distorm/src/prefix.h +76 -0
- data/ext/rallhook_base/deps/distorm/src/pydistorm.h +62 -0
- data/ext/rallhook_base/deps/distorm/src/textdefs.c +180 -0
- data/ext/rallhook_base/deps/distorm/src/textdefs.h +68 -0
- data/ext/rallhook_base/deps/distorm/src/wstring.c +55 -0
- data/ext/rallhook_base/deps/distorm/src/wstring.h +43 -0
- data/ext/rallhook_base/deps/distorm/src/x86defs.c +41 -0
- data/ext/rallhook_base/deps/distorm/src/x86defs.h +105 -0
- data/ext/rallhook_base/extconf.rb +15 -20
- data/ext/rallhook_base/rallhook.c +20 -8
- metadata +27 -5
@@ -0,0 +1,18 @@
|
|
1
|
+
/*
|
2
|
+
decoder.h
|
3
|
+
|
4
|
+
Copyright (C) 2003-2009 Gil Dabah, http://ragestorm.net/distorm/
|
5
|
+
This file is licensed under the GPL license. See the file COPYING.
|
6
|
+
*/
|
7
|
+
|
8
|
+
|
9
|
+
#ifndef DECODER_H
|
10
|
+
#define DECODER_H
|
11
|
+
|
12
|
+
#include "../config.h"
|
13
|
+
|
14
|
+
/* Unsigned word used to carry instruction/prefix flag bits (e.g. masks built from INST_PRE_* values). */
typedef unsigned int _iflags;
|
15
|
+
|
16
|
+
/*
 * Core decoding routine shared by the public entry points in distorm.c.
 * ci describes the code to decode, result[] receives up to maxResultCount entries and
 * *usedInstructionsCount receives how many entries were produced (callers read it back as the result count).
 * NOTE(review): distorm_decompose passes FALSE and distorm_decode passes TRUE for supportOldIntr;
 * presumably it selects the layout expected by the older text-based interface - confirm in decoder.c.
 */
_DecodeResult decode_internal(const _CodeInfo* ci, int supportOldIntr, _DInst result[], unsigned int maxResultCount, unsigned int* usedInstructionsCount);
|
17
|
+
|
18
|
+
#endif /* DECODER_H */
|
@@ -0,0 +1,375 @@
|
|
1
|
+
/*
|
2
|
+
distorm.c
|
3
|
+
|
4
|
+
diStorm3 C Library Interface
|
5
|
+
diStorm3 - Powerful disassembler for X86/AMD64
|
6
|
+
http://ragestorm.net/distorm/
|
7
|
+
distorm at gmail dot com
|
8
|
+
Copyright (C) 2010 Gil Dabah
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
(at your option) any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>
|
22
|
+
*/
|
23
|
+
|
24
|
+
|
25
|
+
#include "../distorm.h"
|
26
|
+
#include "../config.h"
|
27
|
+
#include "decoder.h"
|
28
|
+
#include "x86defs.h"
|
29
|
+
#include "textdefs.h"
|
30
|
+
#include "wstring.h"
|
31
|
+
#include "../mnemonics.h"
|
32
|
+
|
33
|
+
|
34
|
+
/* C LIBRARY EXPORTS */
|
35
|
+
#ifdef SUPPORT_64BIT_OFFSET
|
36
|
+
_DecodeResult distorm_decompose64(const _CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
|
37
|
+
#else
|
38
|
+
_DecodeResult distorm_decompose32(const _CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
|
39
|
+
#endif
|
40
|
+
{
|
41
|
+
if (usedInstructionsCount == NULL) {
|
42
|
+
return DECRES_SUCCESS;
|
43
|
+
}
|
44
|
+
|
45
|
+
/* DECRES_SUCCESS still may indicate we may have something in the result, so zero it first thing. */
|
46
|
+
*usedInstructionsCount = 0;
|
47
|
+
|
48
|
+
if ((ci == NULL) ||
|
49
|
+
(ci->codeLen < 0) ||
|
50
|
+
((ci->dt != Decode16Bits) && (ci->dt != Decode32Bits) && (ci->dt != Decode64Bits)) ||
|
51
|
+
(ci->code == NULL) ||
|
52
|
+
(result == NULL) ||
|
53
|
+
((ci->features & (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) == (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)))
|
54
|
+
{
|
55
|
+
return DECRES_INPUTERR;
|
56
|
+
}
|
57
|
+
|
58
|
+
/* Assume length=0 is success. */
|
59
|
+
if (ci->codeLen == 0) {
|
60
|
+
return DECRES_SUCCESS;
|
61
|
+
}
|
62
|
+
|
63
|
+
return decode_internal(ci, FALSE, result, maxInstructions, usedInstructionsCount);
|
64
|
+
}
|
65
|
+
|
66
|
+
/* Helper function to concat an explicit size when it's unknown from the operands. */
|
67
|
+
static void distorm_format_size(_WString* str, const _DInst* di, int opNum)
|
68
|
+
{
|
69
|
+
/*
|
70
|
+
* We only have to output the size explicitly if it's not clear from the operands.
|
71
|
+
* For example:
|
72
|
+
* mov al, [0x1234] -> The size is 8, we know it from the AL register operand.
|
73
|
+
* mov [0x1234], 0x11 -> Now we don't know the size. Pam pam pam
|
74
|
+
*
|
75
|
+
* If given operand number is higher than 2, then output the size anyways.
|
76
|
+
*/
|
77
|
+
if ((opNum >= 2) || ((di->ops[0].type != O_REG) && (di->ops[1].type != O_REG))) {
|
78
|
+
switch (di->ops[opNum].size)
|
79
|
+
{
|
80
|
+
case 0: break; /* OT_MEM's unknown size. */
|
81
|
+
case 8: strcat_WSN(str, "BYTE "); break;
|
82
|
+
case 16: strcat_WSN(str, "WORD "); break;
|
83
|
+
case 32: strcat_WSN(str, "DWORD "); break;
|
84
|
+
case 64: strcat_WSN(str, "QWORD "); break;
|
85
|
+
case 80: strcat_WSN(str, "TBYTE "); break;
|
86
|
+
case 128: strcat_WSN(str, "DQWORD "); break;
|
87
|
+
case 256: strcat_WSN(str, "YWORD "); break;
|
88
|
+
default: /* Big oh uh if it gets here. */ break;
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
/*
 * Append the displacement of di to str as a sign character followed by its magnitude.
 *
 * Nothing is appended when di->dispSize is zero. Otherwise MINUS_DISP_CHR or PLUS_DISP_CHR is
 * written depending on the sign of the displacement viewed as a 64-bit signed value, and the
 * absolute value, limited by addrMask, is emitted through str_code_hqw().
 *
 * The magnitude is computed with unsigned arithmetic: negating the most negative 64-bit value as a
 * signed integer is undefined behaviour in C, whereas the unsigned subtraction below is well defined
 * and yields the same bit pattern for every other input.
 */
static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t addrMask)
{
	uint64_t magnitude;
	int isNegative;

	if (!di->dispSize) return;

	isNegative = ((int64_t)di->disp < 0);
	chrcat_WS(str, isNegative ? MINUS_DISP_CHR : PLUS_DISP_CHR);

	magnitude = (uint64_t)di->disp;
	if (isNegative) magnitude = (uint64_t)0 - magnitude; /* Two's complement negation without signed overflow. */
	magnitude &= addrMask;

	str_code_hqw(str, (uint8_t*)&magnitude);
}
|
105
|
+
|
106
|
+
#ifdef SUPPORT_64BIT_OFFSET
|
107
|
+
_DLLEXPORT_ void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
|
108
|
+
#else
|
109
|
+
_DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
|
110
|
+
#endif
|
111
|
+
{
|
112
|
+
_WString* str;
|
113
|
+
unsigned int i, isDefault;
|
114
|
+
int64_t tmpDisp64;
|
115
|
+
uint64_t addrMask = (uint64_t)-1;
|
116
|
+
uint8_t segment;
|
117
|
+
|
118
|
+
/* Set address mask, when default is for 64bits addresses. */
|
119
|
+
if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff;
|
120
|
+
else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff;
|
121
|
+
|
122
|
+
/* Copy other fields. */
|
123
|
+
result->size = di->size;
|
124
|
+
result->offset = di->addr & addrMask;
|
125
|
+
|
126
|
+
if (di->flags == FLAG_NOT_DECODABLE) {
|
127
|
+
str = &result->mnemonic;
|
128
|
+
strclear_WS(&result->operands);
|
129
|
+
strcpy_WSN(str, "DB ");
|
130
|
+
str_code_hb(str, di->imm.byte);
|
131
|
+
strclear_WS(&result->instructionHex);
|
132
|
+
str_hex_b(&result->instructionHex, di->imm.byte);
|
133
|
+
return; /* Skip to next instruction. */
|
134
|
+
}
|
135
|
+
|
136
|
+
str = &result->instructionHex;
|
137
|
+
strclear_WS(str);
|
138
|
+
for (i = 0; i < di->size; i++)
|
139
|
+
str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]);
|
140
|
+
|
141
|
+
str = &result->mnemonic;
|
142
|
+
switch (FLAG_GET_PREFIX(di->flags))
|
143
|
+
{
|
144
|
+
case FLAG_LOCK:
|
145
|
+
strcpy_WSN(str, "LOCK ");
|
146
|
+
break;
|
147
|
+
case FLAG_REP:
|
148
|
+
strcpy_WSN(str, "REP ");
|
149
|
+
break;
|
150
|
+
case FLAG_REPNZ:
|
151
|
+
strcpy_WSN(str, "REPNZ ");
|
152
|
+
break;
|
153
|
+
default:
|
154
|
+
/* Init mnemonic string, cause next touch is concatenation. */
|
155
|
+
strclear_WS(str);
|
156
|
+
break;
|
157
|
+
}
|
158
|
+
|
159
|
+
strcat_WS(str, (const _WString*)&_MNEMONICS[di->opcode]);
|
160
|
+
|
161
|
+
/* Format operands: */
|
162
|
+
str = &result->operands;
|
163
|
+
strclear_WS(str);
|
164
|
+
|
165
|
+
/* Special treatment for String instructions. */
|
166
|
+
if ((META_GET_ISC(di->meta) == ISC_INTEGER) &&
|
167
|
+
((di->opcode == I_MOVS) ||
|
168
|
+
(di->opcode == I_CMPS) ||
|
169
|
+
(di->opcode == I_STOS) ||
|
170
|
+
(di->opcode == I_LODS) ||
|
171
|
+
(di->opcode == I_SCAS)))
|
172
|
+
{
|
173
|
+
/*
|
174
|
+
* No operands are needed if the address size is the default one,
|
175
|
+
* and no segment is overridden, so add the suffix letter,
|
176
|
+
* to indicate size of operation and continue to next instruction.
|
177
|
+
*/
|
178
|
+
if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment)))
|
179
|
+
{
|
180
|
+
str = &result->mnemonic;
|
181
|
+
switch (di->ops[0].size)
|
182
|
+
{
|
183
|
+
case 8: chrcat_WS(str, 'B'); break;
|
184
|
+
case 16: chrcat_WS(str, 'W'); break;
|
185
|
+
case 32: chrcat_WS(str, 'D'); break;
|
186
|
+
case 64: chrcat_WS(str, 'Q'); break;
|
187
|
+
}
|
188
|
+
return;
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) {
|
193
|
+
if (i > 0) strcat_WSN(str, ", ");
|
194
|
+
switch (di->ops[i].type)
|
195
|
+
{
|
196
|
+
case O_REG:
|
197
|
+
strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
|
198
|
+
break;
|
199
|
+
case O_IMM:
|
200
|
+
/* If the instruction is 'push', show explicit size (except byte imm). */
|
201
|
+
if (di->opcode == I_PUSH && di->ops[i].size != 8) distorm_format_size(str, di, i);
|
202
|
+
/* Special fix for negative sign extended immediates. */
|
203
|
+
if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) {
|
204
|
+
if (di->imm.sbyte < 0) {
|
205
|
+
chrcat_WS(str, MINUS_DISP_CHR);
|
206
|
+
str_code_hb(str, -di->imm.sbyte);
|
207
|
+
break;
|
208
|
+
}
|
209
|
+
}
|
210
|
+
if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword);
|
211
|
+
else str_code_hdw(str, di->imm.dword);
|
212
|
+
break;
|
213
|
+
case O_IMM1:
|
214
|
+
str_code_hdw(str, di->imm.ex.i1);
|
215
|
+
break;
|
216
|
+
case O_IMM2:
|
217
|
+
str_code_hdw(str, di->imm.ex.i2);
|
218
|
+
break;
|
219
|
+
case O_DISP:
|
220
|
+
distorm_format_size(str, di, i);
|
221
|
+
chrcat_WS(str, OPEN_CHR);
|
222
|
+
if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
|
223
|
+
strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
|
224
|
+
chrcat_WS(str, SEG_OFF_CHR);
|
225
|
+
}
|
226
|
+
tmpDisp64 = di->disp & addrMask;
|
227
|
+
str_code_hqw(str, (uint8_t*)&tmpDisp64);
|
228
|
+
chrcat_WS(str, CLOSE_CHR);
|
229
|
+
break;
|
230
|
+
case O_SMEM:
|
231
|
+
distorm_format_size(str, di, i);
|
232
|
+
chrcat_WS(str, OPEN_CHR);
|
233
|
+
|
234
|
+
/*
|
235
|
+
* This is where we need to take special care for String instructions.
|
236
|
+
* If we got here, it means we need to explicitly show their operands.
|
237
|
+
* The problem with CMPS and MOVS is that they have two(!) memory operands.
|
238
|
+
* So we have to complete it ourselves, since the structure supplies only the segment that can be overridden.
|
239
|
+
* And make the rest of the String operations explicit.
|
240
|
+
*/
|
241
|
+
segment = SEGMENT_GET(di->segment);
|
242
|
+
isDefault = SEGMENT_IS_DEFAULT(di->segment);
|
243
|
+
switch (di->opcode)
|
244
|
+
{
|
245
|
+
case I_MOVS:
|
246
|
+
isDefault = FALSE;
|
247
|
+
if (i == 0) segment = R_ES;
|
248
|
+
break;
|
249
|
+
case I_CMPS:
|
250
|
+
isDefault = FALSE;
|
251
|
+
if (i == 1) segment = R_ES;
|
252
|
+
break;
|
253
|
+
case I_INS:
|
254
|
+
case I_LODS:
|
255
|
+
case I_STOS:
|
256
|
+
case I_SCAS: isDefault = FALSE; break;
|
257
|
+
}
|
258
|
+
if (!isDefault && (segment != R_NONE)) {
|
259
|
+
strcat_WS(str, (const _WString*)&_REGISTERS[segment]);
|
260
|
+
chrcat_WS(str, SEG_OFF_CHR);
|
261
|
+
}
|
262
|
+
|
263
|
+
strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
|
264
|
+
|
265
|
+
distorm_format_signed_disp(str, di, addrMask);
|
266
|
+
chrcat_WS(str, CLOSE_CHR);
|
267
|
+
break;
|
268
|
+
case O_MEM:
|
269
|
+
distorm_format_size(str, di, i);
|
270
|
+
chrcat_WS(str, OPEN_CHR);
|
271
|
+
if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
|
272
|
+
strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
|
273
|
+
chrcat_WS(str, SEG_OFF_CHR);
|
274
|
+
}
|
275
|
+
if (di->base != R_NONE) {
|
276
|
+
strcat_WS(str, (const _WString*)&_REGISTERS[di->base]);
|
277
|
+
chrcat_WS(str, PLUS_DISP_CHR);
|
278
|
+
}
|
279
|
+
strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
|
280
|
+
if (di->scale != 0) {
|
281
|
+
chrcat_WS(str, '*');
|
282
|
+
if (di->scale == 2) chrcat_WS(str, '2');
|
283
|
+
else if (di->scale == 4) chrcat_WS(str, '4');
|
284
|
+
else /* if (di->scale == 8) */ chrcat_WS(str, '8');
|
285
|
+
}
|
286
|
+
|
287
|
+
distorm_format_signed_disp(str, di, addrMask);
|
288
|
+
chrcat_WS(str, CLOSE_CHR);
|
289
|
+
break;
|
290
|
+
case O_PC:
|
291
|
+
#ifdef SUPPORT_64BIT_OFFSET
|
292
|
+
str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask);
|
293
|
+
#else
|
294
|
+
str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask);
|
295
|
+
#endif
|
296
|
+
break;
|
297
|
+
case O_PTR:
|
298
|
+
str_code_hdw(str, di->imm.ptr.seg);
|
299
|
+
chrcat_WS(str, SEG_OFF_CHR);
|
300
|
+
str_code_hdw(str, di->imm.ptr.off);
|
301
|
+
break;
|
302
|
+
}
|
303
|
+
}
|
304
|
+
|
305
|
+
if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN");
|
306
|
+
else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN");
|
307
|
+
}
|
308
|
+
|
309
|
+
#ifdef SUPPORT_64BIT_OFFSET
|
310
|
+
_DLLEXPORT_ _DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
|
311
|
+
#else
|
312
|
+
_DLLEXPORT_ _DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
|
313
|
+
#endif
|
314
|
+
{
|
315
|
+
_DecodeResult res;
|
316
|
+
_DInst di;
|
317
|
+
_CodeInfo ci;
|
318
|
+
unsigned int instsCount = 0, i;
|
319
|
+
|
320
|
+
*usedInstructionsCount = 0;
|
321
|
+
|
322
|
+
/* I use codeLen as a signed variable in order to ease detection of underflow... and besides - */
|
323
|
+
if (codeLen < 0) {
|
324
|
+
return DECRES_INPUTERR;
|
325
|
+
}
|
326
|
+
|
327
|
+
if ((dt != Decode16Bits) && (dt != Decode32Bits) && (dt != Decode64Bits)) {
|
328
|
+
return DECRES_INPUTERR;
|
329
|
+
}
|
330
|
+
|
331
|
+
if (code == NULL || result == NULL) {
|
332
|
+
return DECRES_INPUTERR;
|
333
|
+
}
|
334
|
+
|
335
|
+
/* Assume length=0 is success. */
|
336
|
+
if (codeLen == 0) {
|
337
|
+
return DECRES_SUCCESS;
|
338
|
+
}
|
339
|
+
|
340
|
+
/*
|
341
|
+
* We have to format the result into text. But the interal decoder works with the new structure of _DInst.
|
342
|
+
* Therefore, we will pass the result array(!) from the caller and the interal decoder will fill it in with _DInst's.
|
343
|
+
* Then we will copy each result to a temporary structure, and use it to reformat that specific result.
|
344
|
+
*
|
345
|
+
* This is all done to save memory allocation and to work on the same result array in-place!!!
|
346
|
+
* It's a bit ugly, I have to admit, but worth it.
|
347
|
+
*/
|
348
|
+
|
349
|
+
ci.codeOffset = codeOffset;
|
350
|
+
ci.code = code;
|
351
|
+
ci.codeLen = codeLen;
|
352
|
+
ci.dt = dt;
|
353
|
+
ci.features = DF_NONE;
|
354
|
+
|
355
|
+
res = decode_internal(&ci, TRUE, (_DInst*)result, maxInstructions, &instsCount);
|
356
|
+
for (i = 0; i < instsCount; i++) {
|
357
|
+
if ((*usedInstructionsCount + i) >= maxInstructions) return DECRES_MEMORYERR;
|
358
|
+
|
359
|
+
/* Copy the current decomposed result to a temp structure, so we can override the result with text. */
|
360
|
+
memcpy(&di, (char*)result + (i * sizeof(_DecodedInst)), sizeof(_DInst));
|
361
|
+
#ifdef SUPPORT_64BIT_OFFSET
|
362
|
+
distorm_format64(&ci, &di, &result[i]);
|
363
|
+
#else
|
364
|
+
distorm_format32(&ci, &di, &result[i]);
|
365
|
+
#endif
|
366
|
+
}
|
367
|
+
|
368
|
+
*usedInstructionsCount = instsCount;
|
369
|
+
return res;
|
370
|
+
}
|
371
|
+
|
372
|
+
_DLLEXPORT_ unsigned int distorm_version()
|
373
|
+
{
|
374
|
+
return __DISTORMV__;
|
375
|
+
}
|
@@ -0,0 +1,490 @@
|
|
1
|
+
/*
|
2
|
+
instructions.c
|
3
|
+
|
4
|
+
diStorm3 - Powerful disassembler for X86/AMD64
|
5
|
+
http://ragestorm.net/distorm/
|
6
|
+
distorm at gmail dot com
|
7
|
+
Copyright (C) 2010 Gil Dabah
|
8
|
+
|
9
|
+
This program is free software: you can redistribute it and/or modify
|
10
|
+
it under the terms of the GNU General Public License as published by
|
11
|
+
the Free Software Foundation, either version 3 of the License, or
|
12
|
+
(at your option) any later version.
|
13
|
+
|
14
|
+
This program is distributed in the hope that it will be useful,
|
15
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
GNU General Public License for more details.
|
18
|
+
|
19
|
+
You should have received a copy of the GNU General Public License
|
20
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>
|
21
|
+
*/
|
22
|
+
|
23
|
+
|
24
|
+
#include "instructions.h"
|
25
|
+
|
26
|
+
#include "insts.h"
|
27
|
+
#include "prefix.h"
|
28
|
+
#include "x86defs.h"
|
29
|
+
#include "../mnemonics.h"
|
30
|
+
|
31
|
+
|
32
|
+
/*
|
33
|
+
I use the trie data structure as I found it most fitting to a disassembler mechanism.
|
34
|
+
When you read a byte and have to decide if it's enough or you should read more bytes, 'till you get to the instruction information.
|
35
|
+
It's really fast because you POP the instruction info in top 3 iterates on the DB, because an instruction can be formed from two bytes + 3 bits reg from the ModR/M byte.
|
36
|
+
For a simple explanation, check this out:
|
37
|
+
http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Tree/Trie/
|
38
|
+
Further reading: http://en.wikipedia.org/wiki/Trie
|
39
|
+
|
40
|
+
The first GATE (array you read off a trie data structure), as I call them, is statically allocated by the compiler.
|
41
|
+
The second and third gates if used are being allocated dynamically by the instructions-insertion functionality.
|
42
|
+
|
43
|
+
How would such a thing look in memory, say we support 4 instructions with 3 bytes top (means 2 dynamically allocated gates).
|
44
|
+
|
45
|
+
->
|
46
|
+
|-------| 0,
|
47
|
+
|0| -------------------------------> |-------|
|
48
|
+
|1|RET | 1, |0|AND |
|
49
|
+
|2| -----> |-------| |1|XOR |
|
50
|
+
|3|INT3 | |0|PUSH | |2|OR | 0,3,
|
51
|
+
|-------| |1|POP | |3| --------->|-------|
|
52
|
+
|2|PUSHF| |-------| |0|ROR |
|
53
|
+
|3|POPF | |1|ROL |
|
54
|
+
|-------| |2|SHR |
|
55
|
+
|3|SHL |
|
56
|
+
|-------|
|
57
|
+
|
58
|
+
Of course, this is NOT how Intel instructions set looks!!!
|
59
|
+
but I just wanted to give a small demonstration.
|
60
|
+
Now the instructions you get from such a trie DB goes like this:
|
61
|
+
|
62
|
+
0, 0 - AND
|
63
|
+
0, 1 - XOR
|
64
|
+
0, 2 - OR
|
65
|
+
0, 3, 0, ROR
|
66
|
+
0, 3, 1, ROL
|
67
|
+
0, 3, 2, SHR
|
68
|
+
0, 3, 3, SHL
|
69
|
+
1 - RET
|
70
|
+
2, 0 - PUSH
|
71
|
+
2, 1 - POP
|
72
|
+
2, 2 - PUSHF
|
73
|
+
2, 3 - POPF
|
74
|
+
3 - INT3
|
75
|
+
|
76
|
+
I guess it's clear by now.
|
77
|
+
So now, if you read 0, you know that you have to enter the second gate(list) with the second byte specifying the index.
|
78
|
+
But if you read 1, you know that you go to an instruction (in this case, a RET).
|
79
|
+
That's why there's an Instruction-Node structure, it tells you whether you got to an instruction or another list
|
80
|
+
so you should keep on reading byte).
|
81
|
+
|
82
|
+
In Intel, you could go through 4 gates at top, because there're instructions which are built from 2 bytes and another smaller list
|
83
|
+
for the REG part, or newest SSE4 instructions which use 4 bytes for opcode.
|
84
|
+
Therefore, Intel's first gate is 256 long, and other gates are 256 (/72) or 8 long. Yes, it costs quite a lot of memory
|
85
|
+
for non-used defined instructions, but I think that it still rocks.
|
86
|
+
*/
|
87
|
+
|
88
|
+
/*
|
89
|
+
* This function is responsible for returning the instruction information of the first instruction found in code.
|
90
|
+
* It returns the _InstInfo of the found instruction, otherwise NULL.
|
91
|
+
* code should point to the ModR/M byte upon exit (if used), or after the instruction binary code itself.
|
92
|
+
* This function is NOT decoding-type dependent, it is up to the caller to see whether the instruction is valid.
|
93
|
+
* Get the instruction info, using a Trie data structure.
|
94
|
+
*
|
95
|
+
* Sometimes normal prefixes become mandatory prefixes, which means they are now part of the instruction opcode bytes.
|
96
|
+
|
97
|
+
* This is a bit tricky now,
|
98
|
+
* if the first byte is a REP (F3) prefix, we will have to give a chance to an SSE instruction.
|
99
|
+
* If an instruction doesn't exist, we will make it as a prefix and re-locateinst.
|
100
|
+
* A case such that a REP prefix is being changed into an instruction byte and also an SSE instruction will not be found can't happen,
|
101
|
+
* simply because there are no collisions between string instruction and SSE instructions (they are escaped).
|
102
|
+
|
103
|
+
* As for S/SSE2/3, check for F2 and 66 as well.
|
104
|
+
|
105
|
+
* In 64 bits, we have to make sure that we will skip the REX prefix, if it exists.
|
106
|
+
* There's a specific case, where a 66 is mandatory but it was dropped because REG.W was used,
|
107
|
+
* but it doesn't behave as an operand size prefix but as a mandatory, so we will have to take it into account.
|
108
|
+
|
109
|
+
* For example (64 bits decoding mode):
|
110
|
+
* 66 98 CBW
|
111
|
+
* 48 98 CDQE
|
112
|
+
* 66 48 98: db 0x66; CDQE
|
113
|
+
* Shows that operand size is dropped.
|
114
|
+
|
115
|
+
* Now, it's a mandatory prefix and NOT an operand size one.
|
116
|
+
* 66480f2dc0 db 0x48; CVTPD2PI XMM0, XMM0
|
117
|
+
* Although this instruction doesn't require a REX.W, it just shows, that even if it did - it doesn't matter.
|
118
|
+
* REX.W is dropped because it's not required, but the decode function disabled the operand size even so.
|
119
|
+
*/
|
120
|
+
|
121
|
+
/*
 * Pick an instruction out of a prefixed node according to the prefixes decoded so far.
 *
 * The node's slots are consulted as follows: 0 - no prefix, 1 - 0x66, 2 - 0xf3 (REP), 3 - 0xf2 (REPNZ).
 * The prefix that selected a slot is removed from ps->decodedPrefixes, and for 0x66
 * ps->isOpSizeMandatory is set as well.
 * When a single prefix selected an empty slot, the non-prefixed slot 0 is returned instead.
 * Returns NULL when both REP and REPNZ are present, or when a REP/REPNZ slot is combined with
 * an operand size prefix the instruction does not accept.
 */
static _InstInfo* inst_lookup_prefixed(_InstNode* in, _PrefixState* ps)
{
	_InstInfo* found = NULL;

	switch (ps->decodedPrefixes & (INST_PRE_OP_SIZE | INST_PRE_REPS))
	{
		case 0:
			/* No candidate prefix at all. */
			found = in->list[in->ids[0]];
		break;

		case INST_PRE_OP_SIZE:
			/* 0x66 is consumed as a mandatory prefix. */
			ps->isOpSizeMandatory = 1;
			ps->decodedPrefixes &= ~INST_PRE_OP_SIZE;
			found = in->list[in->ids[1]];
		break;

		case INST_PRE_REP:
			/* 0xf3. */
			ps->decodedPrefixes &= ~INST_PRE_REP;
			found = in->list[in->ids[2]];
		break;

		case INST_PRE_REPNZ:
			/* 0xf2. */
			ps->decodedPrefixes &= ~INST_PRE_REPNZ;
			found = in->list[in->ids[3]];
		break;

		default:
			/*
			 * Several candidate prefixes at once. The only legal combination is REP or REPNZ
			 * acting as the mandatory prefix with a genuine operand size prefix next to it.
			 */
			if ((ps->decodedPrefixes & INST_PRE_REPS) == INST_PRE_REPS) return NULL;

			if (ps->decodedPrefixes & INST_PRE_REPNZ) {
				ps->decodedPrefixes &= ~INST_PRE_REPNZ;
				found = in->list[in->ids[3]];
			} else if (ps->decodedPrefixes & INST_PRE_REP) {
				ps->decodedPrefixes &= ~INST_PRE_REP;
				found = in->list[in->ids[2]];
			}

			/* The instruction has to exist and to accept an operand size prefix. */
			if (found == NULL) return NULL;
			if (~found->flags & INST_PRE_OP_SIZE) return NULL;
		break;
	}

	/* A prefix was present but its slot is empty: fold back to the normal instruction. */
	if (found == NULL) {
		found = in->list[in->ids[0]];
	}
	return found;
}
|
176
|
+
|
177
|
+
/*
 * Locate the instruction of a VEX prefixed encoding.
 *
 * The VEX byte(s) are read through ps->vexPos, the opcode byte(s) through ci->code while
 * ci->codeLen is decremented for every byte consumed. ps->vexV is updated with the decoded vvvv value.
 * Returns the matching _InstInfo, or NULL when other prefixes forbid VEX, the opcode map is unknown,
 * the stream runs out of bytes or no VEXed instruction exists for the bytes read.
 */
static _InstInfo* inst_vex_lookup(_CodeInfo* ci, _PrefixState* ps)
{
	_InstNode* node = NULL;
	unsigned int pp = 0, mapSelect = 0, subIndex = 0;
	unsigned int slot = 4; /* VEX instructions start at index 4 in the Prefixed table. */
	uint8_t vexByte0 = *ps->vexPos, vexByte1 = 0, vvvv = 0;

	/* A VEX instruction is rejected if any of 66, f0, f2, f3 or REX prefixes precede it. */
	_iflags forbidden = (INST_PRE_OP_SIZE | INST_PRE_LOCK | INST_PRE_REP | INST_PRE_REPNZ | INST_PRE_REX);
	if ((ps->decodedPrefixes & forbidden) != 0) return NULL;

	/* Extract the fields needed to find the instruction from the VEX prefix. */
	if (ps->prefixExtType == PET_VEX2BYTES) {
		/* The two bytes form always implies a leading 0x0f opcode byte. */
		mapSelect = 1;
		pp = vexByte0 & 3;
		ps->vexV = vvvv = (~vexByte0 >> 3) & 0xf;
	} else { /* PET_VEX3BYTES */
		vexByte1 = *(ps->vexPos + 1);
		mapSelect = vexByte0 & 0x1f;
		pp = vexByte1 & 3;
		ps->vexV = vvvv = (~vexByte1 >> 3) & 0xf;
	}

	/* Only the maps 1 (0x0f), 2 (0x0f, 0x38) and 3 (0x0f, 0x3a) are legal. */
	if (mapSelect == 1) node = &Table_0F;
	else if (mapSelect == 2) node = &Table_0F_38;
	else if (mapSelect == 3) node = &Table_0F_3A;
	else return NULL;

	/* pp is the implied mandatory prefix (None, 0x66, 0xf3, 0xf2). */
	slot += pp;
	/* A decoded vvvv value of zero selects the second group of four slots. */
	if (vvvv == 0) slot += 4;

	/* Consume the opcode byte the code pointer currently stands on. */
	ci->codeLen -= 1;
	if (ci->codeLen < 0) return NULL;

	node = (_InstNode*)node->list[node->ids[*ci->code]];
	if (node == NULL) return NULL;

	/*
	 * Starting from 0f38 or 0f3a leads to a prefixed table here, and starting from 0f may do so too
	 * for some bytes. The slot itself may still be empty, in which case NULL is returned.
	 */
	if (node->type == INT_LIST_PREFIXED) return node->list[node->ids[slot]];

	/* A plain or divided instruction can only be reached from 0f and never leads to a VEXed one. */
	if (node->type == INT_INFO || node->type == INT_LIST_DIVIDED) return NULL;

	/* Only GROUP and FULL tables are left, both need one more byte from the stream. */
	ci->code += 1;
	ci->codeLen -= 1;
	if (ci->codeLen < 0) return NULL;

	if (node->type == INT_LIST_GROUP) {
		/* A group is indexed by bits 3..5 of the byte. */
		subIndex = (*ci->code >> 3) & 7;
	} else if (node->type == INT_LIST_FULL) {
		/* A full table is indexed by the whole byte. */
		subIndex = *ci->code;
	} else {
		return NULL;
	}

	node = (_InstNode*)node->list[node->ids[subIndex]];
	if (node == NULL) return NULL;
	if (node->type == INT_LIST_PREFIXED) return node->list[node->ids[slot]];

	/* No VEXed instruction was found. */
	return NULL;
}
|
255
|
+
|
256
|
+
/*
 * Locate the instruction-info record that matches the opcode bytes found at ci->code.
 *
 * ci - the code stream; ci->code is advanced and ci->codeLen decremented as opcode bytes are consumed.
 * ps - the prefix state; a few special cases below flag prefixes as used or ignored.
 *
 * Returns the matching _InstInfo, or NULL when the stream is exhausted or no table entry fits.
 */
_InstInfo* inst_lookup(_CodeInfo* ci, _PrefixState* ps)
{
    /* Snapshot of the REX/VEX bits taken on entry, before any prefix bookkeeping below. */
    unsigned int rex = ps->vrex;
    unsigned int op0 = 0, op1 = 0, op2 = 0, slot = 0;
    int waitSeen = FALSE;
    _InstNode* node = NULL;
    _InstInfo* found = NULL;

    /* VEX prefixed instructions are resolved by their own lookup routine. */
    if (ps->decodedPrefixes & INST_PRE_VEX) {
        found = inst_vex_lookup(ci, ps);
        if (found == NULL) {
            return NULL;
        }
        /* An entry that forces VEX.L is rejected when the L bit is absent. */
        if ((((_InstInfoEx*)found)->flagsEx & INST_FORCE_VEXL) && (~ps->vrex & PREFIX_EX_L)) {
            return NULL;
        }
        return found;
    }

    /* Consume the first opcode byte. */
    ci->codeLen -= 1;
    if (ci->codeLen < 0) {
        return NULL;
    }
    op0 = *ci->code;

    /*
     * 0x9b (WAIT) may be the leading byte of some x87 instructions.
     * Remember that we saw it, drop all prefixes (they apply to the WAIT itself),
     * and restart the lookup from the byte that follows.
     */
    if (op0 == INST_WAIT_INDEX) {
        waitSeen = TRUE;
        prefixes_ignore_all(ps);

        ci->code += 1;
        ci->codeLen -= 1;
        if (ci->codeLen < 0) {
            /* Nothing follows the WAIT; the caller ends up decoding it as a plain WAIT. */
            return NULL;
        }
        op0 = *ci->code;
    }

    /* Root table entry for the first byte. */
    node = (_InstNode*)Instructions.list[Instructions.ids[op0]];
    if (node == NULL) {
        return NULL;
    }

    /* One byte instruction (OCST_1BYTE); never valid when a WAIT was folded in. */
    if (!waitSeen && (node->type == INT_INFO)) {
        if (op0 == INST_ARPL_INDEX) {
            /* ARPL and MOVSXD share an opcode; the decoding mode selects between them. */
            if (ci->dt == Decode64Bits) {
                return (_InstInfo*)&II_movsxd;
            }
            return &II_arpl;
        }

        if (op0 == INST_NOP_INDEX) {
            /* 0xf3 in front of NOP makes it PAUSE; mark the prefix as consumed. */
            if (ps->decodedPrefixes & INST_PRE_REP) {
                ps->usedPrefixes |= INST_PRE_REP;
                return &II_pause;
            }

            /*
             * The table stores this opcode as XCHG. It is reported as NOP unless we decode
             * 64 bits code with REX.B set (the register is then R8, so it is a real XCHG).
             * REX.W on its own is legal and simply eaten.
             */
            if (rex & PREFIX_EX_W) {
                ps->usedPrefixes |= INST_PRE_REX;
            }
            if ((ci->dt == Decode64Bits) && !(~rex & PREFIX_EX_B)) {
                return (_InstInfo*)node;
            }
            return &II_nop;
        }

        if (op0 == INST_LEA_INDEX) {
            /* LEA ignores segment overrides: clear them and mark the segment prefix as ignored. */
            ps->decodedPrefixes &= ~INST_PRE_SEGOVRD_MASK;
            prefixes_ignore(ps, PFXIDX_SEG);
        }

        return (_InstInfo*)node;
    }

    /* Consume the second byte; it may be a ModRM byte of which only some bits select the opcode. */
    ci->code += 1;
    ci->codeLen -= 1;
    if (ci->codeLen < 0) {
        return NULL;
    }
    op1 = *ci->code;

    /* One byte + REG bits (OCST_13BYTES). */
    if (!waitSeen && (node->type == INT_LIST_GROUP)) {
        return (_InstInfo*)node->list[node->ids[(op1 >> 3) & 7]];
    }

    /* One byte + either REG bits or a whole second byte (OCST_1dBYTES). */
    if (node->type == INT_LIST_DIVIDED) {
        if (op1 < INST_DIVIDED_MODRM) {
            /* Below 0xc0 the byte is a ModRM: only its 3 REG/OPCODE bits pick the entry. */
            slot = (op1 >> 3) & 7;
        } else {
            /*
             * 0xc0 and above is a plain second opcode byte. Slots 0-7 hold the REG group and
             * the unused 0x8-0xc0 range is not stored, so the index is shifted down.
             */
            slot = op1 - (INST_DIVIDED_MODRM - 8);
        }

        node = (_InstNode*)node->list[node->ids[slot]];
        if (node == NULL) {
            return NULL;
        }

        if (node->type == INT_INFO) {
            /* With a WAIT in front, the entry must be flagged (via the opsize flag) as accepting it. */
            if ((~((_InstInfo*)node)->flags & INST_PRE_OP_SIZE) && waitSeen) {
                return NULL;
            }
            return (_InstInfo*)node;
        }

        /* Otherwise it is a two entry table keyed by the WAIT: 0 = without, 1 = with. */
        return (_InstInfo*)node->list[node->ids[waitSeen]];
    }

    /* Past this point no instruction includes the WAIT byte as part of its opcode. */
    if (waitSeen) {
        return NULL;
    }

    /* Two whole opcode bytes, no ModRM involved in the selection. */
    if (node->type == INT_LIST_FULL) {
        node = (_InstNode*)node->list[node->ids[op1]];
        if (node == NULL) {
            return NULL;
        }

        /* Two escape bytes in a row introduce a 3DNow! instruction. */
        if ((op0 == _3DNOW_ESCAPE_BYTE) && (op1 == _3DNOW_ESCAPE_BYTE)) {
            return &II_3dnow;
        }

        /* Plain two bytes instruction (OCST_2BYTES). */
        if (node->type == INT_INFO) {
            return (_InstInfo*)node;
        }

        /* Two bytes selected further by a mandatory prefix. */
        if (node->type == INT_LIST_PREFIXED) {
            return inst_lookup_prefixed(node, ps);
        }
    }

    /* Consume the third byte; again it may turn out to be a ModRM byte. */
    ci->code += 1;
    ci->codeLen -= 1;
    if (ci->codeLen < 0) {
        return NULL;
    }
    op2 = *ci->code;

    /* Speculatively index by the REG bits, as if the third byte were a ModRM. */
    found = (_InstInfo*)node->list[node->ids[(op2 >> 3) & 7]];

    /* Two bytes + REG bits (OCST_23BYTES). */
    if (node->type == INT_LIST_GROUP) {
        if (found == NULL) {
            return NULL;
        }
        if (found->type == INT_INFO) {
            return found;
        }
        /* Anything else here is a prefixed table. */
        return inst_lookup_prefixed((_InstNode*)found, ps);
    }

    /* Two bytes + divided range (OCST_2dBYTES). */
    if (node->type == INT_LIST_DIVIDED) {
        /* An entry found through the REG bits wins outright when it is flagged as not divided. */
        if ((found != NULL) && (found->flags & INST_NOT_DIVIDED)) {
            return found;
        }
        /* For bytes of 0xc0 and above, look in the shifted upper range instead. */
        if (op2 >= INST_DIVIDED_MODRM) {
            found = (_InstInfo*)node->list[node->ids[op2 - INST_DIVIDED_MODRM + 8]];
        }
        /* May still be the REG based entry, or NULL for an invalid instruction. */
        return found;
    }

    /* Three whole opcode bytes (OCST_3BYTES). */
    if (node->type == INT_LIST_FULL) {
        node = (_InstNode*)node->list[node->ids[op2]];
        if (node == NULL) {
            return NULL;
        }
        if (node->type == INT_INFO) {
            return (_InstInfo*)node;
        }
        if (node->type == INT_LIST_PREFIXED) {
            return inst_lookup_prefixed(node, ps);
        }
    }

    /* Nothing matched. */
    return NULL;
}
|
451
|
+
|
452
|
+
/*
|
453
|
+
* 3DNow! instruction handling:
|
454
|
+
|
455
|
+
* This is used when we encounter a 3DNow! instruction.
|
456
|
+
* We can't really locate a 3DNow! instruction before we see two escaped bytes,
|
457
|
+
* 0x0f, 0x0f. Then we have to extract operands which are, dest=mmx register, src=mmx register or quadword indirection.
|
458
|
+
* When we are finished with the extraction of operands we can resume to locate the instruction by reading another byte
|
459
|
+
* which tells us which 3DNow instruction we really tracked down...
|
460
|
+
* So in order to tell the extract operands function which operands the 3DNow! instruction require, we need to set up some
|
461
|
+
* generic instruction info for 3DNow! instructions.
|
462
|
+
|
463
|
+
* In the inst_lookup itself, when we read an OCST_3BYTES which the two first bytes are 0x0f and 0x0f.
|
464
|
+
* we will return this special generic II for the specific operands we are interested in (MM, MM64).
|
465
|
+
* Then after extracting the operand, we'll call a completion routine for locating the instruction
|
466
|
+
* which will be called only for 3DNow! instructions, distinguished by a flag, and it will read the last byte of the 3 bytes.
|
467
|
+
*
|
468
|
+
* The id of this opcode should not be used, the following function should change it anyway.
|
469
|
+
*/
|
470
|
+
_InstInfo II_3dnow = {INT_INFO, ISC_3DNOW, OT_MM64, OT_MM, I_UNDEFINED, INST_32BITS | INST_MODRM_REQUIRED | INST_3DNOW_FETCH};
|
471
|
+
|
472
|
+
/*
 * Complete the lookup of a 3DNow! instruction by its trailing opcode byte.
 *
 * ci - the code stream, positioned at the byte that selects the 3DNow! instruction.
 *
 * Returns the instruction info found in the 0x0f, 0x0f table for that byte, consuming the
 * byte from the stream. Returns NULL, leaving the stream untouched, when no byte is
 * available or the byte does not map to an instruction.
 */
_InstInfo* inst_lookup_3dnow(_CodeInfo* ci)
{
    /* The table reached through the two escape bytes holds the 3DNow! instructions. */
    _InstNode* table = &Table_0F_0F;
    _InstNode* entry = NULL;

    /* There must be at least one byte left to read. */
    if (ci->codeLen < 1) {
        return NULL;
    }

    entry = (_InstNode*)table->list[table->ids[*ci->code]];
    if ((entry == NULL) || (entry->type != INT_INFO)) {
        return NULL;
    }

    /* Only a successful match eats the byte. */
    ci->code += 1;
    ci->codeLen -= 1;

    return (_InstInfo*)entry;
}
|