RubyGems - rallhook - Versions diffs - 0.7.5 → 0.8.0 - Mend

rallhook 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

data/AUTHORS +2 -0
data/CHANGELOG +2 -0
data/README +0 -2
data/Rakefile +1 -1
data/TODO +0 -1
data/ext/rallhook_base/deps/distorm/config.h +170 -0
data/ext/rallhook_base/deps/distorm/distorm.h +401 -0
data/ext/rallhook_base/deps/distorm/mnemonics.c +258 -0
data/ext/rallhook_base/deps/distorm/mnemonics.h +200 -0
data/ext/rallhook_base/deps/distorm/src/decoder.c +548 -0
data/ext/rallhook_base/deps/distorm/src/decoder.h +18 -0
data/ext/rallhook_base/deps/distorm/src/distorm.c +375 -0
data/ext/rallhook_base/deps/distorm/src/instructions.c +490 -0
data/ext/rallhook_base/deps/distorm/src/instructions.h +445 -0
data/ext/rallhook_base/deps/distorm/src/insts.c +4851 -0
data/ext/rallhook_base/deps/distorm/src/insts.h +36 -0
data/ext/rallhook_base/deps/distorm/src/operands.c +1270 -0
data/ext/rallhook_base/deps/distorm/src/operands.h +38 -0
data/ext/rallhook_base/deps/distorm/src/prefix.c +380 -0
data/ext/rallhook_base/deps/distorm/src/prefix.h +76 -0
data/ext/rallhook_base/deps/distorm/src/pydistorm.h +62 -0
data/ext/rallhook_base/deps/distorm/src/textdefs.c +180 -0
data/ext/rallhook_base/deps/distorm/src/textdefs.h +68 -0
data/ext/rallhook_base/deps/distorm/src/wstring.c +55 -0
data/ext/rallhook_base/deps/distorm/src/wstring.h +43 -0
data/ext/rallhook_base/deps/distorm/src/x86defs.c +41 -0
data/ext/rallhook_base/deps/distorm/src/x86defs.h +105 -0
data/ext/rallhook_base/extconf.rb +15 -20
data/ext/rallhook_base/rallhook.c +20 -8
metadata +27 -5

data/ext/rallhook_base/deps/distorm/src/decoder.h ADDED

@@ -0,0 +1,18 @@
+/*
+decoder.h
+Copyright (C) 2003-2009 Gil Dabah, http://ragestorm.net/distorm/
+This file is licensed under the GPL license. See the file COPYING.
+*/
+#ifndef DECODER_H
+#define DECODER_H
+#include "../config.h"
+typedef unsigned int _iflags; /* Unsigned word holding a mask of instruction/prefix flag bits (e.g. INST_PRE_* masks). */
+_DecodeResult decode_internal(const _CodeInfo* ci, int supportOldIntr, _DInst result[], unsigned int maxResultCount, unsigned int* usedInstructionsCount); /*
+ * Internal decoding routine behind the public entry points in distorm.c.
+ * ci describes the code buffer, result[] receives the decoded _DInst entries (at most maxResultCount),
+ * and the number of entries produced is reported through usedInstructionsCount.
+ * supportOldIntr is passed as TRUE by the text-formatting decode path and FALSE by the decompose path;
+ * NOTE(review): its exact effect is defined in decoder.c - confirm there.
+ */
+#endif /* DECODER_H */

data/ext/rallhook_base/deps/distorm/src/distorm.c ADDED

@@ -0,0 +1,375 @@
+/*
+distorm.c
+diStorm3 C Library Interface
+diStorm3 - Powerful disassembler for X86/AMD64
+http://ragestorm.net/distorm/
+distorm at gmail dot com
+Copyright (C) 2010  Gil Dabah
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>
+*/
+#include "../distorm.h"
+#include "../config.h"
+#include "decoder.h"
+#include "x86defs.h"
+#include "textdefs.h"
+#include "wstring.h"
+#include "../mnemonics.h"
+/* C LIBRARY EXPORTS */
+#ifdef SUPPORT_64BIT_OFFSET
+	_DecodeResult distorm_decompose64(const _CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
+#else
+	_DecodeResult distorm_decompose32(const _CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
+#endif
+{
+	/*
+	 * Public decompose entry point: validates the arguments and forwards
+	 * ci, result, maxInstructions and usedInstructionsCount to decode_internal.
+	 * Returns DECRES_INPUTERR for invalid arguments, DECRES_SUCCESS for an empty
+	 * buffer (or a NULL counter), otherwise whatever decode_internal returns.
+	 */
+	/* With no counter to report through, bail out early; this is reported as success. */
+	if (usedInstructionsCount == NULL) return DECRES_SUCCESS;
+	/* A successful return may still carry results, so the count is always zeroed first. */
+	*usedInstructionsCount = 0;
+	/* The code descriptor itself must be present and sane. */
+	if (ci == NULL) return DECRES_INPUTERR;
+	if (ci->codeLen < 0) return DECRES_INPUTERR;
+	/* Only the three known decoding modes are accepted. */
+	if (!((ci->dt == Decode16Bits) || (ci->dt == Decode32Bits) || (ci->dt == Decode64Bits))) return DECRES_INPUTERR;
+	/* Both the input buffer and the output array are mandatory. */
+	if ((ci->code == NULL) || (result == NULL)) return DECRES_INPUTERR;
+	/* The 16-bit and 32-bit address limits are mutually exclusive. */
+	if ((ci->features & (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) == (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) return DECRES_INPUTERR;
+	/* An empty buffer decodes trivially. */
+	if (ci->codeLen == 0) return DECRES_SUCCESS;
+	return decode_internal(ci, FALSE, result, maxInstructions, usedInstructionsCount);
+}
+/* Helper: appends an explicit size keyword for an operand when the size can't be inferred from the operands. */
+static void distorm_format_size(_WString* str, const _DInst* di, int opNum)
+{
+	/*
+	 * The keyword is only needed when the size is not evident from a register operand:
+	 *   mov al, [0x1234]   -> AL already tells us the access is 8 bits wide.
+	 *   mov [0x1234], 0x11 -> nothing reveals the size, so it must be spelled out.
+	 *
+	 * For operand numbers of 2 and above the size is always emitted.
+	 */
+	if ((opNum < 2) && ((di->ops[0].type == O_REG) || (di->ops[1].type == O_REG))) return;
+	switch (di->ops[opNum].size)
+	{
+		case 8:
+			strcat_WSN(str, "BYTE ");
+		break;
+		case 16:
+			strcat_WSN(str, "WORD ");
+		break;
+		case 32:
+			strcat_WSN(str, "DWORD ");
+		break;
+		case 64:
+			strcat_WSN(str, "QWORD ");
+		break;
+		case 80:
+			strcat_WSN(str, "TBYTE ");
+		break;
+		case 128:
+			strcat_WSN(str, "DQWORD ");
+		break;
+		case 256:
+			strcat_WSN(str, "YWORD ");
+		break;
+		case 0: /* Memory operand of unknown size: print nothing. */
+		default: /* Unexpected size: print nothing either. */
+		break;
+	}
+}
+/*
+ * Appends the displacement of a memory operand to str as a sign character
+ * (MINUS_DISP_CHR / PLUS_DISP_CHR) followed by its magnitude in hex.
+ * Nothing is appended when the instruction has no displacement (di->dispSize == 0).
+ * addrMask limits the printed magnitude to the active address width.
+ */
+static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t addrMask)
+{
+	uint64_t magnitude;
+	int isNegative;
+	if (!di->dispSize) return;
+	isNegative = ((int64_t)di->disp < 0);
+	chrcat_WS(str, isNegative ? MINUS_DISP_CHR : PLUS_DISP_CHR);
+	/*
+	 * Take the absolute value in unsigned arithmetic: negating the most negative
+	 * 64-bit value as a signed integer overflows (undefined behavior), whereas
+	 * 0 - x on uint64_t is well defined and yields the same bits for every other value.
+	 */
+	magnitude = (uint64_t)di->disp;
+	if (isNegative) magnitude = (uint64_t)0 - magnitude;
+	magnitude &= addrMask;
+	str_code_hqw(str, (uint8_t*)&magnitude);
+}
+#ifdef SUPPORT_64BIT_OFFSET
+	_DLLEXPORT_ void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
+#else
+	_DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
+#endif
+{	/*
+	 * Formats one decomposed instruction (di) as text into result.
+	 * ci supplies the raw code bytes, the code offset, the decoding mode (dt) and the feature flags.
+	 * Fills result->size, result->offset (masked by the address-limit features),
+	 * result->instructionHex, result->mnemonic and result->operands.
+	 * None of the three pointers is checked for NULL; they are dereferenced unconditionally.
+	 */
+	_WString* str;
+	unsigned int i, isDefault;
+	int64_t tmpDisp64;
+	uint64_t addrMask = (uint64_t)-1;
+	uint8_t segment;
+	/* Set address mask, when default is for 64bits addresses. */
+	if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff;
+	else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff;
+	/* Copy other fields. */
+	result->size = di->size;
+	result->offset = di->addr & addrMask;
+	if (di->flags == FLAG_NOT_DECODABLE) { /* Undecodable byte: emitted as a "DB" pseudo-instruction carrying di->imm.byte. */
+		str = &result->mnemonic;
+		strclear_WS(&result->operands);
+		strcpy_WSN(str, "DB ");
+		str_code_hb(str, di->imm.byte);
+		strclear_WS(&result->instructionHex);
+		str_hex_b(&result->instructionHex, di->imm.byte);
+		return; /* Skip to next instruction. */
+	}
+	str = &result->instructionHex;
+	strclear_WS(str);
+	for (i = 0; i < di->size; i++)
+		str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]); /* Byte position inside ci->code is di->addr relative to ci->codeOffset. */
+	str = &result->mnemonic;
+	switch (FLAG_GET_PREFIX(di->flags))
+	{
+		case FLAG_LOCK:
+			strcpy_WSN(str, "LOCK ");
+		break;
+		case FLAG_REP:
+			strcpy_WSN(str, "REP ");
+		break;
+		case FLAG_REPNZ:
+			strcpy_WSN(str, "REPNZ ");
+		break;
+		default:
+			/* Init mnemonic string, cause next touch is concatenation. */
+			strclear_WS(str);
+		break;
+	}
+	strcat_WS(str, (const _WString*)&_MNEMONICS[di->opcode]);
+	/* Format operands: */
+	str = &result->operands;
+	strclear_WS(str);
+	/* Special treatment for String instructions. */
+	if ((META_GET_ISC(di->meta) == ISC_INTEGER) &&
+		((di->opcode == I_MOVS) ||
+		 (di->opcode == I_CMPS) ||
+		 (di->opcode == I_STOS) ||
+		 (di->opcode == I_LODS) ||
+		 (di->opcode == I_SCAS)))
+	{
+		/*
+		 * No operands are needed if the address size is the default one,
+		 * and no segment is overridden, so add the suffix letter,
+		 * to indicate size of operation and continue to next instruction.
+		 */
+		if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment)))
+		{
+			str = &result->mnemonic;
+			switch (di->ops[0].size)
+			{
+				case 8: chrcat_WS(str, 'B'); break;
+				case 16: chrcat_WS(str, 'W'); break;
+				case 32: chrcat_WS(str, 'D'); break;
+				case 64: chrcat_WS(str, 'Q'); break;
+			}
+			return; /* Operands stay empty (cleared above); only the mnemonic suffix is shown. */
+		}
+	}
+	for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) {
+		if (i > 0) strcat_WSN(str, ", ");
+		switch (di->ops[i].type)
+		{
+			case O_REG:
+				strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
+			break;
+			case O_IMM:
+				/* If the instruction is 'push', show explicit size (except byte imm). */
+				if (di->opcode == I_PUSH && di->ops[i].size != 8) distorm_format_size(str, di, i);
+				/* Special fix for negative sign extended immediates. */
+				if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) {
+					if (di->imm.sbyte < 0) {
+						chrcat_WS(str, MINUS_DISP_CHR);
+						str_code_hb(str, -di->imm.sbyte);
+						break; /* Leaves the O_IMM case, so the generic immediate output below is skipped. */
+					}
+				}
+				if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword);
+				else str_code_hdw(str, di->imm.dword);
+			break;
+			case O_IMM1:
+				str_code_hdw(str, di->imm.ex.i1);
+			break;
+			case O_IMM2:
+				str_code_hdw(str, di->imm.ex.i2);
+			break;
+			case O_DISP:
+				distorm_format_size(str, di, i);
+				chrcat_WS(str, OPEN_CHR);
+				if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
+					strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
+					chrcat_WS(str, SEG_OFF_CHR);
+				}
+				tmpDisp64 = di->disp & addrMask;
+				str_code_hqw(str, (uint8_t*)&tmpDisp64);
+				chrcat_WS(str, CLOSE_CHR);
+			break;
+			case O_SMEM:
+				distorm_format_size(str, di, i);
+				chrcat_WS(str, OPEN_CHR);
+				/*
+				 * This is where we need to take special care for String instructions.
+				 * If we got here, it means we need to explicitly show their operands.
+				 * The problem with CMPS and MOVS is that they have two(!) memory operands.
+				 * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden.
+				 * And make the rest of the String operations explicit.
+				 */
+				segment = SEGMENT_GET(di->segment);
+				isDefault = SEGMENT_IS_DEFAULT(di->segment);
+				switch (di->opcode)
+				{
+					case I_MOVS:
+						isDefault = FALSE;
+						if (i == 0) segment = R_ES;
+					break;
+					case I_CMPS:
+						isDefault = FALSE;
+						if (i == 1) segment = R_ES;
+					break;
+					case I_INS:
+					case I_LODS:
+					case I_STOS:
+					case I_SCAS: isDefault = FALSE; break;
+				}
+				if (!isDefault && (segment != R_NONE)) {
+					strcat_WS(str, (const _WString*)&_REGISTERS[segment]);
+					chrcat_WS(str, SEG_OFF_CHR);
+				}
+				strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
+				distorm_format_signed_disp(str, di, addrMask);
+				chrcat_WS(str, CLOSE_CHR);
+			break;
+			case O_MEM:
+				distorm_format_size(str, di, i);
+				chrcat_WS(str, OPEN_CHR);
+				if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
+					strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
+					chrcat_WS(str, SEG_OFF_CHR);
+				}
+				if (di->base != R_NONE) {
+					strcat_WS(str, (const _WString*)&_REGISTERS[di->base]);
+					chrcat_WS(str, PLUS_DISP_CHR);
+				}
+				strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
+				if (di->scale != 0) {
+					chrcat_WS(str, '*');
+					if (di->scale == 2) chrcat_WS(str, '2');
+					else if (di->scale == 4) chrcat_WS(str, '4');
+					else /* if (di->scale == 8) */ chrcat_WS(str, '8');
+				}
+				distorm_format_signed_disp(str, di, addrMask);
+				chrcat_WS(str, CLOSE_CHR);
+			break;
+			case O_PC: /* Branch target: immediate + instruction address + instruction size, masked to the address width. */
+#ifdef SUPPORT_64BIT_OFFSET
+				str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask);
+#else
+				str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask);
+#endif
+			break;
+			case O_PTR:
+				str_code_hdw(str, di->imm.ptr.seg);
+				chrcat_WS(str, SEG_OFF_CHR);
+				str_code_hdw(str, di->imm.ptr.off);
+			break;
+		}
+	}
+	if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN"); /* Branch hints are appended to the operands text. */
+	else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN");
+}
+/*
+ * Decodes a code buffer and formats every decoded instruction as text into result[].
+ *  codeOffset            - address assigned to the first byte of code.
+ *  code / codeLen        - the bytes to decode; a negative length is rejected, zero is a trivial success.
+ *  dt                    - decoding mode (Decode16Bits, Decode32Bits or Decode64Bits).
+ *  result                - caller-supplied array of maxInstructions _DecodedInst entries.
+ *  usedInstructionsCount - receives the number of entries written to result[]; must not be NULL.
+ * Returns DECRES_INPUTERR for invalid arguments, otherwise the result of the internal decoder.
+ */
+#ifdef SUPPORT_64BIT_OFFSET
+	_DLLEXPORT_ _DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
+#else
+	_DLLEXPORT_ _DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
+#endif
+{
+	_DecodeResult res;
+	_DInst di;
+	_CodeInfo ci;
+	unsigned int instsCount = 0, i;
+	/* The count is written unconditionally below, so reject a NULL out-pointer instead of dereferencing it. */
+	if (usedInstructionsCount == NULL) {
+		return DECRES_INPUTERR;
+	}
+	*usedInstructionsCount = 0;
+	/* codeLen is signed so that a negative (underflowed) length can be detected and rejected. */
+	if (codeLen < 0) {
+		return DECRES_INPUTERR;
+	}
+	if ((dt != Decode16Bits) && (dt != Decode32Bits) && (dt != Decode64Bits)) {
+		return DECRES_INPUTERR;
+	}
+	if (code == NULL || result == NULL) {
+		return DECRES_INPUTERR;
+	}
+	/* Assume length=0 is success. */
+	if (codeLen == 0) {
+		return DECRES_SUCCESS;
+	}
+	/*
+	 * We have to format the result into text. But the internal decoder works with the new structure of _DInst.
+	 * Therefore, we will pass the result array(!) from the caller and the internal decoder will fill it in with _DInst's.
+	 * Then we will copy each result to a temporary structure, and use it to reformat that specific result.
+	 *
+	 * This is all done to save memory allocation and to work on the same result array in-place!!!
+	 * It's a bit ugly, I have to admit, but worth it.
+	 */
+	ci.codeOffset = codeOffset;
+	ci.code = code;
+	ci.codeLen = codeLen;
+	ci.dt = dt;
+	ci.features = DF_NONE;
+	res = decode_internal(&ci, TRUE, (_DInst*)result, maxInstructions, &instsCount);
+	for (i = 0; i < instsCount; i++) {
+		/* *usedInstructionsCount is still zero here, so only the running index needs checking against the capacity. */
+		if (i >= maxInstructions) return DECRES_MEMORYERR;
+		/*
+		 * Copy the current decomposed result to a temp structure, so we can override the result with text.
+		 * The source is addressed in _DecodedInst-sized strides while sizeof(_DInst) bytes are copied.
+		 */
+		memcpy(&di, (char*)result + (i * sizeof(_DecodedInst)), sizeof(_DInst));
+#ifdef SUPPORT_64BIT_OFFSET
+		distorm_format64(&ci, &di, &result[i]);
+#else
+		distorm_format32(&ci, &di, &result[i]);
+#endif
+	}
+	*usedInstructionsCount = instsCount;
+	return res;
+}
+/* Returns the library version constant (__DISTORMV__) this file was built with. */
+_DLLEXPORT_ unsigned int distorm_version(void)
+{
+	/* (void) makes this a real prototype; an empty () parameter list leaves the arguments unchecked in C. */
+	return __DISTORMV__;
+}

data/ext/rallhook_base/deps/distorm/src/instructions.c ADDED

@@ -0,0 +1,490 @@
+/*
+instructions.c
+diStorm3 - Powerful disassembler for X86/AMD64
+http://ragestorm.net/distorm/
+distorm at gmail dot com
+Copyright (C) 2010  Gil Dabah
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>
+*/
+#include "instructions.h"
+#include "insts.h"
+#include "prefix.h"
+#include "x86defs.h"
+#include "../mnemonics.h"
+/*
+I use the trie data structure as I found it most fitting to a disassembler mechanism.
+When you read a byte and have to decide if it's enough or you should read more bytes, 'till you get to the instruction information.
+It's really fast because you POP the instruction info in top 3 iterates on the DB, because an instruction can be formed from two bytes + 3 bits reg from the ModR/M byte.
+For a simple explanation, check this out:
+http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Tree/Trie/
+Further reading: http://en.wikipedia.org/wiki/Trie
+The first GATE (array you read off a trie data structure), as I call them, is statically allocated by the compiler.
+The second and third gates if used are being allocated dynamically by the instructions-insertion functionality.
+How would such a thing look in memory, say we support 4 instructions with 3 bytes top (means 2 dynamically allocated gates).
+->
+|-------|                                0,
+|0|     -------------------------------> |-------|
+|1|RET  |      1,                        |0|AND  |
+|2|     -----> |-------|                 |1|XOR  |
+|3|INT3 |      |0|PUSH |                 |2|OR   |         0,3,
+|-------|      |1|POP  |                 |3|     --------->|-------|
+               |2|PUSHF|                 |-------|         |0|ROR  |
+               |3|POPF |                                   |1|ROL  |
+               |-------|                                   |2|SHR  |
+                                                           |3|SHL  |
+                                                           |-------|
+Of course, this is NOT how Intel instructions set looks!!!
+but I just wanted to give a small demonstration.
+Now the instructions you get from such a trie DB goes like this:
+0, 0 - AND
+0, 1 - XOR
+0, 2 - OR
+0, 3, 0, ROR
+0, 3, 1, ROL
+0, 3, 2, SHR
+0, 3, 3, SHL
+1 - RET
+2, 0 - PUSH
+2, 1 - POP
+2, 2 - PUSHF
+2, 3 - POPF
+3 - INT3
+I guess it's clear by now.
+So now, if you read 0, you know that you have to enter the second gate(list) with the second byte specifying the index.
+But if you read 1, you know that you go to an instruction (in this case, a RET).
+That's why there's an Instruction-Node structure: it tells you whether you got to an instruction or to another list
+(in which case you should keep on reading bytes).
+In Intel, you could go through 4 gates at top, because there're instructions which are built from 2 bytes and another smaller list
+for the REG part, or newest SSE4 instructions which use 4 bytes for opcode.
+Therefore, Intel's first gate is 256 long, and other gates are 256 (/72) or 8 long. Yes, it costs quite a lot of memory
+for unused instruction slots, but I think that it still rocks.
+*/
+/*
+ * This function is responsible for returning the information of the first instruction found in the code.
+ * It returns the _InstInfo of the found instruction, otherwise NULL.
+ * code should point to the ModR/M byte upon exit (if used), or after the instruction binary code itself.
+ * This function is NOT decoding-type dependent; it is up to the caller to see whether the instruction is valid.
+ * Get the instruction info, using a Trie data structure.
+ *
+ * Sometimes normal prefixes become mandatory prefixes, which means they are now part of the instruction opcode bytes.
+ * This is a bit tricky now,
+ * if the first byte is a REP (F3) prefix, we will have to give a chance to an SSE instruction.
+ * If an instruction doesn't exist, we will make it as a prefix and re-locateinst.
+ * A case such that a REP prefix is being changed into an instruction byte and also an SSE instruction will not be found can't happen,
+ * simply because there are no collisions between string instruction and SSE instructions (they are escaped).
+ * As for S/SSE2/3, check for F2 and 66 as well.
+ * In 64 bits, we have to make sure that we will skip the REX prefix, if it exists.
+ * There's a specific case, where a 66 is mandatory but it was dropped because REG.W was used,
+ * but it doesn't behave as an operand size prefix but as a mandatory, so we will have to take it into account.
+ * For example (64 bits decoding mode):
+ * 66 98 CBW
+ * 48 98 CDQE
+ * 66 48 98: db 0x66; CDQE
+ * Shows that operand size is dropped.
+ * Now, it's a mandatory prefix and NOT an operand size one.
+ * 66480f2dc0 db 0x48; CVTPD2PI XMM0, XMM0
+ * Although this instruction doesn't require a REX.W, it just shows, that even if it did - it doesn't matter.
+ * REX.W is dropped because it's not required, but the decode function disabled the operand size even so.
+ */
+/*
+ * Picks the entry of a prefixed table (in) that matches the mandatory prefixes
+ * recorded in ps (none, 0x66, 0xf3 or 0xf2 -> slots 0..3).
+ * A prefix consumed as mandatory is removed from ps->decodedPrefixes.
+ * Returns the selected _InstInfo, the non-prefixed entry as a fallback, or NULL for an illegal combination.
+ */
+static _InstInfo* inst_lookup_prefixed(_InstNode* in, _PrefixState* ps)
+{
+	_InstInfo* found = NULL;
+	unsigned int slot = 0; /* Non-prefixed entry unless a mandatory prefix says otherwise. */
+	int needsOpSizeSupport = FALSE; /* Set when a REP/REPNZ prefix comes together with 0x66. */
+	/* Only the operand-size and REP/REPNZ bits take part in the selection. */
+	switch (ps->decodedPrefixes & (INST_PRE_OP_SIZE | INST_PRE_REPS))
+	{
+		case 0:
+			/* No mandatory prefix: keep slot 0. */
+		break;
+		case INST_PRE_OP_SIZE:
+			/* 0x66 alone: slot 1, and remember it was consumed as a mandatory prefix. */
+			slot = 1;
+			ps->isOpSizeMandatory = 1;
+			ps->decodedPrefixes &= ~INST_PRE_OP_SIZE;
+		break;
+		case INST_PRE_REP:
+			/* 0xf3 alone: slot 2. */
+			slot = 2;
+			ps->decodedPrefixes &= ~INST_PRE_REP;
+		break;
+		case INST_PRE_REPNZ:
+			/* 0xf2 alone: slot 3. */
+			slot = 3;
+			ps->decodedPrefixes &= ~INST_PRE_REPNZ;
+		break;
+		default:
+			/*
+			 * Several candidate prefixes at once. The only legal mix is one of
+			 * REP/REPNZ acting as the mandatory prefix plus a genuine operand size prefix.
+			 */
+			/* REP together with REPNZ can never be valid. */
+			if ((ps->decodedPrefixes & INST_PRE_REPS) == INST_PRE_REPS) return NULL;
+			if (ps->decodedPrefixes & INST_PRE_REPNZ) {
+				slot = 3;
+				ps->decodedPrefixes &= ~INST_PRE_REPNZ;
+			} else if (ps->decodedPrefixes & INST_PRE_REP) {
+				slot = 2;
+				ps->decodedPrefixes &= ~INST_PRE_REP;
+			} else {
+				/* Nothing was selected, so there is no instruction to validate. */
+				return NULL;
+			}
+			needsOpSizeSupport = TRUE;
+		break;
+	}
+	found = in->list[in->ids[slot]];
+	if (needsOpSizeSupport) {
+		/* The selected instruction must itself accept an operand size prefix, otherwise the encoding is illegal. */
+		if ((found == NULL) || (~found->flags & INST_PRE_OP_SIZE)) return NULL;
+		return found;
+	}
+	/* A prefix was present but no prefixed variant exists: fold back to the plain instruction. */
+	if (found == NULL) found = in->list[in->ids[0]];
+	return found;
+}
+static _InstInfo* inst_vex_lookup(_CodeInfo* ci, _PrefixState* ps)
+{	/*
+	 * Looks up the instruction info of a VEX-prefixed instruction.
+	 * Reads the VEX byte(s) through ps->vexPos and stores the decoded vvvv value in ps->vexV,
+	 * then reads one or two opcode bytes from ci->code (decrementing ci->codeLen for each)
+	 * to walk the 0F / 0F38 / 0F3A tables.
+	 * Returns the entry found in a prefixed table at the computed VEX index, or NULL when a
+	 * conflicting prefix was decoded, the stream runs out, or no VEXed entry exists.
+	 */
+	_InstNode* in = NULL;
+	unsigned int pp = 0, start = 0;
+	unsigned int index = 4; /* VEX instructions start at index 4 in the Prefixed table. */
+	uint8_t vex = *ps->vexPos, vex2 = 0, v = 0;
+	/* The VEX instruction will #ud if any of 66, f0, f2, f3, REX prefixes precede. */
+	_iflags illegal = (INST_PRE_OP_SIZE | INST_PRE_LOCK | INST_PRE_REP | INST_PRE_REPNZ | INST_PRE_REX);
+	if ((ps->decodedPrefixes & illegal) != 0) return NULL;
+	/* Read the some fields from the VEX prefix we need to extract the instruction. */
+	if (ps->prefixExtType == PET_VEX2BYTES) {
+		ps->vexV = v = (~vex >> 3) & 0xf;
+		pp = vex & 3;
+		/* Implied leading 0x0f byte by default for 2 bytes VEX prefix. */
+		start = 1;
+	} else { /* PET_VEX3BYTES */
+		start = vex & 0x1f;
+		vex2 = *(ps->vexPos + 1);
+		ps->vexV = v = (~vex2 >> 3) & 0xf;
+		pp = vex2 & 3;
+	}
+	/* start can be either 1 (0x0f), 2 (0x0f, 0x038) or 3 (0x0f, 0x3a), otherwise it's illegal. */
+	switch (start)
+	{
+		case 1: in = &Table_0F; break;
+		case 2: in = &Table_0F_38; break;
+		case 3: in = &Table_0F_3A; break;
+		default: return NULL;
+	}
+	/* If the instruction is encoded using the vvvv field, fix the index into the Prefixed table. */
+	if (v == 0) index += 4; /* NOTE(review): v holds the inverted vvvv bits, so v == 0 means the raw field was 1111b - confirm this matches the comment above. */
+	/* pp is actually the implied mandatory prefix, apply it to the index. */
+	index += pp; /* (None, 0x66, 0xf3, 0xf2) */
+	/* Read a byte from the stream. */
+	ci->codeLen -= 1;
+	if (ci->codeLen < 0) return NULL;
+	in = (_InstNode*)in->list[in->ids[*ci->code]]; /* ci->code is not advanced for this byte here; it is only advanced before the second read below. */
+	if (in == NULL) return NULL;
+	/*
+	 * If we started with 0f38 or 0f3a so it's a prefixed table,
+	 * therefore it's surely a VEXed instruction (because of a high index).
+	 * However, starting with 0f, could also lead immediately to a prefixed table for some bytes.
+	 * it might return NULL, if the index is invalid.
+	 */
+	if (in->type == INT_LIST_PREFIXED) return in->list[in->ids[index]];
+	/*
+	 * If we reached here, obviously we started with 0f. VEXed instructions must be nodes of a prefixed table.
+	 * But since we found an instruction (or divided one), just return NULL.
+	 * They cannot lead to a VEXed instruction.
+	 */
+	if (in->type == INT_INFO || in->type == INT_LIST_DIVIDED) return NULL;
+	/* Now we are left with handling either GROUP or FULL tables, therefore we will read another byte from the stream. */
+	ci->code += 1;
+	ci->codeLen -= 1;
+	if (ci->codeLen < 0) return NULL;
+	if (in->type == INT_LIST_GROUP) {
+		in = (_InstNode*)in->list[in->ids[(*ci->code >> 3) & 7]]; /* Group tables are indexed by bits 3..5 of the byte. */
+		if (in == NULL) return NULL;
+		if (in->type == INT_LIST_PREFIXED) return in->list[in->ids[index]];
+	} else if (in->type == INT_LIST_FULL) {
+		in = (_InstNode*)in->list[in->ids[*ci->code]]; /* Full tables are indexed by the whole byte. */
+		if (in == NULL) return NULL;
+		if (in->type == INT_LIST_PREFIXED) return in->list[in->ids[index]];
+	}
+	/* No VEXed instruction was found. */
+	return NULL;
+}
+_InstInfo* inst_lookup(_CodeInfo* ci, _PrefixState* ps)
+{	/*
+	 * Locates the instruction info for the opcode at ci->code by walking the instruction trie.
+	 * ci->codeLen is decremented for every byte examined and ci->code is advanced before each
+	 * additional byte is read, so both are modified as a side effect.
+	 * ps is the prefix state; some paths update it (decodedPrefixes, usedPrefixes, ignored prefixes).
+	 * VEX-prefixed instructions are delegated to inst_vex_lookup.
+	 * Returns the matching _InstInfo, or NULL when the bytes run out or no instruction matches.
+	 */
+	unsigned int tmpIndex0 = 0, tmpIndex1 = 0, tmpIndex2 = 0, rex = ps->vrex;
+	_InstNode* in = NULL;
+	_InstInfo* ii = NULL;
+	int isWaitIncluded = FALSE;
+	/* See whether we have to handle a VEX prefixed instruction. */
+	if (ps->decodedPrefixes & INST_PRE_VEX) {
+		ii = inst_vex_lookup(ci, ps);
+		/* Make sure that VEX.L exists when forced. */
+		if (ii && (((_InstInfoEx*)ii)->flagsEx & INST_FORCE_VEXL) && (~ps->vrex & PREFIX_EX_L)) return NULL;
+		return ii;
+	}
+	/* Read first byte. */
+	ci->codeLen -= 1;
+	if (ci->codeLen < 0) return NULL;
+	tmpIndex0 = *ci->code;
+	/* Check for special 0x9b, WAIT instruction, which can be part of some instructions(x87). */
+	if (tmpIndex0 == INST_WAIT_INDEX) {
+		/* Only OCST_1dBYTES get a chance to include this byte as part of the opcode. */
+		isWaitIncluded = TRUE;
+		/* Ignore all prefixes, since they are useless and operate on the WAIT instruction itself. */
+		prefixes_ignore_all(ps);
+		/* Move to next code byte as a new whole instruction. */
+		ci->code += 1;
+		ci->codeLen -= 1;
+		if (ci->codeLen < 0) return NULL; /* Faster to return NULL, it will be detected as WAIT later anyway. */
+		tmpIndex0 = *ci->code;
+	}
+	/* Check for NULL node for index 0. */
+	in = (_InstNode*)Instructions.list[Instructions.ids[tmpIndex0]];
+	if (in == NULL) return NULL;
+	/* Single byte instruction (OCST_1BYTE). */
+	if ((in->type == INT_INFO) && (!isWaitIncluded)) {
+		/* Some single byte instructions need extra treatment. */
+		switch (tmpIndex0)
+		{
+			case INST_ARPL_INDEX:
+				/*
+				 * ARPL/MOVSXD share the same opcode, and both have different operands and mnemonics, of course.
+				 * Practically, I couldn't come up with a comfortable way to merge the operands' types of ARPL/MOVSXD.
+				 * And since the DB can't be patched dynamically, because the DB has to be multi-threaded compliant,
+				 * I have no choice but to check for ARPL/MOVSXD right here - "right about now, the funk soul brother, check it out now, the funk soul brother...", fatboy slim
+				 */
+			return ci->dt == Decode64Bits ? (_InstInfo*)&II_movsxd : &II_arpl; /* MOVSXD in 64-bit decoding mode, ARPL otherwise. */
+			case INST_NOP_INDEX: /* Nopnopnop */
+				/* Check for Pause, since it's prefixed with 0xf3, which is not a real mandatory prefix. */
+				if (ps->decodedPrefixes & INST_PRE_REP) {
+					/* Flag this prefix as used. */
+					ps->usedPrefixes |= INST_PRE_REP;
+					return &II_pause;
+				}
+				/*
+				 * Treat NOP/XCHG specially.
+				 * If we're not in 64bits restore XCHG to NOP, since in the DB it's XCHG.
+				 * Else if we're in 64bits examine REX, if exists, and decide which instruction should go to output.
+				 * 48 90 XCHG RAX, RAX is a true NOP (eat REX in this case because it's valid).
+				 * 90 XCHG EAX, EAX is a true NOP (and not high dword of RAX = 0 although it should be a 32 bits operation).
+				 * Note that if the REX.B is used, then the register is not RAX anymore but R8, which means it's not a NOP.
+				 */
+				if (rex & PREFIX_EX_W) ps->usedPrefixes |= INST_PRE_REX;
+				if ((ci->dt != Decode64Bits) || (~rex & PREFIX_EX_B)) return &II_nop;
+			return (_InstInfo*)in; /* 64-bit mode with REX.B set: keep the table entry instead of NOP. */
+			case INST_LEA_INDEX:
+				/* Ignore segment override prefixes for LEA instruction. */
+				ps->decodedPrefixes &= ~INST_PRE_SEGOVRD_MASK;
+				/* Update unused mask for ignoring segment prefix. */
+				prefixes_ignore(ps, PFXIDX_SEG);
+			break;
+		}
+		return (_InstInfo*)in;
+	}
+	/* Read first byte, still doens't mean all of its bits are used (I.E: ModRM). */
+	ci->code += 1;
+	ci->codeLen -= 1;
+	if (ci->codeLen < 0) return NULL;
+	tmpIndex1 = *ci->code;
+	/* Try single byte instruction + reg bits (OCST_13BYTES). */
+	if ((in->type == INT_LIST_GROUP) && (!isWaitIncluded)) return (_InstInfo*)in->list[in->ids[(tmpIndex1 >> 3) & 7]];
+	/* Try single byte instruction + reg byte OR one whole byte (OCST_1dBYTES). */
+	if (in->type == INT_LIST_DIVIDED) {
+		/* OCST_1dBYTES is relatively simple to OCST_2dBYTES, since it's really divided at 0xc0. */
+		if (tmpIndex1 < INST_DIVIDED_MODRM) {
+			/* An instruction which requires a ModR/M byte. Thus it's 1.3 bytes long instruction. */
+			tmpIndex1 = (tmpIndex1 >> 3) & 7; /* Isolate the 3 REG/OPCODE bits. */
+		} else { /* Normal 2 bytes instruction. */
+			/*
+			 * Divided instructions can't be in the range of 0x8-0xc0.
+			 * That's because 0-8 are used for 3 bits group.
+			 * And 0xc0-0xff are used for not-divided instruction.
+			 * So the inbetween range is omitted, thus saving some more place in the tables.
+			 */
+			tmpIndex1 -= INST_DIVIDED_MODRM - 8;
+		}
+		in = (_InstNode*)in->list[in->ids[tmpIndex1]];
+		/* Return any instruction or NULL. */
+		if (in == NULL) return NULL;
+		if (in->type == INT_INFO) {
+			/* If the instruction doesn't support the wait (marked as opsize) as part of the opcode, it's illegal. */
+			if ((~((_InstInfo*)in)->flags & INST_PRE_OP_SIZE) && (isWaitIncluded)) return NULL;
+			return (_InstInfo*)in;
+		}
+		/*
+		 * If we got here the instruction can support the wait prefix, so see if it was part of the stream.
+		 * Examine prefixed table, specially used for 0x9b, since it's optional.
+		 * No Wait: index = 0.
+		 * Wait Exists, index = 1.
+		 */
+		return (_InstInfo*)in->list[in->ids[isWaitIncluded]];
+	}
+	/* Don't allow to continue if WAIT is part of the opcode, because there are no instructions that include it. */
+	if (isWaitIncluded) return NULL;
+	/* Try 2 bytes long instruction (doesn't include ModRM byte). */
+	if (in->type == INT_LIST_FULL) {
+		in = (_InstNode*)in->list[in->ids[tmpIndex1]];
+		/* Check for NULL node for index 1. */
+		if (in == NULL) return NULL;
+		/* This is where we check if we just read two escape bytes in a row, which means it is a 3DNow! instruction. */
+		if ((tmpIndex0 == _3DNOW_ESCAPE_BYTE) && (tmpIndex1 == _3DNOW_ESCAPE_BYTE)) return &II_3dnow;
+		/* 2 bytes instruction (OCST_2BYTES). */
+		if (in->type == INT_INFO) return (_InstInfo*)in;
+		/*
+		 * 2 bytes + mandatory perfix.
+		 * Mandatory prefixes can be anywhere in the prefixes.
+		 * There cannot be more than one mandatory prefix, unless it's a normal operand size prefix.
+		 */
+		if (in->type == INT_LIST_PREFIXED) return inst_lookup_prefixed(in, ps);
+	}
+	/* Read third byte, still doens't mean all of its bits are used (I.E: ModRM). */
+	ci->code += 1;
+	ci->codeLen -= 1;
+	if (ci->codeLen < 0) return NULL;
+	tmpIndex2 = *ci->code;
+	/* Assume it's a ModRM byte. */
+	ii = (_InstInfo*)in->list[in->ids[(tmpIndex2 >> 3) & 7]]; /* Looked up before the node type is tested; only the GROUP and DIVIDED branches below use it. */
+	/* Try 2 bytes + reg instruction (OCST_23BYTES). */
+	if (in->type == INT_LIST_GROUP) {
+		if (ii == NULL) return NULL;
+		if (ii->type == INT_INFO) return ii;
+		/* It has to be a prefixed table. */
+		return inst_lookup_prefixed((_InstNode*)ii, ps);
+	}
+	/* Try 2 bytes + divided range (OCST_2dBYTES). */
+	if (in->type == INT_LIST_DIVIDED) {
+		/*
+		 * OCST_2dBYTES is complex, because there are a few instructions which are not divided in some special cases.
+		 * If the instruction wasn't divided (but still it must be a 2.3 because we are in divided category)
+		 * or it was an official 2.3 (because its index was less than 0xc0) -
+		 * Then it means the instruction should be using the REG bits, otherwise give a chance to range 0xc0-0xff.
+		 */
+		/* If we found an instruction only by its REG bits, AND it is not divided, then return it. */
+		if ((ii != NULL) && (ii->flags & INST_NOT_DIVIDED)) return ii;
+		/* Otherwise, if the range is above 0xc0, try the special divided range (range 0x8-0xc0 is omitted). */
+		if (tmpIndex2 >= INST_DIVIDED_MODRM) ii = (_InstInfo*)in->list[in->ids[tmpIndex2 - INST_DIVIDED_MODRM + 8]];
+		/* It might be that we got here without touching ii in the above if statements, then it becomes an invalid instruction prolly. */
+		return ii;
+	}
+	/* Try 3 full bytes (OCST_3BYTES - no ModRM byte). */
+	if (in->type == INT_LIST_FULL) {
+		/* OCST_3BYTES. */
+		in = (_InstNode*)in->list[in->ids[tmpIndex2]];
+		/* Check for NULL node for index 2. */
+		if (in == NULL) return NULL;
+		if (in->type == INT_INFO) return (_InstInfo*)in;
+		if (in->type == INT_LIST_PREFIXED) return inst_lookup_prefixed(in, ps);
+	}
+	/* Kahtchinggg, damn. */
+	return NULL;
+}
+/*
+* 3DNow! instruction handling:
+* This is used when we encounter a 3DNow! instruction.
+* We can't really locate a 3DNow! instruction before we see two escaped bytes,
+* 0x0f, 0x0f. Then we have to extract operands which are, dest=mmx register, src=mmx register or quadword indirection.
+* When we are finished with the extraction of operands we can resume to locate the instruction by reading another byte
+* which tells us which 3DNow instruction we really tracked down...
+* So in order to tell the extract operands function which operands the 3DNow! instruction require, we need to set up some
+* generic instruction info for 3DNow! instructions.
+* In the inst_lookup itself, when we read an OCST_3BYTES which the two first bytes are 0x0f and 0x0f.
+* we will return this special generic II for the specific operands we are interested in (MM, MM64).
+* Then after extracting the operand, we'll call a completion routine for locating the instruction
+* which will be called only for 3DNow! instructions, distinguished by a flag, and it will read the last byte of the 3 bytes.
+*
+* The id of this opcode should not be used, the following function should change it anyway.
+*/
+_InstInfo II_3dnow = {INT_INFO, ISC_3DNOW, OT_MM64, OT_MM, I_UNDEFINED, INST_32BITS | INST_MODRM_REQUIRED | INST_3DNOW_FETCH}; /* Generic placeholder returned by inst_lookup for the 0x0f 0x0f escape; its opcode id is I_UNDEFINED until the real instruction is resolved. */
+/*
+ * Completes a 3DNow! lookup: resolves the trailing opcode byte at ci->code
+ * against the 0x0f 0x0f table. On a match the byte is consumed (ci->code advanced,
+ * ci->codeLen decremented) and the instruction info is returned; otherwise NULL
+ * is returned and ci is left untouched.
+ */
+_InstInfo* inst_lookup_3dnow(_CodeInfo* ci)
+{
+	/* The two escape bytes lead to the dedicated 3DNow! table. */
+	_InstNode* table = &Table_0F_0F;
+	_InstNode* node = NULL;
+	/* The opcode byte must still be available in the stream. */
+	if (ci->codeLen < 1) return NULL;
+	node = (_InstNode*)table->list[table->ids[*ci->code]];
+	/* Only a plain instruction entry counts as a match. */
+	if ((node == NULL) || (node->type != INT_INFO)) return NULL;
+	/* Consume the byte only once the instruction is known to exist. */
+	ci->codeLen -= 1;
+	ci->code += 1;
+	return (_InstInfo*)node;
+}