memprof 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +12 -16
- data/ext/arch.h +49 -6
- data/ext/bin_api.h +67 -19
- data/ext/elf.c +276 -56
- data/ext/mach.c +171 -58
- data/ext/memprof.c +91 -35
- data/ext/x86_64.c +77 -10
- data/ext/x86_64.h +86 -17
- data/ext/x86_gen.h +78 -38
- data/memprof.gemspec +2 -2
- metadata +3 -3
data/ext/x86_64.c
CHANGED
@@ -1,28 +1,60 @@
|
|
1
1
|
#if defined (_ARCH_x86_64_)
|
2
2
|
|
3
|
+
#include <assert.h>
|
3
4
|
#include <stdint.h>
|
4
5
|
#include <string.h>
|
5
6
|
|
6
7
|
#include "arch.h"
|
7
8
|
#include "x86_gen.h"
|
8
9
|
|
9
|
-
/*
|
10
|
-
*
|
10
|
+
/*
|
11
|
+
* inline_st1_tramp - inline stage 1 trampoline
|
12
|
+
*
|
13
|
+
* This is the stage 1 inline trampoline that will replace the mov instruction
|
14
|
+
* that updates freelist from the inlined function add_freelist.
|
15
|
+
*
|
16
|
+
* Note that the mov instruction is 7 bytes wide, so this trampoline needs two
|
17
|
+
* bytes of NOPs to keep it 7 bytes wide.
|
18
|
+
*
|
19
|
+
* In order to use this structure, you must set the displacement field to a
|
20
|
+
* 32bit displacement from the next instruction to the stage 2 trampoline.
|
21
|
+
*
|
22
|
+
* TODO replace the two 1-byte NOPs with a single 2-byte (16-bit) NOP.
|
23
|
+
*
|
24
|
+
* Original code:
|
25
|
+
*
|
26
|
+
* mov REGISTER, freelist # update the head of the freelist
|
11
27
|
*
|
12
|
-
*
|
13
|
-
*
|
14
|
-
*
|
28
|
+
* size = 7 bytes
|
29
|
+
*
|
30
|
+
* Code after tramp:
|
31
|
+
*
|
32
|
+
* jmp 0xfeedface(%rip) # jump to stage 2 trampoline
|
33
|
+
* nop # 1 byte NOP pad
|
34
|
+
* nop # 1 byte NOP pad
|
35
|
+
*
|
36
|
+
* size = 7 bytes
|
15
37
|
*/
|
16
38
|
struct inline_st1_tramp {
|
17
39
|
unsigned char jmp;
|
18
40
|
int32_t displacement;
|
19
41
|
unsigned char pad[2];
|
20
42
|
} __attribute__((__packed__)) inline_st1_tramp = {
|
21
|
-
.jmp =
|
43
|
+
.jmp = 0xe9,
|
22
44
|
.displacement = 0,
|
23
|
-
.pad = {
|
45
|
+
.pad = {0x90, 0x90},
|
24
46
|
};
|
25
47
|
|
48
|
+
/*
|
49
|
+
* inline_st1_base - inline stage 1 base instruction
|
50
|
+
*
|
51
|
+
* This structure is designed to be "laid onto" a piece of memory to ease the
|
52
|
+
* parsing, modification, and length calculation of the original instruction
|
53
|
+
* that will be overwritten with a jmp to the stage 2 trampoline.
|
54
|
+
*
|
55
|
+
* In order to use this structure, you must set the displacement, rex, and
|
56
|
+
* src_reg bytes to accurately represent the original instruction.
|
57
|
+
*/
|
26
58
|
struct inline_st1_base {
|
27
59
|
unsigned char rex;
|
28
60
|
unsigned char mov;
|
@@ -30,17 +62,24 @@ struct inline_st1_base {
|
|
30
62
|
int32_t displacement;
|
31
63
|
} __attribute__((__packed__)) inline_st1_mov = {
|
32
64
|
.rex = 0,
|
33
|
-
.mov =
|
65
|
+
.mov = 0x89,
|
34
66
|
.src_reg = 0,
|
35
67
|
.displacement = 0
|
36
68
|
};
|
37
69
|
|
38
70
|
/*
|
39
|
-
*
|
71
|
+
* arch_check_ins - architecture specific instruction check
|
72
|
+
*
|
73
|
+
* This function checks the opcodes at a specific address to see if
|
74
|
+
* they could be a move instruction.
|
75
|
+
*
|
76
|
+
* Returns 1 if the address matches a mov, 0 otherwise.
|
40
77
|
*/
|
41
78
|
static int
|
42
79
|
arch_check_ins(struct inline_st1_base *base)
|
43
80
|
{
|
81
|
+
assert(base != NULL);
|
82
|
+
|
44
83
|
/* is it a mov instruction? */
|
45
84
|
if (base->mov == 0x89 &&
|
46
85
|
|
@@ -55,12 +94,40 @@ arch_check_ins(struct inline_st1_base *base)
|
|
55
94
|
return 0;
|
56
95
|
}
|
57
96
|
|
97
|
+
/*
|
98
|
+
* arch_insert_inline_st2_tramp - architecture specific stage 2 tramp insert
|
99
|
+
*
|
100
|
+
* Given:
|
101
|
+
* - addr - The base address of an instruction sequence.
|
102
|
+
*
|
103
|
+
* - marker - This is the marker to search for which will indicate that the
|
104
|
+
* instruction sequence has been located.
|
105
|
+
*
|
106
|
+
* - trampoline - The address of the handler to redirect execution to.
|
107
|
+
*
|
108
|
+
* - table_entry - Address of where the stage 2 trampoline code will reside
|
109
|
+
*
|
110
|
+
* This function will:
|
111
|
+
* Insert and setup the stage 1 and stage 2 trampolines if addr points to an
|
112
|
+
* instruction that could be from the inlined add_freelist function.
|
113
|
+
*
|
114
|
+
* This function returns 1 on failure and 0 on success.
|
115
|
+
*/
|
58
116
|
int
|
59
117
|
arch_insert_inline_st2_tramp(void *addr, void *marker, void *trampoline, void *table_entry)
|
60
118
|
{
|
119
|
+
assert(addr != NULL);
|
120
|
+
assert(marker != NULL);
|
121
|
+
assert(trampoline != NULL);
|
122
|
+
assert(table_entry != NULL);
|
123
|
+
|
61
124
|
struct inline_st1_base *base = addr;
|
62
125
|
struct inline_tramp_st2_entry *entry = table_entry;
|
63
126
|
|
127
|
+
/* TODO make this a compile time assert */
|
128
|
+
assert(sizeof(struct inline_st1_base) ==
|
129
|
+
sizeof(struct inline_st1_tramp));
|
130
|
+
|
64
131
|
if (!arch_check_ins(base))
|
65
132
|
return 1;
|
66
133
|
|
@@ -109,7 +176,7 @@ arch_insert_inline_st2_tramp(void *addr, void *marker, void *trampoline, void *t
|
|
109
176
|
* NOPs. If it's 7, we'll land directly on the next instruction.
|
110
177
|
*/
|
111
178
|
default_inline_st2_tramp.jmp_displacement = (addr + sizeof(*base)) -
|
112
|
-
|
179
|
+
(table_entry + sizeof(default_inline_st2_tramp));
|
113
180
|
|
114
181
|
/* write the address of our C level trampoline in to the structure */
|
115
182
|
default_inline_st2_tramp.frame.addr = trampoline;
|
data/ext/x86_64.h
CHANGED
@@ -5,7 +5,27 @@
|
|
5
5
|
#include "arch.h"
|
6
6
|
|
7
7
|
/*
|
8
|
-
*
|
8
|
+
* tramp_st2_entry - stage 2 trampoline entry
|
9
|
+
*
|
10
|
+
* This trampoline calls a handler function via the callee saved register %rbx.
|
11
|
+
* The handler function is stored in the field 'addr'.
|
12
|
+
*
|
13
|
+
* A default pre-filled (except addr, of course) version of this trampoline is
|
14
|
+
* provided so that the opcodes do not need to be filled in every time it is
|
15
|
+
* used. You only need to set the addr field of default_st2_tramp and you are
|
16
|
+
* ready to roll.
|
17
|
+
*
|
18
|
+
* This trampoline is the assembly code:
|
19
|
+
*
|
20
|
+
* push %rbx # save %rbx
|
21
|
+
* push %rbp # save previous stack frame's %rbp
|
22
|
+
* mov %rsp, %rbp # update %rbp to be current stack pointer
|
23
|
+
* andq $0xFFFFFFFFFFFFFFF0, %rsp # align stack pointer as per the ABI
|
24
|
+
* mov ADDR, %rbx # move address of handler into %rbx
|
25
|
+
* callq *%rbx # call handler
|
26
|
+
* pop %rbx # restore %rbx
|
27
|
+
* leave # restore %rbp, move stack pointer back
|
28
|
+
* ret # return
|
9
29
|
*/
|
10
30
|
static struct tramp_st2_entry {
|
11
31
|
unsigned char push_rbx;
|
@@ -19,20 +39,69 @@ static struct tramp_st2_entry {
|
|
19
39
|
unsigned char rbx_restore;
|
20
40
|
unsigned char ret;
|
21
41
|
} __attribute__((__packed__)) default_st2_tramp = {
|
22
|
-
.push_rbx = 0x53,
|
23
|
-
.push_rbp = 0x55,
|
24
|
-
.save_rsp = {0x48, 0x89, 0xe5},
|
25
|
-
.align_rsp = {0x48, 0x83, 0xe4, 0xf0},
|
26
|
-
.mov = {
|
27
|
-
.addr = 0,
|
28
|
-
.call = {
|
29
|
-
.rbx_restore = 0x5b,
|
30
|
-
.leave = 0xc9,
|
31
|
-
.ret = 0xc3,
|
42
|
+
.push_rbx = 0x53,
|
43
|
+
.push_rbp = 0x55,
|
44
|
+
.save_rsp = {0x48, 0x89, 0xe5},
|
45
|
+
.align_rsp = {0x48, 0x83, 0xe4, 0xf0},
|
46
|
+
.mov = {0x48, 0xbb},
|
47
|
+
.addr = 0,
|
48
|
+
.call = {0xff, 0xd3},
|
49
|
+
.rbx_restore = 0x5b,
|
50
|
+
.leave = 0xc9,
|
51
|
+
.ret = 0xc3,
|
32
52
|
};
|
33
53
|
|
34
54
|
/*
|
35
|
-
*
|
55
|
+
* inline_tramp_st2_entry - stage 2 inline trampoline entry
|
56
|
+
*
|
57
|
+
* This trampoline calls a handler function via the callee saved register %rbx.
|
58
|
+
* The handler function is stored in the field 'addr'.
|
59
|
+
*
|
60
|
+
* The major difference between this trampoline and the one above is that this
|
61
|
+
* trampoline is intended to be used as the target of an 'inline trampoline',
|
62
|
+
* that is, code is redirected to it while the stack and registers may not be
|
63
|
+
* 'ready' for a function call.
|
64
|
+
*
|
65
|
+
* This trampoline provides space to regenerate the overwritten mov instruction
|
66
|
+
* and utmost care must be taken in order to recreate the overwritten
|
67
|
+
* instruction.
|
68
|
+
*
|
69
|
+
* This trampoline is hit with a jmp (NOT A CALL), and as such must take care
|
70
|
+
* to jmp back to resume execution.
|
71
|
+
*
|
72
|
+
* Like the above trampoline, this structure comes with a prefilled entry called
|
73
|
+
* default_inline_st2_tramp that has most of the fields prepopulated.
|
74
|
+
*
|
75
|
+
* To use this structure you must fill in:
|
76
|
+
* - mov_displacement - should be set to the 32bit displacement from the next
|
77
|
+
* instruction (i.e. frame) to freelist. This is used to recreate the
|
78
|
+
* overwritten instruction.
|
79
|
+
*
|
80
|
+
* - rdi_source_displacement - should be set to the 32bit displacement from
|
81
|
+
* the next instruction (i.e. push_rbx) to freelist. This is used to load
|
82
|
+
* freelist as the 1st argument to the handler.
|
83
|
+
*
|
84
|
+
* - addr - the address of the handler function to call
|
85
|
+
*
|
86
|
+
* - jmp_displacement - should be set to the 32bit displacement from the next
|
87
|
+
* instruction to the instruction after the stage 1 trampoline. This is
|
88
|
+
* used to resume execution after the handler has been hit.
|
89
|
+
*
|
90
|
+
*
|
91
|
+
* This structure represents the assembly code:
|
92
|
+
*
|
93
|
+
* mov SOURCE_REGISTER, freelist # update the freelist
|
94
|
+
* push %rdi # save %rdi
|
95
|
+
* mov freelist, %rdi # move first entry of freelist into %rdi
|
96
|
+
* push %rbp # save previous %rbp
|
97
|
+
* mov %rsp, %rbp # update %rbp to be current stack pointer
|
98
|
+
* andq $0xFFFFFFFFFFFFFFF0, %rsp # align stack pointer as per ABI
|
99
|
+
* mov ADDR, %rbx # load handler address into %rbx
|
100
|
+
* callq *%rbx # call handler
|
101
|
+
* leave # reset stack pointer, restore %rbp
|
102
|
+
* pop %rbx # restore %rbx
|
103
|
+
* pop %rdi # restore %rdi
|
104
|
+
* jmp NEXT_INSN # jmp to instruction after stage 1 tramp
|
36
105
|
*/
|
37
106
|
static struct inline_tramp_st2_entry {
|
38
107
|
unsigned char rex;
|
@@ -66,15 +135,15 @@ static struct inline_tramp_st2_entry {
|
|
66
135
|
|
67
136
|
.frame = {
|
68
137
|
.push_rdi = 0x57,
|
69
|
-
.mov_rdi =
|
138
|
+
.mov_rdi = {0x48, 0x8b, 0x3d},
|
70
139
|
.rdi_source_displacement = 0,
|
71
140
|
.push_rbx = 0x53,
|
72
141
|
.push_rbp = 0x55,
|
73
|
-
.save_rsp = {
|
74
|
-
.align_rsp = {
|
75
|
-
.mov = {
|
142
|
+
.save_rsp = {0x48, 0x89, 0xe5},
|
143
|
+
.align_rsp = {0x48, 0x83, 0xe4, 0xf0},
|
144
|
+
.mov = {0x48, 0xbb},
|
76
145
|
.addr = 0,
|
77
|
-
.call = {
|
146
|
+
.call = {0xff, 0xd3},
|
78
147
|
.leave = 0xc9,
|
79
148
|
.rbx_restore = 0x5b,
|
80
149
|
.rdi_restore = 0x5f,
|
data/ext/x86_gen.h
CHANGED
@@ -1,99 +1,139 @@
|
|
1
1
|
#if !defined(_x86_gen_)
|
2
2
|
#define _x86_gen_
|
3
3
|
|
4
|
+
#include <assert.h>
|
4
5
|
#include <sys/mman.h>
|
5
6
|
#include <stdint.h>
|
6
7
|
#include "arch.h"
|
7
8
|
|
8
|
-
/*
|
9
|
-
*
|
9
|
+
/*
|
10
|
+
* st1_base - stage 1 base instruction sequence
|
11
|
+
*
|
12
|
+
* This struct is intended to be "laid onto" a piece of memory to ease the
|
13
|
+
* parsing, use, and length calculation of call instructions that use a 32bit
|
14
|
+
* displacement.
|
15
|
+
*
|
16
|
+
* For example: callq <0xdeadbeef> #rb_newobj
|
10
17
|
*/
|
11
18
|
struct st1_base {
|
12
19
|
unsigned char call;
|
13
20
|
int32_t displacement;
|
14
21
|
} __attribute__((__packed__)) st1_mov = {
|
15
|
-
.call
|
22
|
+
.call = 0xe8,
|
16
23
|
.displacement = 0,
|
17
24
|
};
|
18
25
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
26
|
+
/*
|
27
|
+
* page_align - given an address, return a page aligned form
|
28
|
+
*
|
29
|
+
* TODO Don't assume page size, get it from sysconf and cache the result
|
30
|
+
*/
|
25
31
|
static inline void *
|
26
32
|
page_align(void *addr)
|
27
33
|
{
|
34
|
+
assert(addr != NULL);
|
28
35
|
return (void *)((size_t)addr & ~(0xFFFF));
|
29
36
|
}
|
30
37
|
|
38
|
+
/*
|
39
|
+
* copy_instructions - copy count bytes from src to dest, taking care to use
|
40
|
+
* mprotect to mark the section read/write.
|
41
|
+
*/
|
31
42
|
static void
|
32
|
-
copy_instructions(void *
|
43
|
+
copy_instructions(void *dest, void *src, size_t count)
|
33
44
|
{
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
45
|
+
assert(dest != NULL);
|
46
|
+
assert(src != NULL);
|
47
|
+
|
48
|
+
void *aligned_addr = page_align(dest);
|
49
|
+
|
50
|
+
/* I add "+ count" here to guard against the possibility of the instructions
|
51
|
+
* laying across a page boundary
|
52
|
+
*/
|
53
|
+
|
54
|
+
mprotect(aligned_addr, (dest - aligned_addr) + count, PROT_READ|PROT_WRITE|PROT_EXEC);
|
55
|
+
memcpy(dest, src, count);
|
56
|
+
mprotect(aligned_addr, (dest - aligned_addr) + count, PROT_READ|PROT_EXEC);
|
38
57
|
|
39
58
|
return;
|
40
59
|
}
|
41
60
|
|
61
|
+
/*
|
62
|
+
* WRITE_INSTRUCTIONS - page align start, recalculate len to take into account
|
63
|
+
* alignment, set the read/write permissions and execute the code stmt.
|
64
|
+
*/
|
42
65
|
#define WRITE_INSTRUCTIONS(start, len, stmt) do { \
|
43
|
-
|
66
|
+
void *aligned_addr = page_align((void *)start); \
|
67
|
+
int count = ((void *)start) - aligned_addr + len; \
|
68
|
+
mprotect(aligned_addr, count, PROT_READ | PROT_WRITE | PROT_EXEC); \
|
44
69
|
stmt; \
|
45
|
-
mprotect(
|
70
|
+
mprotect(aligned_addr, count, PROT_READ | PROT_EXEC); \
|
46
71
|
} while (0)
|
47
72
|
|
73
|
+
/*
|
74
|
+
* arch_insert_st1_tramp - architecture specific stage 1 trampoline insert
|
75
|
+
*
|
76
|
+
* Given:
|
77
|
+
* - a start address (start),
|
78
|
+
* - the absolute address of the function to intercept (trampee),
|
79
|
+
* - the absolute address of the code to execute instead (tramp),
|
80
|
+
*
|
81
|
+
* This function will:
|
82
|
+
* - interpret address start as a struct st1_base,
|
83
|
+
* - check that the instruction at start is actually a call
|
84
|
+
* - if so, check that the target of the call is trampee
|
85
|
+
* - and change the target to tramp
|
86
|
+
*
|
87
|
+
* Returns 0 on success, 1 otherwise.
|
88
|
+
*/
|
48
89
|
int
|
49
90
|
arch_insert_st1_tramp(void *start, void *trampee, void *tramp)
|
50
91
|
{
|
92
|
+
assert(start != NULL);
|
93
|
+
assert(trampee != NULL);
|
94
|
+
assert(tramp != NULL);
|
95
|
+
|
51
96
|
int32_t fn_addr = 0;
|
52
|
-
struct st1_base *check =
|
53
|
-
void *aligned_addr = page_align(&(check->displacement));
|
97
|
+
struct st1_base *check = start;
|
54
98
|
|
55
99
|
if (check->call == 0xe8) {
|
56
100
|
fn_addr = check->displacement;
|
57
101
|
if ((trampee - (void *)(check + 1)) == fn_addr) {
|
58
|
-
WRITE_INSTRUCTIONS(
|
59
|
-
(
|
102
|
+
WRITE_INSTRUCTIONS(&check->displacement,
|
103
|
+
sizeof(*check),
|
60
104
|
(check->displacement = (tramp - (void *)(check + 1))));
|
61
|
-
return
|
105
|
+
return 0;
|
62
106
|
}
|
63
107
|
}
|
64
108
|
|
65
|
-
return
|
66
|
-
}
|
67
|
-
|
68
|
-
static void *
|
69
|
-
get_got_addr(struct plt_entry *plt)
|
70
|
-
{
|
71
|
-
return (void *)&(plt->pad) + plt->jmp_disp;
|
72
|
-
}
|
73
|
-
|
74
|
-
void
|
75
|
-
arch_overwrite_got(void *plt, void *tramp)
|
76
|
-
{
|
77
|
-
memcpy(get_got_addr(plt), &tramp, sizeof(void *));
|
78
|
-
return;
|
109
|
+
return 1;
|
79
110
|
}
|
80
111
|
|
112
|
+
/*
|
113
|
+
* arch_get_st2_tramp - architecture specific stage 2 tramp accessor. This
|
114
|
+
* function returns a pointer to the default stage 2 trampoline setting size
|
115
|
+
* if a non-NULL pointer was passed in.
|
116
|
+
*/
|
81
117
|
void *
|
82
118
|
arch_get_st2_tramp(size_t *size)
|
83
119
|
{
|
84
120
|
if (size) {
|
85
|
-
*size = sizeof(
|
121
|
+
*size = sizeof(default_st2_tramp);
|
86
122
|
}
|
87
123
|
|
88
124
|
return &default_st2_tramp;
|
89
125
|
}
|
90
126
|
|
91
|
-
|
127
|
+
/*
|
128
|
+
* arch_get_inline_st2_tramp - architecture specific inline stage 2 tramp
|
129
|
+
* accessor. This function returns a pointer to the default inline stage 2
|
130
|
+
* trampoline setting size if a non-NULL pointer was passed in.
|
131
|
+
*/
|
92
132
|
void *
|
93
133
|
arch_get_inline_st2_tramp(size_t *size)
|
94
134
|
{
|
95
135
|
if (size) {
|
96
|
-
*size = sizeof(
|
136
|
+
*size = sizeof(default_inline_st2_tramp);
|
97
137
|
}
|
98
138
|
|
99
139
|
return &default_inline_st2_tramp;
|