memprof 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,60 @@
1
1
  #if defined (_ARCH_x86_64_)
2
2
 
3
+ #include <assert.h>
3
4
  #include <stdint.h>
4
5
  #include <string.h>
5
6
 
6
7
  #include "arch.h"
7
8
  #include "x86_gen.h"
8
9
 
9
- /* This is the stage 1 inline trampoline for hooking the inlined add_freelist
10
- * function .
10
+ /*
11
+ * inline_st1_tramp - inline stage 1 trampoline
12
+ *
13
+ * This is the stage 1 inline trampoline that will replace the mov instruction
14
+ * that updates freelist from the inlined function add_freelist.
15
+ *
16
+ * Note that the mov instruction is 7 bytes wide, so this trampoline needs two
17
+ * bytes of NOPs to keep it 7 bytes wide.
18
+ *
19
+ * In order to use this structure, you must set the displacement field to a
20
+ * 32bit displacement from the next instruction to the stage 2 trampoline.
21
+ *
22
+ * TODO replace the two 1-byte NOPs with a single 2-byte (16bit) NOP.
23
+ *
24
+ * Original code:
25
+ *
26
+ * mov REGISTER, freelist # update the head of the freelist
11
27
  *
12
- * NOTE: The original instruction mov %reg, freelist is 7 bytes wide,
13
- * whereas jmpq $displacement is only 5 bytes wide. We *must* pad out
14
- * the next two bytes. This will be important to remember below.
28
+ * size = 7 bytes
29
+ *
30
+ * Code after tramp:
31
+ *
32
+ * jmp 0xfeedface(%rip) # jump to stage 2 trampoline
33
+ * nop # 1 byte NOP pad
34
+ * nop # 1 byte NOP pad
35
+ *
36
+ * size = 7 bytes
15
37
  */
16
38
  struct inline_st1_tramp {
17
39
  unsigned char jmp;
18
40
  int32_t displacement;
19
41
  unsigned char pad[2];
20
42
  } __attribute__((__packed__)) inline_st1_tramp = {
21
- .jmp = '\xe9',
43
+ .jmp = 0xe9,
22
44
  .displacement = 0,
23
- .pad = {'\x90','\x90'},
45
+ .pad = {0x90, 0x90},
24
46
  };
25
47
 
48
+ /*
49
+ * inline_st1_base - inline stage 1 base instruction
50
+ *
51
+ * This structure is designed to be "laid onto" a piece of memory to ease the
52
+ * parsing, modification, and length calculation of the original instruction
53
+ * that will be overwritten with a jmp to the stage 2 trampoline.
54
+ *
55
+ * In order to use this structure, you must set the displacement, rex, and
56
+ * src_reg bytes to accurately represent the original instruction.
57
+ */
26
58
  struct inline_st1_base {
27
59
  unsigned char rex;
28
60
  unsigned char mov;
@@ -30,17 +62,24 @@ struct inline_st1_base {
30
62
  int32_t displacement;
31
63
  } __attribute__((__packed__)) inline_st1_mov = {
32
64
  .rex = 0,
33
- .mov = '\x89',
65
+ .mov = 0x89,
34
66
  .src_reg = 0,
35
67
  .displacement = 0
36
68
  };
37
69
 
38
70
  /*
39
- * inline tramp stuff below
71
+ * arch_check_ins - architecture specific instruction check
72
+ *
73
+ * This function checks the opcodes at a specific address to see if
74
+ * they could be a move instruction.
75
+ *
76
+ * Returns 1 if the address matches a mov, 0 otherwise.
40
77
  */
41
78
  static int
42
79
  arch_check_ins(struct inline_st1_base *base)
43
80
  {
81
+ assert(base != NULL);
82
+
44
83
  /* is it a mov instruction? */
45
84
  if (base->mov == 0x89 &&
46
85
 
@@ -55,12 +94,40 @@ arch_check_ins(struct inline_st1_base *base)
55
94
  return 0;
56
95
  }
57
96
 
97
+ /*
98
+ * arch_insert_inline_st2_tramp - architecture specific stage 2 tramp insert
99
+ *
100
+ * Given:
101
+ * - addr - The base address of an instruction sequence.
102
+ *
103
+ * - marker - This is the marker to search for which will indicate that the
104
+ * instruction sequence has been located.
105
+ *
106
+ * - trampoline - The address of the handler to redirect execution to.
107
+ *
108
+ * - table_entry - Address of where the stage 2 trampoline code will reside
109
+ *
110
+ * This function will:
111
+ * Insert and setup the stage 1 and stage 2 trampolines if addr points to an
112
+ * instruction that could be from the inlined add_freelist function.
113
+ *
114
+ * This function returns 1 on failure and 0 on success.
115
+ */
58
116
  int
59
117
  arch_insert_inline_st2_tramp(void *addr, void *marker, void *trampoline, void *table_entry)
60
118
  {
119
+ assert(addr != NULL);
120
+ assert(marker != NULL);
121
+ assert(trampoline != NULL);
122
+ assert(table_entry != NULL);
123
+
61
124
  struct inline_st1_base *base = addr;
62
125
  struct inline_tramp_st2_entry *entry = table_entry;
63
126
 
127
+ /* TODO make this a compile time assert */
128
+ assert(sizeof(struct inline_st1_base) ==
129
+ sizeof(struct inline_st1_tramp));
130
+
64
131
  if (!arch_check_ins(base))
65
132
  return 1;
66
133
 
@@ -109,7 +176,7 @@ arch_insert_inline_st2_tramp(void *addr, void *marker, void *trampoline, void *t
109
176
  * NOPS. If its 7, we'll land directly on the next instruction.
110
177
  */
111
178
  default_inline_st2_tramp.jmp_displacement = (addr + sizeof(*base)) -
112
- (table_entry + sizeof(default_inline_st2_tramp));
179
+ (table_entry + sizeof(default_inline_st2_tramp));
113
180
 
114
181
  /* write the address of our C level trampoline in to the structure */
115
182
  default_inline_st2_tramp.frame.addr = trampoline;
@@ -5,7 +5,27 @@
5
5
  #include "arch.h"
6
6
 
7
7
  /*
8
- * This is the "normal" stage 2 trampoline with a default entry pre-filled
8
+ * tramp_st2_entry - stage 2 trampoline entry
9
+ *
10
+ * This trampoline calls a handler function via the callee saved register %rbx.
11
+ * The handler function is stored in the field 'addr'.
12
+ *
13
+ * A default pre-filled (except addr, of course) version of this trampoline is
14
+ * provided so that the opcodes do not need to be filled in every time it is
15
+ * used. You only need to set the addr field of default_st2_tramp and you are
16
+ * ready to roll.
17
+ *
18
+ * This trampoline is the assembly code:
19
+ *
20
+ * push %rbx # save %rbx
21
+ * push %rbp # save previous stack frame's %rbp
22
+ * mov %rsp, %rbp # update %rbp to be current stack pointer
23
+ * andq $0xFFFFFFFFFFFFFFF0, %rsp # align stack pointer as per the ABI
24
+ * mov ADDR, %rbx # move address of handler into %rbx
25
+ * callq *%rbx # call handler
26
+ * leave # restore %rbp, move stack pointer back
27
+ * pop %rbx # restore %rbx
28
+ * ret # return
9
29
  */
10
30
  static struct tramp_st2_entry {
11
31
  unsigned char push_rbx;
@@ -19,20 +39,69 @@ static struct tramp_st2_entry {
19
39
  unsigned char rbx_restore;
20
40
  unsigned char ret;
21
41
  } __attribute__((__packed__)) default_st2_tramp = {
22
- .push_rbx = 0x53, // push rbx
23
- .push_rbp = 0x55, // push rbp
24
- .save_rsp = {0x48, 0x89, 0xe5}, // mov rsp, rbp
25
- .align_rsp = {0x48, 0x83, 0xe4, 0xf0}, // andl ~0x1, rsp
26
- .mov = {'\x48', '\xbb'}, // mov addr into rbx
27
- .addr = 0, // ^^^
28
- .call = {'\xff', '\xd3'}, // call rbx
29
- .rbx_restore = 0x5b, // pop rbx
30
- .leave = 0xc9, // leave
31
- .ret = 0xc3, // ret
42
+ .push_rbx = 0x53,
43
+ .push_rbp = 0x55,
44
+ .save_rsp = {0x48, 0x89, 0xe5},
45
+ .align_rsp = {0x48, 0x83, 0xe4, 0xf0},
46
+ .mov = {0x48, 0xbb},
47
+ .addr = 0,
48
+ .call = {0xff, 0xd3},
49
+ .rbx_restore = 0x5b,
50
+ .leave = 0xc9,
51
+ .ret = 0xc3,
32
52
  };
33
53
 
34
54
  /*
35
- * This is the inline stage 2 trampoline with a default entry pre-filled
55
+ * inline_tramp_st2_entry - stage 2 inline trampoline entry
56
+ *
57
+ * This trampoline calls a handler function via the callee saved register %rbx.
58
+ * The handler function is stored in the field 'addr'.
59
+ *
60
+ * The major difference between this trampoline and the one above is that this
61
+ * trampoline is intended to be used as the target of an 'inline trampoline',
62
+ * that is, code jumps here directly, so the stack and registers may not be
63
+ * 'ready' for a function call.
64
+ *
65
+ * This trampoline provides space to regenerate the overwritten mov instruction
66
+ * and utmost care must be taken in order to recreate the overwritten
67
+ * instruction.
68
+ *
69
+ * This trampoline is hit with a jmp (NOT A CALL), and as such must take care
70
+ * to jmp back to resume execution.
71
+ *
72
+ * Like the above trampoline, this structure comes with a prefilled entry called
73
+ * default_inline_st2_tramp that has most of the fields prepopulated.
74
+ *
75
+ * To use this structure you must fill in:
76
+ * - mov_displacement - should be set to the 32bit displacement from the next
77
+ * instruction (i.e. frame) to freelist. This is used to recreate the
78
+ * overwritten instruction.
79
+ *
80
+ * - rdi_source_displacement - should be set to the 32bit displacement from
81
+ * the next instruction (i.e. push_rbx) to freelist. This is used to load
82
+ * freelist as the 1st argument to the handler.
83
+ *
84
+ * - addr - the address of the handler function to call
85
+ *
86
+ * - jmp_displacement - should be set to the 32bit displacement from the next
87
+ * instruction to the instruction after the stage 1 trampoline. This is
88
+ * used to resume execution after the handler has been hit.
89
+ *
90
+ *
91
+ * This structure represents the assembly code:
92
+ *
93
+ * mov SOURCE_REGISTER, freelist # update the freelist
94
+ * push %rdi # save %rdi
95
+ * mov freelist, %rdi # move first entry of freelist into %rdi
96
+ * push %rbx # save %rbx
+ * push %rbp # save previous %rbp
97
+ * mov %rsp, %rbp # update %rbp to be current stack pointer
98
+ * andq $0xFFFFFFFFFFFFFFF0, %rsp # align stack pointer as per ABI
99
+ * mov ADDR, %rbx # load handler address into %rbx
100
+ * callq *%rbx # call handler
101
+ * leave # reset stack pointer, restore %rbp
102
+ * pop %rbx # restore %rbx
103
+ * pop %rdi # restore %rdi
104
+ * jmp NEXT_INSN # jmp to instruction after stage 1 tramp
36
105
  */
37
106
  static struct inline_tramp_st2_entry {
38
107
  unsigned char rex;
@@ -66,15 +135,15 @@ static struct inline_tramp_st2_entry {
66
135
 
67
136
  .frame = {
68
137
  .push_rdi = 0x57,
69
- .mov_rdi = {'\x48', '\x8b', '\x3d'},
138
+ .mov_rdi = {0x48, 0x8b, 0x3d},
70
139
  .rdi_source_displacement = 0,
71
140
  .push_rbx = 0x53,
72
141
  .push_rbp = 0x55,
73
- .save_rsp = {'\x48', '\x89', '\xe5'},
74
- .align_rsp = {'\x48', '\x83', '\xe4', '\xf0'},
75
- .mov = {'\x48', '\xbb'},
142
+ .save_rsp = {0x48, 0x89, 0xe5},
143
+ .align_rsp = {0x48, 0x83, 0xe4, 0xf0},
144
+ .mov = {0x48, 0xbb},
76
145
  .addr = 0,
77
- .call = {'\xff', '\xd3'},
146
+ .call = {0xff, 0xd3},
78
147
  .leave = 0xc9,
79
148
  .rbx_restore = 0x5b,
80
149
  .rdi_restore = 0x5f,
@@ -1,99 +1,139 @@
1
1
  #if !defined(_x86_gen_)
2
2
  #define _x86_gen_
3
3
 
4
+ #include <assert.h>
4
5
  #include <sys/mman.h>
5
6
  #include <stdint.h>
6
7
  #include "arch.h"
7
8
 
8
- /* This structure makes it easier to find and update call instructions that
9
- * will become the stage 1 trampoline
9
+ /*
10
+ * st1_base - stage 1 base instruction sequence
11
+ *
12
+ * This struct is intended to be "laid onto" a piece of memory to ease the
13
+ * parsing, use, and length calculation of call instructions that use a 32bit
14
+ * displacement.
15
+ *
16
+ * For example: callq <0xdeadbeef> #rb_newobj
10
17
  */
11
18
  struct st1_base {
12
19
  unsigned char call;
13
20
  int32_t displacement;
14
21
  } __attribute__((__packed__)) st1_mov = {
15
- .call = '\xe8',
22
+ .call = 0xe8,
16
23
  .displacement = 0,
17
24
  };
18
25
 
19
- struct plt_entry {
20
- unsigned char jmp[2];
21
- uint32_t jmp_disp;
22
- unsigned char pad[10];
23
- } __attribute__((__packed__));
24
-
26
+ /*
27
+ * page_align - given an address, return a page aligned form
28
+ *
29
+ * TODO Don't assume page size, get it from sysconf and cache the result
30
+ */
25
31
  static inline void *
26
32
  page_align(void *addr)
27
33
  {
34
+ assert(addr != NULL);
28
35
  return (void *)((size_t)addr & ~(0xFFFF));
29
36
  }
30
37
 
38
+ /*
39
+ * copy_instructions - copy count bytes from src to dest, taking care to use
40
+ * mprotect to mark the section read/write.
41
+ */
31
42
  static void
32
- copy_instructions(void *to, void *from, size_t count)
43
+ copy_instructions(void *dest, void *src, size_t count)
33
44
  {
34
- void *aligned_addr = page_align(to);
35
- mprotect(aligned_addr, (to - aligned_addr) + 10, PROT_READ|PROT_WRITE|PROT_EXEC);
36
- memcpy(to, from, count);
37
- mprotect(aligned_addr, (to - aligned_addr) + 10, PROT_READ|PROT_EXEC);
45
+ assert(dest != NULL);
46
+ assert(src != NULL);
47
+
48
+ void *aligned_addr = page_align(dest);
49
+
50
+ /* I add "+ count" here to guard against the possibility of the instructions
51
+ * lying across a page boundary
52
+ */
53
+
54
+ mprotect(aligned_addr, (dest - aligned_addr) + count, PROT_READ|PROT_WRITE|PROT_EXEC);
55
+ memcpy(dest, src, count);
56
+ mprotect(aligned_addr, (dest - aligned_addr) + count, PROT_READ|PROT_EXEC);
38
57
 
39
58
  return;
40
59
  }
41
60
 
61
+ /*
62
+ * WRITE_INSTRUCTIONS - page align start, recalculate len to take into account
63
+ * alignment, set the read/write permissions and execute the code stmt.
64
+ */
42
65
  #define WRITE_INSTRUCTIONS(start, len, stmt) do { \
43
- mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC); \
66
+ void *aligned_addr = page_align((void *)start); \
67
+ int count = ((void *)start) - aligned_addr + len; \
68
+ mprotect(aligned_addr, count, PROT_READ | PROT_WRITE | PROT_EXEC); \
44
69
  stmt; \
45
- mprotect(start, len, PROT_READ | PROT_EXEC); \
70
+ mprotect(aligned_addr, count, PROT_READ | PROT_EXEC); \
46
71
  } while (0)
47
72
 
73
+ /*
74
+ * arch_insert_st1_tramp - architecture specific stage 1 trampoline insert
75
+ *
76
+ * Given:
77
+ * - a start address (start),
78
+ * - the absolute address of the function to intercept (trampee),
79
+ * - the absolute address of the code to execute instead (tramp),
80
+ *
81
+ * This function will:
82
+ * - interpret address start as a struct st1_base,
83
+ * - check that the instruction at call is actually a call
84
+ * - if so, check that the target of the call is trampee
85
+ * - and change the target to tramp
86
+ *
87
+ * Returns 0 on success, 1 otherwise.
88
+ */
48
89
  int
49
90
  arch_insert_st1_tramp(void *start, void *trampee, void *tramp)
50
91
  {
92
+ assert(start != NULL);
93
+ assert(trampee != NULL);
94
+ assert(tramp != NULL);
95
+
51
96
  int32_t fn_addr = 0;
52
- struct st1_base *check = (struct st1_base *)start;
53
- void *aligned_addr = page_align(&(check->displacement));
97
+ struct st1_base *check = start;
54
98
 
55
99
  if (check->call == 0xe8) {
56
100
  fn_addr = check->displacement;
57
101
  if ((trampee - (void *)(check + 1)) == fn_addr) {
58
- WRITE_INSTRUCTIONS(aligned_addr,
59
- ((void *)&(check->displacement) - aligned_addr) + 10,
102
+ WRITE_INSTRUCTIONS(&check->displacement,
103
+ sizeof(*check),
60
104
  (check->displacement = (tramp - (void *)(check + 1))));
61
- return 1;
105
+ return 0;
62
106
  }
63
107
  }
64
108
 
65
- return 0;
66
- }
67
-
68
- static void *
69
- get_got_addr(struct plt_entry *plt)
70
- {
71
- return (void *)&(plt->pad) + plt->jmp_disp;
72
- }
73
-
74
- void
75
- arch_overwrite_got(void *plt, void *tramp)
76
- {
77
- memcpy(get_got_addr(plt), &tramp, sizeof(void *));
78
- return;
109
+ return 1;
79
110
  }
80
111
 
112
+ /*
113
+ * arch_get_st2_tramp - architecture specific stage 2 tramp accessor. This
114
+ * function returns a pointer to the default stage 2 trampoline setting size
115
+ * if a non-NULL pointer was passed in.
116
+ */
81
117
  void *
82
118
  arch_get_st2_tramp(size_t *size)
83
119
  {
84
120
  if (size) {
85
- *size = sizeof(struct tramp_st2_entry);
121
+ *size = sizeof(default_st2_tramp);
86
122
  }
87
123
 
88
124
  return &default_st2_tramp;
89
125
  }
90
126
 
91
-
127
+ /*
128
+ * arch_get_inline_st2_tramp - architecture specific inline stage 2 tramp
129
+ * accessor. This function returns a pointer to the default inline stage 2
130
+ * trampoline setting size if a non-NULL pointer was passed in.
131
+ */
92
132
  void *
93
133
  arch_get_inline_st2_tramp(size_t *size)
94
134
  {
95
135
  if (size) {
96
- *size = sizeof(struct inline_tramp_st2_entry);
136
+ *size = sizeof(default_inline_st2_tramp);
97
137
  }
98
138
 
99
139
  return &default_inline_st2_tramp;