toolbox 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/bake/ruby/gdb.rb +135 -0
- data/bake/toolbox/gdb.rb +137 -0
- data/bake/toolbox/lldb.rb +137 -0
- data/context/fiber-debugging.md +171 -0
- data/context/getting-started.md +178 -0
- data/context/heap-debugging.md +351 -0
- data/context/index.yaml +28 -0
- data/context/object-inspection.md +208 -0
- data/context/stack-inspection.md +188 -0
- data/data/toolbox/command.py +254 -0
- data/data/toolbox/constants.py +200 -0
- data/data/toolbox/context.py +295 -0
- data/data/toolbox/debugger/__init__.py +99 -0
- data/data/toolbox/debugger/gdb_backend.py +595 -0
- data/data/toolbox/debugger/lldb_backend.py +885 -0
- data/data/toolbox/fiber.py +885 -0
- data/data/toolbox/format.py +200 -0
- data/data/toolbox/heap.py +669 -0
- data/data/toolbox/init.py +85 -0
- data/data/toolbox/object.py +84 -0
- data/data/toolbox/rarray.py +124 -0
- data/data/toolbox/rbasic.py +103 -0
- data/data/toolbox/rbignum.py +52 -0
- data/data/toolbox/rclass.py +136 -0
- data/data/toolbox/readme.md +214 -0
- data/data/toolbox/rexception.py +150 -0
- data/data/toolbox/rfloat.py +98 -0
- data/data/toolbox/rhash.py +159 -0
- data/data/toolbox/rstring.py +234 -0
- data/data/toolbox/rstruct.py +157 -0
- data/data/toolbox/rsymbol.py +302 -0
- data/data/toolbox/stack.py +630 -0
- data/data/toolbox/value.py +183 -0
- data/lib/toolbox/gdb.rb +21 -0
- data/lib/toolbox/lldb.rb +21 -0
- data/lib/toolbox/version.rb +7 -1
- data/lib/toolbox.rb +9 -24
- data/license.md +21 -0
- data/readme.md +64 -0
- data/releases.md +9 -0
- data.tar.gz.sig +2 -0
- metadata +95 -165
- metadata.gz.sig +0 -0
- data/Rakefile +0 -57
- data/lib/dirs.rb +0 -9
- data/lib/toolbox/config.rb +0 -211
- data/lib/toolbox/default_controller.rb +0 -393
- data/lib/toolbox/helpers.rb +0 -11
- data/lib/toolbox/rendering.rb +0 -413
- data/lib/toolbox/searching.rb +0 -85
- data/lib/toolbox/session_params.rb +0 -63
- data/lib/toolbox/sorting.rb +0 -74
- data/locale/de/LC_MESSAGES/toolbox.mo +0 -0
- data/public/images/add.png +0 -0
- data/public/images/arrow_down.gif +0 -0
- data/public/images/arrow_up.gif +0 -0
- data/public/images/close.png +0 -0
- data/public/images/edit.gif +0 -0
- data/public/images/email.png +0 -0
- data/public/images/page.png +0 -0
- data/public/images/page_acrobat.png +0 -0
- data/public/images/page_add.png +0 -0
- data/public/images/page_copy.png +0 -0
- data/public/images/page_delete.png +0 -0
- data/public/images/page_edit.png +0 -0
- data/public/images/page_excel.png +0 -0
- data/public/images/page_list.png +0 -0
- data/public/images/page_save.png +0 -0
- data/public/images/page_word.png +0 -0
- data/public/images/remove.png +0 -0
- data/public/images/show.gif +0 -0
- data/public/images/spinner.gif +0 -0
- data/public/javascripts/popup.js +0 -498
- data/public/javascripts/toolbox.js +0 -18
- data/public/stylesheets/context_menu.css +0 -168
- data/public/stylesheets/popup.css +0 -30
- data/public/stylesheets/toolbox.css +0 -107
- data/view/toolbox/_collection.html.erb +0 -24
- data/view/toolbox/_collection_header.html.erb +0 -7
- data/view/toolbox/_context_menu.html.erb +0 -17
- data/view/toolbox/_dialogs.html.erb +0 -6
- data/view/toolbox/_form.html.erb +0 -30
- data/view/toolbox/_form_collection_row.html.erb +0 -18
- data/view/toolbox/_form_fieldset.html.erb +0 -30
- data/view/toolbox/_form_fieldset_row.html.erb +0 -19
- data/view/toolbox/_list.html.erb +0 -25
- data/view/toolbox/_list_row.html.erb +0 -10
- data/view/toolbox/_menu.html.erb +0 -7
- data/view/toolbox/_search_field.html.erb +0 -8
- data/view/toolbox/_show.html.erb +0 -12
- data/view/toolbox/_show_collection_row.html.erb +0 -6
- data/view/toolbox/_show_fieldset.html.erb +0 -21
- data/view/toolbox/edit.html.erb +0 -5
- data/view/toolbox/index.html.erb +0 -3
- data/view/toolbox/new.html.erb +0 -9
- data/view/toolbox/show.html.erb +0 -39
|
@@ -0,0 +1,669 @@
|
|
|
1
|
+
import debugger
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
# Constants
|
|
5
|
+
# Mask selecting the low 5 bits of an RBasic flags word. The masked value is
# compared against Ruby type constants (e.g. T_DATA = 0x0c) when filtering
# heap objects by type.
RBASIC_FLAGS_TYPE_MASK = 0x1f
|
|
6
|
+
|
|
7
|
+
class RubyHeap:
    """Ruby heap scanning infrastructure.

    Provides methods to iterate through the Ruby heap and find objects
    by type. Returns VALUEs (not extracted pointers) for maximum flexibility.

    Typical use: construct, call initialize(), and only if it returns True
    call scan() / iterate_heap() / find_typed_data().
    """

    def __init__(self):
        """Initialize heap scanner (call initialize() to set up VM pointers)."""
        self.vm_ptr = None
        self.objspace = None

        # Cached type lookups (populated by initialize())
        self._rbasic_type = None
        self._value_type = None
        self._char_ptr_type = None
        # Byte offset of RBasic.flags and byte width of a VALUE, used for
        # raw memory reads that bypass per-field debugger lookups.
        self._flags_offset = None
        self._value_size = None

    def initialize(self):
        """Initialize VM and objspace pointers.

        Evaluates ``ruby_current_vm_ptr``, locates the objspace (trying
        ``vm->gc.objspace`` first, then ``vm->objspace``), and caches the
        types and the flags offset needed for heap iteration. Diagnostics
        are printed on failure.

        Returns:
            True if initialization successful, False otherwise
        """
        try:
            self.vm_ptr = debugger.parse_and_eval('ruby_current_vm_ptr')
            if int(self.vm_ptr) == 0:
                print("Error: ruby_current_vm_ptr is NULL")
                print("Make sure Ruby is fully initialized and the process is running.")
                return False

            # Ruby 3.3+ moved objspace into a gc struct, Ruby 3.2- has it directly in VM
            # Try gc.objspace first (Ruby 3.3+), fall back to vm.objspace (Ruby 3.2-)
            gc_struct = self.vm_ptr['gc']
            if gc_struct is not None:
                # Ruby 3.3+ path
                self.objspace = gc_struct['objspace']
            else:
                # Ruby 3.2- path
                self.objspace = self.vm_ptr['objspace']

            if self.objspace is None:
                print("Error: Could not access objspace field")
                print(f"VM pointer type: {self.vm_ptr.type}")
                print("Make sure you're debugging a Ruby process with debug symbols.")
                return False

            # Check if objspace is NULL (can happen if GC not initialized)
            try:
                objspace_int = int(self.objspace)
            except (debugger.Error, TypeError, ValueError) as e:
                print(f"Error: Can't convert objspace to int: {e}")
                return False

            if objspace_int == 0:
                print("Error: objspace is NULL")
                print("Make sure the Ruby GC has been initialized.")
                return False

            # Cache commonly used type lookups
            self._rbasic_type = debugger.lookup_type('struct RBasic').pointer()
            self._value_type = debugger.lookup_type('VALUE')
            self._char_ptr_type = debugger.lookup_type('char').pointer()

            # Cache flags field offset for fast memory access
            # This is critical for LLDB performance where field lookup is expensive
            try:
                rbasic_struct = debugger.lookup_type('struct RBasic')
                # 'flags' is normally the first field (offset 0), but the
                # offset is looked up programmatically for portability.
                fields = rbasic_struct.fields()
                flags_field = next((f for f in fields if f.name == 'flags'), None)
                if flags_field:
                    self._flags_offset = flags_field.bitpos // 8
                else:
                    # Flags is typically the first field (offset 0)
                    self._flags_offset = 0
                self._value_size = self._value_type.sizeof
            except (debugger.Error, AttributeError):
                # Fallback: flags at offset 0 and an 8-byte VALUE
                self._flags_offset = 0
                self._value_size = 8

            return True
        except debugger.MemoryError as e:
            # Listed before debugger.Error: if MemoryError derives from Error
            # (as in GDB), the generic handler would otherwise shadow it.
            print(f"Memory error during initialization: {e}")
            print("The Ruby VM may not be fully initialized yet.")
            print("Try breaking at a point where Ruby is running (e.g., after rb_vm_exec).")
            return False
        except debugger.Error as e:
            print(f"Error initializing: {e}")
            print("Make sure you're debugging a Ruby process with debug symbols.")
            return False

    def _read_flags_fast(self, obj_address):
        """Read flags field directly from memory without field lookup.

        This is a critical optimization for LLDB where GetChildMemberWithName
        is expensive. By reading flags directly using the cached offset,
        we avoid thousands of field lookups during heap iteration.

        Args:
            obj_address: Memory address of the RBasic object

        Returns:
            Integer flags value
        """
        try:
            flags_address = obj_address + self._flags_offset
            # Read VALUE-sized memory at flags offset
            flags_bytes = debugger.read_memory(flags_address, self._value_size)
            # Convert bytes to integer (little-endian on x86_64)
            return int.from_bytes(flags_bytes, byteorder='little', signed=False)
        except (debugger.Error, debugger.MemoryError):
            # Fallback to field access if direct memory read fails
            obj_ptr = debugger.create_value(obj_address, self._rbasic_type)
            return int(obj_ptr['flags'])

    def _get_page(self, page_index):
        """Get a heap page by index, handling Ruby version differences.

        Args:
            page_index: Index of the page to retrieve

        Returns:
            Page object, or None on error
        """
        try:
            # Ruby 3.3+ uses rb_darray with 'data' field, Ruby 3.2- uses direct pointer
            sorted_field = self.objspace['heap_pages']['sorted']
            if sorted_field is not None:
                data_field = sorted_field['data']
                if data_field is not None:
                    # Ruby 3.3+: rb_darray with 'data' field
                    return data_field[page_index]
            # Ruby 3.2 and earlier: sorted is a direct pointer array
            return sorted_field[page_index]
        except (debugger.MemoryError, debugger.Error):
            return None

    def iterate_heap(self):
        """Yield all objects from the Ruby heap.

        Yields:
            Tuple of (VALUE, flags, address) for each object on the heap
        """
        yield from self.iterate_heap_from(None)

    def scan(self, type_flag=None, limit=None, from_address=None):
        """Scan heap for objects matching a specific Ruby type flag.

        Args:
            type_flag: Ruby type constant (e.g., RUBY_T_STRING, RUBY_T_DATA), or None for all types
            limit: Maximum number of objects to find (None or 0 for no limit)
            from_address: Address to continue from (for pagination)

        Returns:
            Tuple of (objects, next_address) where:
            - objects: List of VALUEs matching the type
            - next_address: The next address to scan from (for pagination), or None if no more objects
        """
        objects = []
        next_address = None

        # Iterate heap, starting from the address if specified
        for obj, flags, obj_address in self.iterate_heap_from(from_address):
            # Check type (lower 5 bits of flags) if type_flag is specified
            if type_flag is not None and (flags & RBASIC_FLAGS_TYPE_MASK) != type_flag:
                continue

            # The limit is checked on the first match *beyond* the limit so
            # that its address can be handed back as the resume point.
            if limit and len(objects) >= limit:
                next_address = obj_address
                break

            objects.append(obj)

        # next_address is the first matching object we didn't include
        return objects, next_address

    def _find_page_for_address(self, address):
        """Find which heap page contains the given address.

        Args:
            address: Memory address to search for

        Returns:
            Page index if found, None otherwise
        """
        if not self.objspace:
            return None

        try:
            allocated_pages = int(self.objspace['heap_pages']['allocated_pages'])
        except (debugger.MemoryError, debugger.Error):
            return None

        # Linear search through pages
        # TODO: Could use binary search since pages are sorted
        for i in range(allocated_pages):
            page = self._get_page(i)
            if page is None:
                continue

            try:
                page_start = int(page['start'])
                total_slots = int(page['total_slots'])
                slot_size = int(page['slot_size'])

                # Half-open range check: [start, start + slots * slot_size)
                page_end = page_start + (total_slots * slot_size)
                if page_start <= address < page_end:
                    return i
            except (debugger.MemoryError, debugger.Error):
                continue

        return None

    def iterate_heap_from(self, from_address=None):
        """Yield all objects from the Ruby heap, optionally starting from a specific address.

        Args:
            from_address: If specified, finds the page containing this address and starts from there.
                If None, starts from the beginning of the heap.

        Yields:
            Tuple of (VALUE, flags, address) for each object on the heap
        """
        start_page = 0
        start_address = None
        if from_address is not None:
            start_page = self._find_page_for_address(from_address)
            if start_page is None:
                # Address not found in any page, start from beginning
                print(f"Warning: Address 0x{from_address:x} not found in heap, starting from beginning", file=sys.stderr)
                start_page = 0
            else:
                # Remember to skip within the page to this address
                start_address = from_address

        # Delegate to the page-based iterator
        yield from self._iterate_heap_from_page(start_page, start_address)

    def _iterate_heap_from_page(self, start_page=0, skip_until_address=None):
        """Yield all objects from the Ruby heap, starting from a specific page.

        Slots whose flags word is 0 are treated as free and skipped. Pages
        that cannot be read are reported on stderr and skipped.

        Args:
            start_page: Page index to start from (default: 0)
            skip_until_address: If specified, calculate the slot index and start from there (for first page only)

        Yields:
            Tuple of (VALUE, flags, address) for each object on the heap
        """
        if not self.objspace:
            return

        try:
            allocated_pages = int(self.objspace['heap_pages']['allocated_pages'])
        except (debugger.MemoryError, debugger.Error) as e:
            # Same exception set as _find_page_for_address uses for this read.
            print(f"Error reading heap_pages: {e}")
            print("The heap may not be initialized yet.")
            return

        # char* type used to normalise the page start to a byte address
        char_ptr_type = debugger.lookup_type('char').pointer()

        for i in range(start_page, allocated_pages):
            page = self._get_page(i)
            if page is None:
                continue

            try:
                # Get start address - in some Ruby versions it's a pointer, in others it's an integer
                start_value = page['start']
                try:
                    start_int = int(start_value.cast(char_ptr_type))
                except (debugger.Error, AttributeError):
                    # start is already an integer value (e.g., Ruby 3.2 uses uintptr_t)
                    start_int = int(start_value)

                total_slots = int(page['total_slots'])
                slot_size = int(page['slot_size'])
            except (debugger.MemoryError, debugger.Error) as e:
                print(f"Error reading page {i}: {e}", file=sys.stderr)
                continue

            # Skip pages with invalid dimensions
            if total_slots <= 0 or slot_size <= 0:
                print(f"Warning: Page {i} has invalid dimensions (total_slots={total_slots}, slot_size={slot_size}), skipping", file=sys.stderr)
                continue

            # For the first page, calculate which slot to start from
            start_slot = 0
            if i == start_page and skip_until_address is not None:
                offset_from_page_start = int(skip_until_address) - start_int
                start_slot = offset_from_page_start // slot_size

                # Ensure we don't go out of bounds
                if start_slot >= total_slots:
                    continue  # Skip this entire page
                if start_slot < 0:
                    start_slot = 0

            # BULK READ APPROACH:
            #
            # Each page holds total_slots slots of slot_size bytes. The whole
            # page is read in one debugger call, and each slot's flags word is
            # then sliced out in pure Python, avoiding per-object debugger
            # overhead. If the bulk read fails (or comes back short), flags
            # are read per object instead.
            try:
                page_size = total_slots * slot_size
                try:
                    flags_data = debugger.read_memory(start_int, page_size)
                except (debugger.Error, debugger.MemoryError):
                    # If bulk read fails, we'll read flags individually
                    flags_data = None

                for j in range(start_slot, total_slots):
                    try:
                        byte_offset = j * slot_size
                        obj_address = start_int + byte_offset

                        flags = None
                        if flags_data is not None:
                            try:
                                flags_start = byte_offset + self._flags_offset
                                flags_bytes = flags_data[flags_start:flags_start + self._value_size]
                                # Slicing past the end never raises; it just
                                # returns fewer bytes. Only trust a full word.
                                if len(flags_bytes) == self._value_size:
                                    flags = int.from_bytes(flags_bytes, byteorder='little', signed=False)
                            except (IndexError, ValueError):
                                flags = None

                        if flags is None:
                            # No (usable) bulk data, read directly
                            flags = self._read_flags_fast(obj_address)

                        # Skip free objects (most common case - skip early)
                        if flags == 0:
                            continue

                        # The obj_address IS the VALUE (pointer to the heap slot)
                        obj = debugger.create_value_from_int(obj_address, self._value_type)
                        yield obj, flags, obj_address
                    except (debugger.Error, RuntimeError):
                        continue

            except (debugger.Error, debugger.MemoryError) as e:
                # If reading page failed, skip it
                print(f"Failed to read page {i}: {e}, skipping", file=sys.stderr)
                continue

    def find_typed_data(self, data_type, limit=None, progress=False):
        """Find RTypedData objects matching a specific type.

        Args:
            data_type: Pointer to rb_data_type_struct to match
            limit: Maximum number of objects to find (None or 0 for no limit)
            progress: If True, print progress to stderr

        Returns:
            List of VALUEs (not extracted data pointers) matching the type
        """
        objects = []

        # T_DATA constant
        T_DATA = 0x0c

        # Get RTypedData type for casting
        rtypeddata_type = debugger.lookup_type('struct RTypedData').pointer()

        # The page count is informational only; failure to read it is ignored.
        try:
            if progress:
                allocated_pages = int(self.objspace['heap_pages']['allocated_pages'])
                print(f"Scanning {allocated_pages} heap pages...", file=sys.stderr)
        except (debugger.MemoryError, debugger.Error):
            pass

        objects_checked = 0

        for obj, flags, address in self.iterate_heap():
            # Check if we've reached the limit
            if limit and len(objects) >= limit:
                if progress:
                    print(f"Reached limit of {limit} object(s), stopping scan", file=sys.stderr)
                break

            objects_checked += 1

            # Print progress every 10000 objects
            if progress and objects_checked % 10000 == 0:
                print(f" Checked {objects_checked} objects, found {len(objects)} match(es)...", file=sys.stderr)

            # Check if it's T_DATA
            if (flags & RBASIC_FLAGS_TYPE_MASK) != T_DATA:
                continue

            # Cast to RTypedData and check type
            try:
                typed_data = obj.cast(rtypeddata_type)

                # Compare values directly using __eq__
                if typed_data['type'] == data_type:
                    # Return the VALUE, not the extracted data pointer
                    objects.append(obj)
                    if progress:
                        print(f" Found object #{len(objects)} at VALUE 0x{int(obj):x}", file=sys.stderr)
            except (debugger.Error, RuntimeError):
                continue

        if progress:
            if limit and len(objects) >= limit:
                print(f"Scan complete: checked {objects_checked} objects (stopped at limit)", file=sys.stderr)
            else:
                print(f"Scan complete: checked {objects_checked} objects", file=sys.stderr)

        return objects
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
class RubyHeapScanCommand(debugger.Command):
    """Scan the Ruby heap for objects, optionally filtered by type.

    Usage: rb-heap-scan [--type TYPE] [--limit N] [--from $heap]

    TYPE can be:
    - A Ruby type constant like RUBY_T_STRING, RUBY_T_ARRAY, RUBY_T_HASH
    - A numeric value (e.g., 0x05 for T_STRING)
    - Omit --type to scan all objects

    Options:
    --type TYPE Filter by Ruby type (omit to scan all objects)
    --limit N Stop after finding N objects (default: 10)
    --from ADDR Start scanning from the given address (for pagination)

    Pagination:
    When the limit is reached, the address of the next matching object is
    saved to $heap, allowing you to paginate:
    rb-heap-scan --type RUBY_T_STRING --limit 10 # First page
    rb-heap-scan --type RUBY_T_STRING --limit 10 --from $heap # Next page

    The $heap variable contains the address to resume scanning from; it is
    cleared once the end of the heap has been reached.

    Examples:
    rb-heap-scan --type RUBY_T_STRING
    rb-heap-scan --type RUBY_T_ARRAY --limit 20
    rb-heap-scan --type 0x05 # T_STRING
    rb-heap-scan --limit 100 # All objects
    rb-heap-scan --from $heap # Continue from last scan
    """

    def __init__(self):
        """Create the command under the name "rb-heap-scan"."""
        super().__init__("rb-heap-scan", debugger.COMMAND_USER)

    def usage(self):
        """Print usage information."""
        print("Usage: rb-heap-scan [--type TYPE] [--limit N] [--from $heap]")
        print("Examples:")
        print(" rb-heap-scan --type RUBY_T_STRING # Find up to 10 strings")
        print(" rb-heap-scan --type RUBY_T_ARRAY --limit 5 # Find up to 5 arrays")
        print(" rb-heap-scan --type 0x05 --limit 100 # Find up to 100 T_STRING objects")
        print(" rb-heap-scan --limit 20 # Scan 20 objects (any type)")
        print(" rb-heap-scan --type RUBY_T_STRING --from $heap # Continue from last scan")
        print()
        print("Pagination:")
        print(" The address of the next object is saved to $heap for pagination:")
        print(" rb-heap-scan --type RUBY_T_STRING --limit 10 # First page")
        print(" rb-heap-scan --type RUBY_T_STRING --from $heap # Next page")

    def _parse_type(self, type_arg):
        """Parse a type argument and return the type value.

        The argument is first looked up as a constant name via the project's
        ``constants`` module; if that yields None it is parsed as a hex
        (``0x`` prefix) or decimal number. Values outside 0-31 only produce
        a warning and are still returned.

        Args:
            type_arg: String type argument (constant name or numeric value)

        Returns:
            Integer type value, or None on error
        """
        import constants

        # Try as a constant name first
        type_value = constants.get(type_arg)

        if type_value is None:
            # Try parsing as a number (hex or decimal)
            try:
                if type_arg.startswith(('0x', '0X')):
                    type_value = int(type_arg, 16)
                else:
                    type_value = int(type_arg)
            except ValueError:
                print(f"Error: Unknown type constant '{type_arg}'")
                print("Use a constant like RUBY_T_STRING or a numeric value like 0x05")
                return None

        # Validate type value is reasonable (0-31 for the 5-bit type field)
        if not (0 <= type_value <= 31):
            print(f"Warning: Type value {type_value} (0x{type_value:x}) is outside valid range 0-31")

        return type_value

    def invoke(self, arg, from_tty):
        """Execute the heap scan command.

        Parses --from / --limit / --type, scans the heap, prints each match,
        stores the matches in $heap0..$heapN, and stores the resume address
        in $heap (or clears it at the end of the heap). All exceptions are
        caught and reported with a traceback rather than propagated.

        Args:
            arg: Raw argument string passed by the debugger (may be empty/None)
            from_tty: Passed through to format.create_terminal()
        """
        try:
            # Parse arguments
            import command
            arguments = command.parse_arguments(arg if arg else "")

            # Check if we're continuing from a previous scan
            from_address = None
            from_option = arguments.get_option('from')
            if from_option is not None:
                try:
                    # $heap should be an address (pointer value)
                    from_address = int(debugger.parse_and_eval(from_option))
                except (debugger.Error, ValueError, TypeError):
                    # If $heap doesn't exist or is void/invalid, start from the beginning
                    print(f"Note: {from_option} is not set or invalid, wrapping around to start of heap", file=sys.stderr)
                    from_address = None

            # Get limit (default 10)
            limit = 10
            limit_value = arguments.get_option('limit')
            if limit_value is not None:
                try:
                    limit = int(limit_value)
                except (ValueError, TypeError):
                    print("Error: --limit must be a number")
                    return
                if limit < 0:
                    # A negative limit makes the scan stop before collecting
                    # anything, which would be misreported as an empty heap.
                    print("Error: --limit must not be negative")
                    return

            # Get type (optional)
            type_value = None
            type_option = arguments.get_option('type')
            if type_option is not None:
                type_value = self._parse_type(type_option)
                if type_value is None:
                    return

            # Initialize heap
            heap = RubyHeap()
            if not heap.initialize():
                return

            # Print search description
            if type_value is not None:
                type_desc = f"type 0x{type_value:02x}"
            else:
                type_desc = "all types"

            if from_address:
                print(f"Scanning heap for {type_desc}, limit={limit}, continuing from address 0x{from_address:x}...")
            else:
                print(f"Scanning heap for {type_desc}, limit={limit}...")
            print()

            # Find objects
            objects, next_address = heap.scan(type_value, limit=limit, from_address=from_address)

            if not objects:
                print("No objects found")
                if from_address:
                    print("(You may have reached the end of the heap)")
                return

            # Import format for terminal output
            import format
            terminal = format.create_terminal(from_tty)

            # Import value module for interpretation
            import value as value_module

            print(f"Found {len(objects)} object(s):")
            print()

            for i, obj in enumerate(objects):
                # Set as convenience variable ($heap0, $heap1, ...)
                var_name = f"heap{i}"
                debugger.set_convenience_variable(var_name, obj)

                # Try to interpret and display the object; a failure for one
                # object is shown inline and does not abort the listing.
                try:
                    interpreted = value_module.interpret(obj)

                    print(terminal.print(
                        format.metadata, f" [{i}] ",
                        format.dim, f"${var_name} = ",
                        format.reset, interpreted
                    ))
                except Exception as e:
                    print(terminal.print(
                        format.metadata, f" [{i}] ",
                        format.dim, f"${var_name} = ",
                        format.error, f"<error: {e}>"
                    ))

            print()
            print(terminal.print(
                format.dim,
                f"Objects saved in $heap0 through $heap{len(objects)-1}",
                format.reset
            ))

            # Save next address to $heap for pagination
            if next_address is not None:
                # Save the next address to continue from
                void_ptr_type = debugger.lookup_type('void').pointer()
                debugger.set_convenience_variable('heap', debugger.create_value(next_address, void_ptr_type))
                print(terminal.print(
                    format.dim,
                    f"Next scan address saved to $heap: 0x{next_address:016x}",
                    format.reset
                ))
                print(terminal.print(
                    format.dim,
                    f"Run 'rb-heap-scan --type {type_option if type_option else '...'} --from $heap' for next page",
                    format.reset
                ))
            else:
                # Reached the end of the heap - unset $heap so next scan starts fresh
                debugger.set_convenience_variable('heap', None)
                print(terminal.print(
                    format.dim,
                    "Reached end of heap (no more objects to scan)",
                    format.reset
                ))

        except Exception as e:
            # Top-level command boundary: report and keep the debugger usable.
            print(f"Error: {e}")
            import traceback
            traceback.print_exc()
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
# Register commands
|
|
669
|
+
# Instantiate the command once at import time. The constructor passes the
# "rb-heap-scan" name to debugger.Command, which presumably registers it with
# the active debugger -- TODO confirm against the debugger backend.
RubyHeapScanCommand()
|