toolbox 0.1.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/bake/ruby/gdb.rb +135 -0
  4. data/bake/toolbox/gdb.rb +137 -0
  5. data/bake/toolbox/lldb.rb +137 -0
  6. data/context/fiber-debugging.md +171 -0
  7. data/context/getting-started.md +200 -0
  8. data/context/heap-debugging.md +351 -0
  9. data/context/index.yaml +28 -0
  10. data/context/object-inspection.md +208 -0
  11. data/context/stack-inspection.md +188 -0
  12. data/data/toolbox/command.py +479 -0
  13. data/data/toolbox/constants.py +200 -0
  14. data/data/toolbox/context.py +371 -0
  15. data/data/toolbox/debugger/__init__.py +101 -0
  16. data/data/toolbox/debugger/gdb_backend.py +664 -0
  17. data/data/toolbox/debugger/lldb_backend.py +986 -0
  18. data/data/toolbox/fiber.py +877 -0
  19. data/data/toolbox/format.py +205 -0
  20. data/data/toolbox/heap.py +679 -0
  21. data/data/toolbox/init.py +89 -0
  22. data/data/toolbox/print.py +79 -0
  23. data/data/toolbox/rarray.py +116 -0
  24. data/data/toolbox/rbasic.py +99 -0
  25. data/data/toolbox/rbignum.py +48 -0
  26. data/data/toolbox/rclass.py +136 -0
  27. data/data/toolbox/readme.md +214 -0
  28. data/data/toolbox/rexception.py +150 -0
  29. data/data/toolbox/rfloat.py +88 -0
  30. data/data/toolbox/rhash.py +151 -0
  31. data/data/toolbox/rstring.py +230 -0
  32. data/data/toolbox/rstruct.py +149 -0
  33. data/data/toolbox/rsymbol.py +278 -0
  34. data/data/toolbox/rvalue.py +183 -0
  35. data/data/toolbox/stack.py +620 -0
  36. data/lib/toolbox/gdb.rb +21 -0
  37. data/lib/toolbox/lldb.rb +21 -0
  38. data/lib/toolbox/version.rb +7 -1
  39. data/lib/toolbox.rb +9 -24
  40. data/license.md +21 -0
  41. data/readme.md +64 -0
  42. data/releases.md +9 -0
  43. data.tar.gz.sig +0 -0
  44. metadata +95 -165
  45. metadata.gz.sig +0 -0
  46. data/Rakefile +0 -61
  47. data/lib/dirs.rb +0 -9
  48. data/lib/toolbox/config.rb +0 -211
  49. data/lib/toolbox/default_controller.rb +0 -393
  50. data/lib/toolbox/helpers.rb +0 -11
  51. data/lib/toolbox/rendering.rb +0 -413
  52. data/lib/toolbox/searching.rb +0 -85
  53. data/lib/toolbox/session_params.rb +0 -63
  54. data/lib/toolbox/sorting.rb +0 -74
  55. data/locale/de/LC_MESSAGES/toolbox.mo +0 -0
  56. data/public/images/add.png +0 -0
  57. data/public/images/arrow_down.gif +0 -0
  58. data/public/images/arrow_up.gif +0 -0
  59. data/public/images/close.png +0 -0
  60. data/public/images/edit.gif +0 -0
  61. data/public/images/email.png +0 -0
  62. data/public/images/page.png +0 -0
  63. data/public/images/page_acrobat.png +0 -0
  64. data/public/images/page_add.png +0 -0
  65. data/public/images/page_copy.png +0 -0
  66. data/public/images/page_delete.png +0 -0
  67. data/public/images/page_edit.png +0 -0
  68. data/public/images/page_excel.png +0 -0
  69. data/public/images/page_list.png +0 -0
  70. data/public/images/page_save.png +0 -0
  71. data/public/images/page_word.png +0 -0
  72. data/public/images/remove.png +0 -0
  73. data/public/images/show.gif +0 -0
  74. data/public/images/spinner.gif +0 -0
  75. data/public/javascripts/popup.js +0 -498
  76. data/public/javascripts/toolbox.js +0 -18
  77. data/public/stylesheets/context_menu.css +0 -168
  78. data/public/stylesheets/popup.css +0 -30
  79. data/public/stylesheets/toolbox.css +0 -107
  80. data/view/toolbox/_collection.html.erb +0 -24
  81. data/view/toolbox/_collection_header.html.erb +0 -7
  82. data/view/toolbox/_context_menu.html.erb +0 -17
  83. data/view/toolbox/_dialogs.html.erb +0 -6
  84. data/view/toolbox/_form.html.erb +0 -30
  85. data/view/toolbox/_form_collection_row.html.erb +0 -18
  86. data/view/toolbox/_form_fieldset.html.erb +0 -30
  87. data/view/toolbox/_form_fieldset_row.html.erb +0 -19
  88. data/view/toolbox/_list.html.erb +0 -25
  89. data/view/toolbox/_list_row.html.erb +0 -10
  90. data/view/toolbox/_menu.html.erb +0 -7
  91. data/view/toolbox/_search_field.html.erb +0 -8
  92. data/view/toolbox/_show.html.erb +0 -12
  93. data/view/toolbox/_show_collection_row.html.erb +0 -6
  94. data/view/toolbox/_show_fieldset.html.erb +0 -21
  95. data/view/toolbox/edit.html.erb +0 -5
  96. data/view/toolbox/index.html.erb +0 -3
  97. data/view/toolbox/new.html.erb +0 -9
  98. data/view/toolbox/show.html.erb +0 -39
@@ -0,0 +1,679 @@
1
+ import debugger
2
+ import sys
3
+ import command
4
+ import constants
5
+ import format
6
+ import rvalue
7
+
8
+ # Mask selecting the Ruby type bits (the lower 5 bits) of an RBasic flags word
9
+ RBASIC_FLAGS_TYPE_MASK = 0x1f
10
+
11
class RubyHeap:
    """Ruby heap scanning infrastructure.

    Walks the heap pages recorded in the VM's objspace and yields the
    non-free slots found in them. Objects are handed back as VALUEs (not
    extracted pointers) so callers can cast them however they need.
    """

    def __init__(self):
        """Create an unbound scanner (call initialize() to set up VM pointers)."""
        self.vm_ptr = None
        self.objspace = None

        # Cached type lookups and layout facts, filled in by initialize().
        self._rbasic_type = None
        self._value_type = None
        self._char_ptr_type = None
        self._flags_offset = None
        self._value_size = None

    def initialize(self):
        """Resolve the VM and objspace pointers and cache type information.

        Prints a diagnostic message on every failure path.

        Returns:
            True if initialization was successful, False otherwise.
        """
        try:
            self.vm_ptr = debugger.parse_and_eval('ruby_current_vm_ptr')
            if int(self.vm_ptr) == 0:
                print("Error: ruby_current_vm_ptr is NULL")
                print("Make sure Ruby is fully initialized and the process is running.")
                return False

            # Ruby 3.3+ moved objspace into a gc struct; Ruby 3.2- keeps it
            # directly in the VM. A missing field is reported as None.
            gc_struct = self.vm_ptr['gc']
            if gc_struct is not None:
                self.objspace = gc_struct['objspace']
            else:
                self.objspace = self.vm_ptr['objspace']

            if self.objspace is None:
                print("Error: Could not access objspace field")
                print(f"VM pointer type: {self.vm_ptr.type}")
                print("Make sure you're debugging a Ruby process with debug symbols.")
                return False

            # objspace can be NULL if the GC has not been initialized yet.
            try:
                objspace_int = int(self.objspace)
            except (debugger.Error, TypeError, ValueError) as e:
                print(f"Error: Can't convert objspace to int: {e}")
                return False

            if objspace_int == 0:
                print("Error: objspace is NULL")
                print("Make sure the Ruby GC has been initialized.")
                return False

            # Cache commonly used type lookups.
            self._rbasic_type = constants.type_struct('struct RBasic').pointer()
            self._value_type = constants.type_struct('VALUE')
            self._char_ptr_type = constants.type_struct('char').pointer()

            self._cache_flags_layout()
            return True
        except debugger.MemoryError as e:
            # Listed before debugger.Error so that this more specific message
            # is still reachable if MemoryError is a subclass of Error.
            print(f"Memory error during initialization: {e}")
            print("The Ruby VM may not be fully initialized yet.")
            print("Try breaking at a point where Ruby is running (e.g., after rb_vm_exec).")
            return False
        except debugger.Error as e:
            print(f"Error initializing: {e}")
            print("Make sure you're debugging a Ruby process with debug symbols.")
            return False

    def _cache_flags_layout(self):
        """Cache the byte offset of RBasic.flags and the size of a VALUE.

        Lets heap iteration read flags straight from memory instead of doing
        a field lookup per object. Falls back to offset 0 and size 8 when the
        layout cannot be queried.
        """
        try:
            rbasic_struct = constants.type_struct('struct RBasic')
            flags_field = next(
                (f for f in rbasic_struct.fields() if f.name == 'flags'), None
            )
            if flags_field:
                self._flags_offset = flags_field.bitpos // 8
            else:
                # Flags is typically the first field (offset 0).
                self._flags_offset = 0
            self._value_size = self._value_type.sizeof
        except (debugger.Error, AttributeError):
            self._flags_offset = 0
            self._value_size = 8

    def _read_flags_fast(self, obj_address):
        """Read the flags word of an object directly from memory.

        Uses the cached flags offset and VALUE size; if the raw memory read
        fails, falls back to a regular field access through an RBasic pointer.

        Args:
            obj_address: Memory address of the RBasic object.

        Returns:
            Integer flags value.
        """
        try:
            flags_address = obj_address + self._flags_offset
            flags_bytes = debugger.read_memory(flags_address, self._value_size)
            # NOTE: decoded as little-endian regardless of the target.
            return int.from_bytes(flags_bytes, byteorder='little', signed=False)
        except (debugger.Error, debugger.MemoryError):
            obj_ptr = debugger.create_value(obj_address, self._rbasic_type)
            return int(obj_ptr['flags'])

    def _get_page(self, page_index):
        """Get a heap page by index, handling Ruby version differences.

        Args:
            page_index: Index of the page to retrieve.

        Returns:
            Page object, or None on error.
        """
        try:
            sorted_field = self.objspace['heap_pages']['sorted']
            if sorted_field is None:
                # No usable page table; report it like any other lookup error.
                return None

            # Ruby 3.3+: rb_darray with a 'data' field.
            data_field = sorted_field['data']
            if data_field is not None:
                return data_field[page_index]

            # Ruby 3.2 and earlier: sorted is a direct pointer array.
            return sorted_field[page_index]
        except (debugger.MemoryError, debugger.Error):
            return None

    def iterate_heap(self):
        """Yield all objects from the Ruby heap.

        Yields:
            Tuple of (VALUE, flags, address) for each object on the heap.
        """
        yield from self.iterate_heap_from(None)

    def scan(self, type_flag=None, limit=None, from_address=None):
        """Scan the heap for objects matching a specific Ruby type flag.

        Args:
            type_flag: Ruby type constant (e.g., RUBY_T_STRING, RUBY_T_DATA),
                or None for all types.
            limit: Maximum number of objects to collect. None or 0 means
                no limit.
            from_address: Address to continue from (for pagination).

        Returns:
            Tuple of (objects, next_address) where objects is the list of
            matching VALUEs and next_address is the address of the first
            matching object that was not included, or None if the scan ran
            to the end of the heap.
        """
        objects = []
        next_address = None

        for obj, flags, obj_address in self.iterate_heap_from(from_address):
            # The type lives in the lower 5 bits of flags.
            if type_flag is not None and (flags & RBASIC_FLAGS_TYPE_MASK) != type_flag:
                continue

            # Once the limit is reached, the next match becomes the resume point.
            if limit and len(objects) >= limit:
                next_address = obj_address
                break

            objects.append(obj)

        return objects, next_address

    def _find_page_for_address(self, address):
        """Find which heap page contains the given address.

        Args:
            address: Memory address to search for.

        Returns:
            Page index if found, None otherwise.
        """
        if not self.objspace:
            return None

        try:
            allocated_pages = int(self.objspace['heap_pages']['allocated_pages'])
        except (debugger.MemoryError, debugger.Error):
            return None

        # Linear search through pages.
        # TODO: Could use binary search since pages are sorted.
        for i in range(allocated_pages):
            page = self._get_page(i)
            if page is None:
                continue

            try:
                page_start = int(page['start'])
                total_slots = int(page['total_slots'])
                slot_size = int(page['slot_size'])
            except (debugger.MemoryError, debugger.Error):
                continue

            if page_start <= address < page_start + total_slots * slot_size:
                return i

        return None

    def iterate_heap_from(self, from_address=None):
        """Yield heap objects, optionally starting from a specific address.

        Args:
            from_address: If specified, iteration starts in the page that
                contains this address, at the slot containing it. If the
                address is not in any page, a warning is printed to stderr
                and iteration starts from the beginning of the heap.
                If None, starts from the beginning of the heap.

        Yields:
            Tuple of (VALUE, flags, address) for each object on the heap.
        """
        start_page = 0
        start_address = None
        if from_address is not None:
            start_page = self._find_page_for_address(from_address)
            if start_page is None:
                print(f"Warning: Address 0x{from_address:x} not found in heap, starting from beginning", file=sys.stderr)
                start_page = 0
            else:
                # Skip within the page up to this address.
                start_address = from_address

        yield from self._iterate_heap_from_page(start_page, start_address)

    def _iterate_heap_from_page(self, start_page=0, skip_until_address=None):
        """Yield heap objects, starting from a specific page.

        Slots whose flags word is zero are treated as free and skipped.
        Pages and slots that cannot be read are skipped.

        Args:
            start_page: Page index to start from (default: 0).
            skip_until_address: If specified, the slot index is computed from
                this address and iteration of the first page starts there.

        Yields:
            Tuple of (VALUE, flags, address) for each object on the heap.
        """
        if not self.objspace:
            return

        try:
            allocated_pages = int(self.objspace['heap_pages']['allocated_pages'])
        except (debugger.MemoryError, debugger.Error) as e:
            print(f"Error reading heap_pages: {e}")
            print("The heap may not be initialized yet.")
            return

        char_ptr_type = constants.type_struct('char').pointer()

        for i in range(start_page, allocated_pages):
            page = self._get_page(i)
            if page is None:
                continue

            try:
                # In some Ruby versions 'start' is a pointer, in others an
                # integer (e.g., Ruby 3.2 uses uintptr_t). Try the pointer
                # cast first and fall back to a plain integer conversion.
                start_value = page['start']
                try:
                    start_int = int(start_value.cast(char_ptr_type))
                except (debugger.Error, AttributeError):
                    start_int = int(start_value)

                total_slots = int(page['total_slots'])
                slot_size = int(page['slot_size'])
            except (debugger.MemoryError, debugger.Error) as e:
                print(f"Error reading page {i}: {e}", file=sys.stderr)
                continue

            if total_slots <= 0 or slot_size <= 0:
                print(f"Warning: Page {i} has invalid dimensions (total_slots={total_slots}, slot_size={slot_size}), skipping", file=sys.stderr)
                continue

            # Only the first page honours skip_until_address.
            start_slot = 0
            if i == start_page and skip_until_address is not None:
                start_slot = (int(skip_until_address) - start_int) // slot_size
                if start_slot >= total_slots:
                    continue  # Skip this entire page
                if start_slot < 0:
                    start_slot = 0

            try:
                # One bulk read per page keeps the per-slot work in pure
                # Python; if it fails, flags are read slot by slot instead.
                try:
                    page_data = debugger.read_memory(start_int, total_slots * slot_size)
                except (debugger.Error, debugger.MemoryError):
                    page_data = None

                for j in range(start_slot, total_slots):
                    try:
                        byte_offset = j * slot_size
                        obj_address = start_int + byte_offset

                        flags = None
                        if page_data is not None:
                            try:
                                lo = byte_offset + self._flags_offset
                                flags_bytes = page_data[lo:lo + self._value_size]
                                # Slicing never raises on a short buffer, so
                                # check the length before trusting the bytes.
                                if len(flags_bytes) == self._value_size:
                                    flags = int.from_bytes(flags_bytes, byteorder='little', signed=False)
                            except (IndexError, ValueError):
                                flags = None

                        if flags is None:
                            flags = self._read_flags_fast(obj_address)

                        # Skip free slots early (most common case).
                        if flags == 0:
                            continue

                        # The slot address itself is the VALUE.
                        obj = debugger.create_value_from_int(obj_address, self._value_type)
                        yield obj, flags, obj_address
                    except (debugger.Error, RuntimeError):
                        continue

            except (debugger.Error, debugger.MemoryError) as e:
                print(f"Failed to read page {i}: {e}, skipping", file=sys.stderr)
                continue

    def find_typed_data(self, data_type, limit=None, progress=False):
        """Find RTypedData objects matching a specific type.

        Args:
            data_type: Pointer to rb_data_type_struct to match.
            limit: Maximum number of objects to find. None or 0 means
                no limit.
            progress: If True, print progress to stderr.

        Returns:
            List of VALUEs (not extracted data pointers) matching the type.
        """
        objects = []

        # T_DATA type constant.
        T_DATA = 0x0c

        rtypeddata_type = constants.type_struct('struct RTypedData').pointer()

        try:
            if progress:
                allocated_pages = int(self.objspace['heap_pages']['allocated_pages'])
                print(f"Scanning {allocated_pages} heap pages...", file=sys.stderr)
        except (debugger.MemoryError, debugger.Error):
            # The page count is only informational; scan regardless.
            pass

        objects_checked = 0

        for obj, flags, address in self.iterate_heap():
            if limit and len(objects) >= limit:
                if progress:
                    print(f"Reached limit of {limit} object(s), stopping scan", file=sys.stderr)
                break

            objects_checked += 1

            if progress and objects_checked % 10000 == 0:
                print(f" Checked {objects_checked} objects, found {len(objects)} match(es)...", file=sys.stderr)

            if (flags & RBASIC_FLAGS_TYPE_MASK) != T_DATA:
                continue

            try:
                typed_data = obj.cast(rtypeddata_type)
                type_field = typed_data['type']

                # Field access returns None when the type is incomplete.
                if type_field is None:
                    # Explain the problem only once per scanner instance.
                    if not getattr(self, '_incomplete_type_warning_shown', False):
                        self._incomplete_type_warning_shown = True
                        print("\nError: struct RTypedData debug symbols are incomplete", file=sys.stderr)
                        print("Cannot access RTypedData fields with this Ruby version.", file=sys.stderr)
                        print("\nThis is a known issue with Ruby 3.4.x on macOS:", file=sys.stderr)
                        print(" • A dsymutil bug drops RTypedData from debug symbols", file=sys.stderr)
                        print(" • Caused by complex 'const T *const' type in the struct", file=sys.stderr)
                        print(" • Fixed in Ruby head (commit ce51ef30df)", file=sys.stderr)
                        print("\nWorkarounds:", file=sys.stderr)
                        print(" • Use Ruby head: ruby-install ruby-head -- CFLAGS=\"-g -O0\"", file=sys.stderr)
                        print(" • Or use GDB on Linux (works with Ruby 3.4.x)", file=sys.stderr)
                        print("\nSee: https://socketry.github.io/toolbox/guides/getting-started/", file=sys.stderr)
                        print(file=sys.stderr)
                    # Can't scan without complete type info.
                    break

                if type_field == data_type:
                    # Keep the VALUE, not the extracted data pointer.
                    objects.append(obj)
                    if progress:
                        print(f" Found object #{len(objects)} at VALUE 0x{int(obj):x}", file=sys.stderr)
            except (debugger.Error, RuntimeError):
                continue

        if progress:
            if limit and len(objects) >= limit:
                print(f"Scan complete: checked {objects_checked} objects (stopped at limit)", file=sys.stderr)
            else:
                print(f"Scan complete: checked {objects_checked} objects", file=sys.stderr)

        return objects
475
+
476
+
477
class RubyHeapScanHandler:
    """Scan the Ruby heap for objects, optionally filtered by type.

    Usage: rb-heap-scan [--type TYPE] [--limit N] [--from $heap]

    TYPE can be:
      - A Ruby type constant like RUBY_T_STRING, RUBY_T_ARRAY, RUBY_T_HASH
      - A numeric value (e.g., 0x05 for T_STRING)
      - Omit --type to scan all objects

    Options:
      --type TYPE    Filter by Ruby type (omit to scan all objects)
      --limit N      Stop after finding N objects (default: 10)
      --from ADDR    Start scanning from the given address (for pagination)

    Pagination:
      When more matching objects remain, the address to resume from is saved
      to $heap, allowing you to paginate:
        rb-heap-scan --type RUBY_T_STRING --limit 10               # First page
        rb-heap-scan --type RUBY_T_STRING --limit 10 --from $heap  # Next page

      $heap holds the address of the first matching object that was not
      shown. It is unset once the scan reaches the end of the heap.

    Examples:
      rb-heap-scan --type RUBY_T_STRING
      rb-heap-scan --type RUBY_T_ARRAY --limit 20
      rb-heap-scan --type 0x05      # T_STRING
      rb-heap-scan --limit 100      # All objects
      rb-heap-scan --from $heap     # Continue from last scan
    """

    USAGE = command.Usage(
        summary="Scan the Ruby heap for objects, optionally filtered by type",
        parameters=[],
        options={
            'type': (str, None, 'Filter by Ruby type (e.g., RUBY_T_STRING, RUBY_T_ARRAY, or 0x05)'),
            'limit': (int, 10, 'Maximum objects to find'),
            'from': (str, None, 'Start address for pagination (use $heap)')
        },
        flags=[],
        examples=[
            ("rb-heap-scan --type RUBY_T_STRING", "Find up to 10 strings"),
            ("rb-heap-scan --type RUBY_T_ARRAY --limit 20", "Find first 20 arrays"),
            ("rb-heap-scan --from $heap", "Continue from last scan (pagination)")
        ]
    )

    def _parse_type(self, type_arg):
        """Parse a type argument and return the type value.

        The argument is first looked up as a constant name; if that yields
        nothing it is parsed as a hexadecimal (0x-prefixed) or decimal
        number. A value outside 0-31 only produces a warning.

        Args:
            type_arg: String type argument (constant name or numeric value).

        Returns:
            Integer type value, or None on error.
        """
        # Try as a constant name first.
        type_value = constants.get(type_arg)

        if type_value is None:
            try:
                if type_arg.startswith(('0x', '0X')):
                    type_value = int(type_arg, 16)
                else:
                    type_value = int(type_arg)
            except ValueError:
                print(f"Error: Unknown type constant '{type_arg}'")
                print("Use a constant like RUBY_T_STRING or a numeric value like 0x05")
                return None

        # The type field is 5 bits wide, so 0-31 is the meaningful range.
        if not (0 <= type_value <= 31):
            print(f"Warning: Type value {type_value} (0x{type_value:x}) is outside valid range 0-31")

        return type_value

    def invoke(self, arguments, terminal):
        """Execute the heap scan command.

        Reads the 'from', 'limit' and 'type' options, scans the heap, prints
        each object found and stores it in a $heapN convenience variable.
        $heap is set to the resume address when more matches remain, or unset
        when the scan reached the end of the heap.

        Args:
            arguments: Parsed command arguments (queried via get_option).
            terminal: Output object used for formatted printing.
        """
        try:
            # Resolve the resume address, if one was given.
            from_address = None
            from_option = arguments.get_option('from')
            if from_option is not None:
                try:
                    # $heap should be an address (pointer value).
                    from_address = int(debugger.parse_and_eval(from_option))
                except (debugger.Error, ValueError, TypeError):
                    # $heap doesn't exist or is void/invalid: start over.
                    print(f"Note: {from_option} is not set or invalid, wrapping around to start of heap", file=sys.stderr)
                    from_address = None

            # Get limit (default 10).
            limit = 10
            limit_value = arguments.get_option('limit')
            if limit_value is not None:
                try:
                    limit = int(limit_value)
                except (ValueError, TypeError):
                    print("Error: --limit must be a number")
                    return

            # Get type (optional).
            type_value = None
            type_option = arguments.get_option('type')
            if type_option is not None:
                type_value = self._parse_type(type_option)
                if type_value is None:
                    return

            heap = RubyHeap()
            if not heap.initialize():
                return

            if type_value is not None:
                type_desc = f"type 0x{type_value:02x}"
            else:
                type_desc = "all types"

            if from_address:
                print(f"Scanning heap for {type_desc}, limit={limit}, continuing from address 0x{from_address:x}...")
            else:
                print(f"Scanning heap for {type_desc}, limit={limit}...")
            print()

            objects, next_address = heap.scan(type_value, limit=limit, from_address=from_address)

            if not objects:
                print("No objects found")
                if from_address:
                    print("(You may have reached the end of the heap)")
                return

            print(f"Found {len(objects)} object(s):")
            print()

            for i, obj in enumerate(objects):
                # Expose each object as $heap<i>.
                var_name = f"heap{i}"
                debugger.set_convenience_variable(var_name, obj)

                # Try to interpret and display the object; a failure for one
                # object is shown inline and does not abort the listing.
                try:
                    interpreted = rvalue.interpret(obj)

                    terminal.print(
                        format.metadata, f" [{i}] ",
                        format.dim, f"${var_name} = ",
                        format.reset, interpreted
                    )
                except Exception as e:
                    terminal.print(
                        format.metadata, f" [{i}] ",
                        format.dim, f"${var_name} = ",
                        format.error, f"<error: {e}>"
                    )

            print()
            terminal.print(
                format.dim,
                f"Objects saved in $heap0 through $heap{len(objects)-1}",
                format.reset
            )

            if next_address is not None:
                # Save the next address to $heap for pagination.
                void_ptr_type = constants.type_struct('void').pointer()
                debugger.set_convenience_variable('heap', debugger.create_value(next_address, void_ptr_type))
                terminal.print(
                    format.dim,
                    f"Next scan address saved to $heap: 0x{next_address:016x}",
                    format.reset
                )
                # Only mention --type when the user actually filtered by type;
                # an unfiltered scan is continued without it.
                type_hint = f"--type {type_option} " if type_option else ""
                terminal.print(
                    format.dim,
                    f"Run 'rb-heap-scan {type_hint}--from $heap' for next page",
                    format.reset
                )
            else:
                # Reached the end of the heap - unset $heap so the next scan
                # starts fresh.
                debugger.set_convenience_variable('heap', None)
                terminal.print(
                    format.dim,
                    "Reached end of heap (no more objects to scan)",
                    format.reset
                )

        except Exception as e:
            # Top-level command boundary: report and show the traceback
            # rather than letting the exception escape into the debugger.
            print(f"Error: {e}")
            import traceback
            traceback.print_exc()
676
+
677
+
678
+ # Register the rb-heap-scan command with the debugger, handled by RubyHeapScanHandler
679
+ debugger.register("rb-heap-scan", RubyHeapScanHandler, usage=RubyHeapScanHandler.USAGE)