ruby-gdb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ import gdb
2
+ import rbasic
3
+ import constants
4
+ import format
5
+
6
class RHashBase:
    """Shared state for the concrete RHash wrappers.

    Subclasses provide the storage-specific ``size()`` and ``pairs()``.
    """

    def __init__(self, value):
        """Wrap ``value``, a VALUE pointing to a T_HASH object."""
        self.value = value

        rhash_ptr_type = constants.get_type('struct RHash').pointer()
        self.rhash = value.cast(rhash_ptr_type)

        rbasic_ptr_type = constants.get_type('struct RBasic').pointer()
        self.basic = value.cast(rbasic_ptr_type)

        # Cache the flag word as a plain Python int.
        header = self.basic.dereference()
        self.flags = int(header['flags'])

    def size(self):
        """Return the hash size; concrete subclasses must override this."""
        raise NotImplementedError

    def pairs(self):
        """Iterate over (key, value) pairs; concrete subclasses must override this."""
        raise NotImplementedError
23
+
24
class RHashSTTable(RHashBase):
    """Hash using ST table structure (older or larger hashes)."""

    def __init__(self, value):
        """Wrap ``value`` and locate its ``st_table``.

        Two layouts are probed, in the same way the AR-table variant does:
        a ``struct RHash`` that still has an ``as`` union holding a pointer
        to the table, and a layout where the table is stored directly after
        the ``struct RHash`` header.
        """
        super().__init__(value)
        try:
            # Layout with an `as` union: the st_table is reached via as.st.
            self.st_table = self.rhash.dereference()['as']['st']
        except (gdb.error, KeyError):
            # No `as` union: the st_table follows the RHash header in memory.
            rhash_size = gdb.parse_and_eval("sizeof(struct RHash)")
            st_table_addr = int(value) + int(rhash_size)
            st_table_type = constants.get_type("st_table").pointer()
            self.st_table = gdb.Value(st_table_addr).cast(st_table_type)

    def size(self):
        """Return the number of live entries recorded in the table."""
        return int(self.st_table.dereference()['num_entries'])

    @staticmethod
    def _is_deleted(entry):
        """Return True when ``entry`` is a deleted slot.

        Deleted slots are marked by a hash field with every bit set; the
        width is taken from the field's own type so no size is assumed.
        """
        hash_value = entry['hash']
        all_ones = (1 << (8 * hash_value.type.sizeof)) - 1
        return (int(hash_value) & all_ones) == all_ones

    def pairs(self):
        """Yield (key, value) pairs for the live entries only.

        Live entries occupy the index range ``[entries_start, entries_bound)``
        and may be interleaved with deleted slots, so iterating the first
        ``num_entries`` slots is not correct once anything has been deleted.
        """
        table = self.st_table.dereference()
        entries = table['entries']
        try:
            first = int(table['entries_start'])
            bound = int(table['entries_bound'])
        except (gdb.error, KeyError):
            # Table type without range fields: keep the simple 0..size walk.
            first, bound = 0, self.size()

        for index in range(first, bound):
            entry = entries[index]
            if self._is_deleted(entry):
                continue
            yield (entry['key'], entry['record'])

    def __str__(self):
        """Return string representation of hash."""
        addr = int(self.value)
        return f"<T_HASH@0x{addr:x} ST-Table entries={self.size()}>"

    def print_to(self, terminal):
        """Print this hash with formatting and return terminal.print's result."""
        addr = int(self.value)
        return terminal.print(
            format.metadata, '<',
            format.type, 'T_HASH',
            format.metadata, f'@0x{addr:x} ST-Table entries={self.size()}>',
            format.reset
        )

    def print_recursive(self, printer, depth):
        """Print this hash, then its pairs while ``depth`` allows it."""
        printer.print(self)

        if depth <= 0:
            # Depth budget exhausted: only hint that there is more content.
            if self.size() > 0:
                printer.print_with_indent(printer.max_depth - depth, " ...")
            return

        # Print each key-value pair one level deeper.
        for i, (key, value) in enumerate(self.pairs()):
            printer.print_key_label(printer.max_depth - depth, i)
            printer.print_value(key, depth - 1)
            printer.print_value_label(printer.max_depth - depth)
            printer.print_value(value, depth - 1)
75
+
76
class RHashARTable(RHashBase):
    """Hash backed by an AR table (newer, smaller hashes)."""

    def __init__(self, value):
        """Wrap ``value``, locate the AR table and decode size/bound from flags."""
        super().__init__(value)
        # Feature detection: as.ar pointer (Ruby 3.2) versus a table that is
        # embedded right after the RHash structure (Ruby 3.3+).
        try:
            self.ar_table = self.rhash.dereference()['as']['ar']
        except (gdb.error, KeyError):
            header_size = gdb.parse_and_eval("sizeof(struct RHash)")
            table_address = int(self.rhash) + int(header_size)
            table_ptr_type = constants.get_type("struct ar_table_struct").pointer()
            self.ar_table = gdb.Value(table_address).cast(table_ptr_type)

        # Size and bound are bit fields packed into the flag word.
        size_bits = self.flags & constants.get("RHASH_AR_TABLE_SIZE_MASK")
        self.ar_size = size_bits >> constants.get("RHASH_AR_TABLE_SIZE_SHIFT")
        bound_bits = self.flags & constants.get("RHASH_AR_TABLE_BOUND_MASK")
        self.ar_bound = bound_bits >> constants.get("RHASH_AR_TABLE_BOUND_SHIFT")

    def size(self):
        """Return the size decoded from the flags."""
        return self.ar_size

    def bound(self):
        """Return the bound (capacity) decoded from the flags."""
        return self.ar_bound

    def pairs(self):
        """Yield (key, value) pairs, leaving out slots whose key is RUBY_Qundef."""
        undefined = constants.get("RUBY_Qundef")
        for slot in range(int(self.ar_bound)):
            key = self.ar_table.dereference()['pairs'][slot]['key']
            if int(key) == undefined:
                # Undefined/deleted slot.
                continue
            yield (key, self.ar_table.dereference()['pairs'][slot]['val'])

    def __str__(self):
        """Return the one-line tag describing this hash."""
        address = int(self.value)
        return f"<T_HASH@0x{address:x} AR-Table size={self.size()} bound={self.bound()}>"

    def print_to(self, terminal):
        """Emit the formatted tag through ``terminal`` and return its result."""
        address = int(self.value)
        return terminal.print(
            format.metadata, '<',
            format.type, 'T_HASH',
            format.metadata, f'@0x{address:x} AR-Table size={self.size()} bound={self.bound()}>',
            format.reset
        )

    def print_recursive(self, printer, depth):
        """Print the tag, then the pairs if the depth budget permits."""
        printer.print(self)

        if depth > 0:
            for position, (key, val) in enumerate(self.pairs()):
                printer.print_key_label(printer.max_depth - depth, position)
                printer.print_value(key, depth - 1)
                printer.print_value_label(printer.max_depth - depth)
                printer.print_value(val, depth - 1)
        elif self.size() > 0:
            # Out of depth: show an ellipsis instead of the contents.
            printer.print_with_indent(printer.max_depth - depth, " ...")
144
+
145
def RHash(value):
    """Build the RHash wrapper matching the object's storage kind.

    The caller is expected to have checked that ``value`` is a RUBY_T_HASH.
    Returns an RHashSTTable when RHASH_ST_TABLE_FLAG is set in the object's
    flags and an RHashARTable otherwise.
    """
    rbasic_ptr_type = constants.get_type('struct RBasic').pointer()
    header = value.cast(rbasic_ptr_type).dereference()
    flag_bits = int(header['flags'])

    uses_st_table = (flag_bits & constants.get("RHASH_ST_TABLE_FLAG")) != 0
    wrapper = RHashSTTable if uses_st_table else RHashARTable
    return wrapper(value)
@@ -0,0 +1,217 @@
1
+ import gdb
2
+ import rbasic
3
+ import constants
4
+ import format
5
+
6
def _char_ptr_type():
    """Build the GDB type object for ``char *``."""
    char_type = constants.get_type('char')
    return char_type.pointer()
9
+
10
class RStringBase:
    """Behaviour common to every RString layout variant.

    Subclasses supply ``_is_embedded()``, ``length()`` and ``data_ptr()``.
    """

    def __init__(self, value):
        """Wrap ``value``, a VALUE pointing to a T_STRING object."""
        self.value = value

        rstring_ptr_type = constants.get_type('struct RString').pointer()
        self.rstring = value.cast(rstring_ptr_type)

        rbasic_ptr_type = constants.get_type('struct RBasic').pointer()
        self.basic = value.cast(rbasic_ptr_type)

        header = self.basic.dereference()
        self.flags = int(header['flags'])

    def _is_embedded(self):
        """Tell whether the string is embedded; subclasses must override."""
        raise NotImplementedError

    def length(self):
        """Return the string length; subclasses must override."""
        raise NotImplementedError

    def data_ptr(self):
        """Return the pointer to the string data; subclasses must override."""
        raise NotImplementedError

    def read_bytes(self, max_fallback_scan=256):
        """Return ``(bytes, length)`` read from the inferior.

        With a positive declared length exactly that many bytes are read.
        Otherwise ``max_fallback_scan`` bytes are read and cut at the first
        NUL byte (or kept whole when no NUL is found).
        """
        address = self.data_ptr()
        declared = self.length()
        process = gdb.selected_inferior()

        if not (declared and declared > 0):
            # Length unavailable (e.g. symbol table strings): look for NUL.
            window = process.read_memory(address, max_fallback_scan).tobytes()
            terminator = window.find(b'\x00')
            end = max_fallback_scan if terminator == -1 else terminator
            return (window[:end], end)

        return (process.read_memory(address, declared).tobytes(), declared)

    def to_str(self, encoding='utf-8'):
        """Decode the string bytes, replacing undecodable sequences."""
        raw, _ = self.read_bytes()
        return raw.decode(encoding, errors='replace')

    def __str__(self):
        """Return the type tag with metadata, followed by the repr of the text."""
        address = int(self.value)
        if self._is_embedded():
            storage = "embedded"
        else:
            storage = "heap"
        text = self.to_str()
        return f"<T_STRING@0x{address:x} {storage} length={self.length()}> {text!r}"

    def print_to(self, terminal):
        """Format the tag and the text through ``terminal`` and join the results."""
        address = int(self.value)
        if self._is_embedded():
            storage = "embedded"
        else:
            storage = "heap"
        text = self.to_str()

        header_parts = (
            format.metadata, '<',
            format.type, 'T_STRING',
            format.metadata, f'@0x{address:x} {storage} length={self.length()}>',
            format.reset,
        )
        tag = terminal.print(*header_parts)
        # repr() escapes quotes, newlines and similar characters.
        string_val = terminal.print(format.string, repr(text), format.reset)
        return f"{tag} {string_val}"

    def print_recursive(self, printer, depth):
        """Print this string; strings have nothing to recurse into."""
        printer.print(self)
78
+
79
class RString34(RStringBase):
    """Ruby 3.4+ layout: top-level ``len``, data in ``as.embed.ary``, FL_USER1 marks heap."""

    def _is_embedded(self):
        """Return True when FL_USER1 is clear (FL_USER1 set means heap)."""
        heap_marker = constants.get('RUBY_FL_USER1', 1 << 13)
        masked = self.flags & heap_marker
        return masked == 0

    def length(self):
        """Return the top-level ``len`` field as an int."""
        fields = self.rstring.dereference()
        return int(fields['len'])

    def data_ptr(self):
        """Return the heap pointer, or a ``char *`` to the embedded array."""
        if self._is_embedded():
            inline = self.rstring.dereference()['as']['embed']['ary']
            try:
                if inline.type.code != gdb.TYPE_CODE_ARRAY:
                    return inline
                # Array value: take its address and view it as char *.
                return inline.address.cast(_char_ptr_type())
            except Exception:
                return inline.address.cast(_char_ptr_type())
        return self.rstring.dereference()['as']['heap']['ptr']
101
+
102
class RString33(RStringBase):
    """Ruby 3.3 layout: top-level ``len``, ``as.embed.len`` exists, FL_USER1 marks embedded.

    NOTE(review): every other variant in this file treats FL_USER1 as the
    heap marker; confirm the inverted meaning used here is intended.
    """

    def _is_embedded(self):
        """Return True when FL_USER1 is set."""
        embed_marker = constants.get('RUBY_FL_USER1', 1 << 13)
        masked = self.flags & embed_marker
        return masked != 0

    def length(self):
        """Return the top-level ``len`` field as an int."""
        fields = self.rstring.dereference()
        return int(fields['len'])

    def data_ptr(self):
        """Return the heap pointer, or a ``char *`` to the embedded array."""
        if self._is_embedded():
            inline = self.rstring.dereference()['as']['embed']['ary']
            try:
                if inline.type.code != gdb.TYPE_CODE_ARRAY:
                    return inline
                # Array value: take its address and view it as char *.
                return inline.address.cast(_char_ptr_type())
            except Exception:
                return inline.address.cast(_char_ptr_type())
        return self.rstring.dereference()['as']['heap']['ptr']
124
+
125
class RString32RVARGC(RStringBase):
    """Ruby 3.2 with RVARGC: ``as.embed.len`` for embedded length, ``as.embed.ary`` for data."""

    def _is_embedded(self):
        """Return True when FL_USER1 is clear (FL_USER1 set means heap)."""
        heap_marker = constants.get('RUBY_FL_USER1', 1 << 13)
        masked = self.flags & heap_marker
        return masked == 0

    def length(self):
        """Return the length from ``as.embed.len`` or ``as.heap.len``."""
        storage = 'embed' if self._is_embedded() else 'heap'
        return int(self.rstring.dereference()['as'][storage]['len'])

    def data_ptr(self):
        """Return the heap pointer, or a ``char *`` to the embedded array."""
        if self._is_embedded():
            inline = self.rstring.dereference()['as']['embed']['ary']
            try:
                if inline.type.code != gdb.TYPE_CODE_ARRAY:
                    return inline
                # Array value: take its address and view it as char *.
                return inline.address.cast(_char_ptr_type())
            except Exception:
                return inline.address.cast(_char_ptr_type())
        return self.rstring.dereference()['as']['heap']['ptr']
150
+
151
class RString32Legacy(RStringBase):
    """Legacy Ruby 3.2 layout: embedded length in FL_USER2..6, data in ``as.ary``."""

    def _is_embedded(self):
        """Return True when FL_USER1 is clear (FL_USER1 set means heap)."""
        heap_marker = constants.get('RUBY_FL_USER1', 1 << 13)
        masked = self.flags & heap_marker
        return masked == 0

    def length(self):
        """Return the length from the flag bits (embedded) or ``as.heap.len``."""
        if not self._is_embedded():
            return int(self.rstring.dereference()['as']['heap']['len'])

        # The embedded length lives in the FL_USER2..FL_USER6 bits.
        length_bits = 0
        for flag_name in (
            'RUBY_FL_USER2',
            'RUBY_FL_USER3',
            'RUBY_FL_USER4',
            'RUBY_FL_USER5',
            'RUBY_FL_USER6',
        ):
            length_bits |= constants.get(flag_name)
        user_shift = constants.get('RUBY_FL_USHIFT')
        return (self.flags & length_bits) >> (user_shift + 2)

    def data_ptr(self):
        """Return the heap pointer, or a ``char *`` to ``as.ary`` when embedded."""
        if self._is_embedded():
            # This layout keeps the bytes in as.ary (not as.embed.ary).
            inline = self.rstring.dereference()['as']['ary']
            try:
                if inline.type.code != gdb.TYPE_CODE_ARRAY:
                    return inline
                return inline.address.cast(_char_ptr_type())
            except Exception:
                return inline.address.cast(_char_ptr_type())
        return self.rstring.dereference()['as']['heap']['ptr']
183
+
184
def RString(value):
    """Factory function that detects the RString variant and returns the appropriate instance.

    Caller should ensure value is a RUBY_T_STRING before calling this function.

    Detects at runtime whether the process uses:
    - Ruby 3.4+: top-level len field, as.embed.ary (no embed.len), FL_USER1 set = heap
    - Ruby 3.3: top-level len field, as.embed.len exists, FL_USER1 set = embedded
    - Ruby 3.2 with RVARGC: as.embed.len for embedded, as.embed.ary for data
    - Legacy 3.2: embedded length in FL_USER2..6, data in as.ary

    Only the field probes are guarded. The chosen wrapper is constructed
    outside the guards, so an error raised while building it reaches the
    caller instead of silently selecting a different variant.
    """
    rstring = value.cast(constants.get_type('struct RString').pointer())

    # Probe 1: does struct RString have a top-level `len` field?
    try:
        _ = rstring.dereference()['len']
    except Exception:
        has_top_level_len = False
    else:
        has_top_level_len = True

    # Probe 2: does the embedded representation carry its own `len` field?
    try:
        _ = rstring.dereference()['as']['embed']['len']
    except Exception:
        has_embed_len = False
    else:
        has_embed_len = True

    if has_top_level_len:
        # Top-level len: embed.len present means 3.3, absent means 3.4+.
        return RString33(value) if has_embed_len else RString34(value)
    if has_embed_len:
        return RString32RVARGC(value)
    # Neither field exists: fall back to the legacy 3.2 layout.
    return RString32Legacy(value)
@@ -0,0 +1,157 @@
1
+ import gdb
2
+ import rbasic
3
+ import constants
4
+ import format
5
+
6
+
7
class RStructBase:
    """Shared behaviour for the Ruby struct wrappers."""

    def __init__(self, value):
        """Wrap ``value``, a VALUE pointing to a T_STRUCT object."""
        self.value = value
        rstruct_ptr_type = constants.get_type('struct RStruct').pointer()
        self.rstruct = value.cast(rstruct_ptr_type)

    def length(self):
        """Return the number of fields; subclasses must override."""
        raise NotImplementedError("Subclass must implement length()")

    def items_ptr(self):
        """Return the struct's data array; subclasses must override."""
        raise NotImplementedError("Subclass must implement items_ptr()")

    def __len__(self):
        """Make ``len(struct)`` report ``length()``."""
        return self.length()

    def __getitem__(self, index):
        """Return field ``index``; raise IndexError outside ``0 <= index < length``."""
        count = self.length()
        if not (0 <= index < count):
            raise IndexError(f"struct index out of range: {index} (length: {count})")
        return self.items_ptr()[index]
35
+
36
+
37
class RStructEmbedded(RStructBase):
    """Ruby struct with embedded storage (small structs)."""

    def length(self):
        """Decode the field count from the object's flags.

        The mask is taken from RSTRUCT_EMBED_LEN_MASK when that constant is
        available, which is the same constant the RStruct factory uses to
        classify a struct as embedded. When it is unavailable the mask falls
        back to FL_USER1 | FL_USER2.
        """
        flags = int(self.rstruct.dereference()['basic']['flags'])

        mask = constants.get("RSTRUCT_EMBED_LEN_MASK")
        if mask is None:
            mask = constants.get("RUBY_FL_USER1") | constants.get("RUBY_FL_USER2")

        # The length field starts at the FL_USER1 bit.
        shift = constants.get("RUBY_FL_USHIFT") + 1
        return (flags & mask) >> shift

    def items_ptr(self):
        """Return the embedded data array (``as.ary``)."""
        return self.rstruct.dereference()['as']['ary']

    def __str__(self):
        """Return string representation of struct."""
        addr = int(self.value)
        return f"<T_STRUCT@0x{addr:x} embedded length={len(self)}>"

    def print_to(self, terminal):
        """Return formatted struct representation."""
        addr = int(self.value)
        return terminal.print(
            format.metadata, '<',
            format.type, 'T_STRUCT',
            format.metadata, f'@0x{addr:x} embedded length={len(self)}>',
            format.reset
        )

    def print_recursive(self, printer, depth):
        """Print this struct, then its fields while ``depth`` allows it."""
        printer.print(self)

        # Read the field count once; every len(self) call re-reads the flags.
        count = len(self)

        if depth <= 0:
            # Depth budget exhausted: only hint that there is more content.
            if count > 0:
                printer.print_with_indent(printer.max_depth - depth, " ...")
            return

        # Print each field one level deeper.
        for i in range(count):
            printer.print_item_label(printer.max_depth - depth, i)
            printer.print_value(self[i], depth - 1)
85
+
86
+
87
class RStructHeap(RStructBase):
    """Ruby struct whose fields live in heap-allocated storage (large structs)."""

    def length(self):
        """Return ``as.heap.len`` as an int."""
        heap = self.rstruct.dereference()['as']['heap']
        return int(heap['len'])

    def items_ptr(self):
        """Return ``as.heap.ptr``, the heap-allocated data array."""
        heap = self.rstruct.dereference()['as']['heap']
        return heap['ptr']

    def __str__(self):
        """Return the one-line tag describing this struct."""
        address = int(self.value)
        return f"<T_STRUCT@0x{address:x} heap length={len(self)}>"

    def print_to(self, terminal):
        """Emit the formatted tag through ``terminal`` and return its result."""
        address = int(self.value)
        return terminal.print(
            format.metadata, '<',
            format.type, 'T_STRUCT',
            format.metadata, f'@0x{address:x} heap length={len(self)}>',
            format.reset
        )

    def print_recursive(self, printer, depth):
        """Print the tag, then the fields if the depth budget permits."""
        printer.print(self)

        if depth > 0:
            for position in range(len(self)):
                printer.print_item_label(printer.max_depth - depth, position)
                printer.print_value(self[position], depth - 1)
        elif len(self) > 0:
            # Out of depth: show an ellipsis instead of the fields.
            printer.print_with_indent(printer.max_depth - depth, " ...")
126
+
127
+
128
def RStruct(value):
    """
    Build the RStruct wrapper matching the object's storage kind.

    The caller is expected to have checked that ``value`` is a RUBY_T_STRUCT.

    Returns:
        RStructEmbedded or RStructHeap instance
    """
    # View the VALUE as an RStruct pointer so the flags can be read.
    rstruct = value.cast(constants.get_type("struct RStruct").pointer())
    flag_bits = int(rstruct.dereference()['basic']['flags'])

    embed_len_mask = constants.get("RSTRUCT_EMBED_LEN_MASK")

    if embed_len_mask is not None:
        # Any length bit set in the flags means the struct is embedded.
        wrapper = RStructEmbedded if flag_bits & embed_len_mask else RStructHeap
        return wrapper(value)

    # Constant unavailable: probe for the as.heap member instead.
    try:
        _ = rstruct.dereference()['as']['heap']
        return RStructHeap(value)
    except Exception:
        return RStructEmbedded(value)