innodb_ruby 0.6.6 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,12 @@
1
+ # A parser for InnoDB file formats, in Ruby #
2
+
3
+ The purpose of this library and its tools is to expose some otherwise hidden internals of InnoDB. This code is not intended for critical production usage. It is definitely buggy, and it may be dangerous. Neither its internal APIs nor its output is considered stable; both are subject to change at any time.
4
+
5
+ It is intended for a few purposes:
6
+
7
+ * *As a learning tool.* What better way to improve your understanding of a structure than to implement it in another language?
8
+ * *As a teaching tool.* Using `irb` to interactively investigate the actual structures on disk is invaluable for demonstrating (to yourself or others) what is actually happening.
9
+ * *As an investigative tool.* InnoDB unfortunately doesn't provide enough information about what it is doing or has done with its on-disk storage. How full are pages? Exactly how many records per page? How is the B+tree structured for a particular table? All of these questions can be answered easily with `innodb_ruby`.
10
+ * *As a debugging tool.* While making changes to the structures or behaviors of InnoDB, it is necessary to have tools to expose the results both of the original behavior and the new one, in order to validate that the changes have the desired effect.
11
+
12
+ Various parts of this library and the included tools may have wildly differing maturity levels, as they are worked on primarily based on the immediate needs of the authors.
@@ -70,6 +70,122 @@ def space_page_type_regions(space)
70
70
  end
71
71
  end
72
72
 
73
+ def space_lists(space)
74
+ fsp = space.page(0).fsp_header
75
+ puts "%-20s%-12s%-12s%-12s%-12s%-12s" % [
76
+ "name",
77
+ "length",
78
+ "f_page",
79
+ "f_offset",
80
+ "l_page",
81
+ "l_offset",
82
+ ]
83
+ fsp.each do |key, list|
84
+ next unless list.is_a?(Innodb::List)
85
+ puts "%-20s%-12i%-12i%-12i%-12i%-12i" % [
86
+ key,
87
+ list.base[:length],
88
+ list.base[:first] && list.base[:first][:page],
89
+ list.base[:first] && list.base[:first][:offset],
90
+ list.base[:last] && list.base[:last][:page],
91
+ list.base[:last] && list.base[:last][:offset],
92
+ ]
93
+ end
94
+ end
95
+
96
+ def space_indexes(space)
97
+ puts "%-12s%-12s%-12s%-12s%-12s%-12s" % [
98
+ "id",
99
+ "root",
100
+ "fseg",
101
+ "used",
102
+ "allocated",
103
+ "fill_factor",
104
+ ]
105
+
106
+ space.each_index do |index|
107
+ [:internal, :leaf].each do |fseg_name|
108
+ fseg = index.root.fseg_header[fseg_name]
109
+
110
+ fragments =
111
+ fseg[:frag_array].inject(0) { |c, i| c += 1 if i; c }
112
+
113
+ used =
114
+ fragments +
115
+ fseg[:not_full_n_used] +
116
+ Innodb::Xdes::PAGES_PER_EXTENT * fseg[:full].base[:length]
117
+
118
+ allocated =
119
+ fragments +
120
+ Innodb::Xdes::PAGES_PER_EXTENT * fseg[:full].base[:length] +
121
+ Innodb::Xdes::PAGES_PER_EXTENT * fseg[:not_full].base[:length] +
122
+ Innodb::Xdes::PAGES_PER_EXTENT * fseg[:free].base[:length]
123
+
124
+ puts "%-12i%-12i%-12s%-12i%-12i%-12s" % [
125
+ index.id,
126
+ index.root.offset,
127
+ fseg_name,
128
+ used,
129
+ allocated,
130
+ "%.2f%%" % [100.0 * (used.to_f / allocated.to_f)],
131
+ ]
132
+ end
133
+ end
134
+ end
135
+
136
+ def print_xdes_list(list)
137
+ puts "%-12s%-64s" % [
138
+ "start_page",
139
+ "bitmap"
140
+ ]
141
+ list.each do |entry|
142
+ puts "%-12i%-64s" % [
143
+ entry.xdes[:start_page],
144
+ entry.xdes[:bitmap].bytes.map { |byte|
145
+ [0, 2, 4, 6].map { |shift|
146
+ ((byte >> shift) & 1 == 1) ? "." : "#"
147
+ }
148
+ }.flatten.join,
149
+ ]
150
+ end
151
+ end
152
+
153
+ def list_summary(space, list_name)
154
+ fsp = space.page(0).fsp_header
155
+
156
+ unless fsp[list_name] && fsp[list_name].is_a?(Innodb::List)
157
+ raise "List '#{list_name}' doesn't exist"
158
+ end
159
+
160
+ case fsp[list_name]
161
+ when Innodb::List::Xdes
162
+ print_xdes_list(fsp[list_name])
163
+ when Innodb::List::Inode
164
+ puts "%-12s" % [
165
+ "page",
166
+ ]
167
+ fsp[list_name].each do |page|
168
+ puts "%-12i" % [
169
+ page.offset,
170
+ ]
171
+ end
172
+ end
173
+ end
174
+
175
+ def index_fseg_summary(index, fseg_name, list_name)
176
+ fseg_header = index.root.fseg_header
177
+ unless fseg_header[fseg_name]
178
+ raise "File segment '#{fseg_name}' doesn't exist"
179
+ end
180
+
181
+ fseg = fseg_header[fseg_name]
182
+ unless fseg[list_name] && fseg[list_name].is_a?(Innodb::List)
183
+ raise "List '#{list_name}' doesn't exist"
184
+ end
185
+
186
+ print_xdes_list(fseg[list_name])
187
+ end
188
+
73
189
  def space_index_pages_free_plot(space, image)
74
190
  unless require "gnuplot"
75
191
  raise "Couldn't load gnuplot. Is it installed?"
@@ -225,6 +341,9 @@ Usage: innodb_space -f <file> [-p <page>] [-l <level>] <mode> [<mode>, ...]
225
341
  --level, -l <level>
226
342
  Operate on the level <level>; may be specified more than once.
227
343
 
344
+ --list, -L <list>
345
+ Operate on the list <list>; may be specified more than once.
346
+
228
347
  --require, -r <file>
229
348
  Use Ruby's "require" to load the file <file>. This is useful for loading
230
349
  classes with record describers.
@@ -255,6 +374,16 @@ The following modes are supported:
255
374
  Summarize all contiguous regions of the same page type. This is useful to
256
375
  provide an overall view of the space and allocations within it.
257
376
 
377
+ space-lists
378
+ Print the names of all lists in a space.
379
+
380
+ space-indexes
381
+ Summarize all indexes (actually each segment of the indexes) to show
382
+ the number of pages used and allocated, and the segment fill factor.
383
+
384
+ list-summary
385
+ Summarize the contents of a list.
386
+
258
387
  index-recurse
259
388
  Recurse an index, starting at the root (which must be provided in the first
260
389
  --page/-p argument), printing the node pages, node pointers (links), leaf
@@ -269,6 +398,13 @@ The following modes are supported:
269
398
  Print a summary of all pages at a given level (provided with the --level/-l
270
399
  argument) in an index.
271
400
 
401
+ index-fseg-internal-summary
402
+ index-fseg-leaf-summary
403
+ Summarize the file segment (whose name is provided in the first --list/-L
404
+ argument) for internal or leaf pages for a given index (whose root page
405
+ is provided in the first --page/-p argument). The lists used for each
406
+ index are "full", "not_full", and "free".
407
+
272
408
  END_OF_USAGE
273
409
 
274
410
  exit exit_code
@@ -278,6 +414,7 @@ end
278
414
  @options.file = nil
279
415
  @options.pages = []
280
416
  @options.levels = []
417
+ @options.lists = []
281
418
  @options.describer = nil
282
419
 
283
420
  getopt_options = [
@@ -285,6 +422,7 @@ getopt_options = [
285
422
  [ "--file", "-f", GetoptLong::REQUIRED_ARGUMENT ],
286
423
  [ "--page", "-p", GetoptLong::REQUIRED_ARGUMENT ],
287
424
  [ "--level", "-l", GetoptLong::REQUIRED_ARGUMENT ],
425
+ [ "--list", "-L", GetoptLong::REQUIRED_ARGUMENT ],
288
426
  [ "--require", "-r", GetoptLong::REQUIRED_ARGUMENT ],
289
427
  [ "--describer", "-d", GetoptLong::REQUIRED_ARGUMENT ],
290
428
  ]
@@ -303,6 +441,8 @@ getopt.each do |opt, arg|
303
441
  @options.pages << arg.to_i
304
442
  when "--level"
305
443
  @options.levels << arg.to_i
444
+ when "--list"
445
+ @options.lists << arg.to_sym
306
446
  when "--require"
307
447
  require arg
308
448
  when "--describer"
@@ -343,6 +483,14 @@ ARGV.each do |mode|
343
483
  space_index_pages_free_plot(space, name)
344
484
  when "space-page-type-regions"
345
485
  space_page_type_regions(space)
486
+ when "space-lists"
487
+ space_lists(space)
488
+ when "space-indexes"
489
+ space_indexes(space)
490
+ when "list-summary"
491
+ @options.lists.each do |list|
492
+ list_summary(space, list)
493
+ end
346
494
  when "index-recurse"
347
495
  unless space.record_describer
348
496
  usage 1, "Record describer necessary for index recursion"
@@ -377,7 +525,15 @@ ARGV.each do |mode|
377
525
  end
378
526
 
379
527
  index_level_summary(space.index(@options.pages.first), @options.levels)
528
+ when "index-fseg-leaf-summary"
529
+ @options.lists.each do |list|
530
+ index_fseg_summary(space.index(@options.pages.first), :leaf, list)
531
+ end
532
+ when "index-fseg-internal-summary"
533
+ @options.lists.each do |list|
534
+ index_fseg_summary(space.index(@options.pages.first), :internal, list)
535
+ end
380
536
  else
381
537
  usage 1, "Unknown mode: #{mode}"
382
538
  end
383
- end
539
+ end
@@ -8,7 +8,9 @@ require "innodb/page/inode"
8
8
  require "innodb/page/index"
9
9
  require "innodb/page/trx_sys"
10
10
  require "innodb/record_describer"
11
+ require "innodb/field"
11
12
  require "innodb/space"
12
13
  require "innodb/index"
13
14
  require "innodb/log_block"
14
15
  require "innodb/log"
16
+ require "innodb/xdes"
@@ -1,3 +1,5 @@
1
+ require "bindata"
2
+
1
3
  # A cursor to walk through InnoDB data structures to read fields.
2
4
  class Innodb::Cursor
3
5
  def initialize(buffer, offset)
@@ -60,9 +62,8 @@ class Innodb::Cursor
60
62
  end
61
63
 
62
64
  # Read a number of bytes forwards or backwards from the current cursor
63
- # position and adjust the cursor position by that amount, optionally
64
- # unpacking the data using the provided type.
65
- def read_and_advance(length, type=nil)
65
+ # position and adjust the cursor position by that amount.
66
+ def read_and_advance(length)
66
67
  data = nil
67
68
  #print "data(#{@cursor[0]}..."
68
69
  case @direction
@@ -74,7 +75,7 @@ class Innodb::Cursor
74
75
  data = @buffer.data(@cursor[0], length)
75
76
  end
76
77
  #puts "#{@cursor[0]}) = #{data.bytes.map { |n| "%02x" % n }.join}"
77
- type ? data.unpack(type).first : data
78
+ data
78
79
  end
79
80
 
80
81
  # Return raw bytes.
@@ -87,50 +88,67 @@ class Innodb::Cursor
87
88
  read_and_advance(length).bytes.map { |c| "%02x" % c }.join
88
89
  end
89
90
 
90
- # Return a big-endian unsigned 8-bit integer.
91
+ # Read an unsigned 8-bit integer.
91
92
  def get_uint8(offset=nil)
92
93
  seek(offset)
93
- read_and_advance(1, "C")
94
+ data = read_and_advance(1)
95
+ BinData::Uint8.read(data)
94
96
  end
95
97
 
96
- # Return a big-endian unsigned 16-bit integer.
98
+ # Read a big-endian unsigned 16-bit integer.
97
99
  def get_uint16(offset=nil)
98
100
  seek(offset)
99
- read_and_advance(2, "n")
101
+ data = read_and_advance(2)
102
+ BinData::Uint16be.read(data)
100
103
  end
101
104
 
102
- # Return a big-endian signed 16-bit integer.
105
+ # Read a big-endian signed 16-bit integer.
103
106
  def get_sint16(offset=nil)
104
107
  seek(offset)
105
- uint = read_and_advance(2, "n")
106
- (uint & 32768) == 0 ? uint : -(uint ^ 65535) - 1
108
+ data = read_and_advance(2)
109
+ BinData::Int16be.read(data)
107
110
  end
108
111
 
109
- # Return a big-endian unsigned 24-bit integer.
112
+ # Read a big-endian unsigned 24-bit integer.
110
113
  def get_uint24(offset=nil)
111
114
  seek(offset)
112
- # Ruby 1.8 doesn't support big-endian 24-bit unpack; unpack as one
113
- # 8-bit and one 16-bit big-endian instead.
114
- high, low = read_and_advance(3).unpack("nC")
115
- (high << 8) | low
115
+ data = read_and_advance(3)
116
+ BinData::Uint24be.read(data)
116
117
  end
117
118
 
118
- # Return a big-endian unsigned 32-bit integer.
119
+ # Read a big-endian unsigned 32-bit integer.
119
120
  def get_uint32(offset=nil)
120
121
  seek(offset)
121
- read_and_advance(4, "N")
122
+ data = read_and_advance(4)
123
+ BinData::Uint32be.read(data)
122
124
  end
123
125
 
124
- # Return a big-endian unsigned 64-bit integer.
126
+ # Read a big-endian unsigned 64-bit integer.
125
127
  def get_uint64(offset=nil)
126
128
  seek(offset)
127
- # Ruby 1.8 doesn't support big-endian quad-word unpack; unpack as two
128
- # 32-bit big-endian instead.
129
- high, low = read_and_advance(8).unpack("NN")
130
- (high << 32) | low
129
+ data = read_and_advance(8)
130
+ BinData::Uint64be.read(data)
131
131
  end
132
132
 
133
- # Return an InnoDB-compressed unsigned 32-bit integer.
133
+ # Read a big-endian unsigned integer given its size in bytes.
134
+ def get_uint_by_size(size)
135
+ case size
136
+ when 1
137
+ get_uint8
138
+ when 2
139
+ get_uint16
140
+ when 3
141
+ get_uint24
142
+ when 4
143
+ get_uint32
144
+ when 8
145
+ get_uint64
146
+ else
147
+ raise "Not implemented"
148
+ end
149
+ end
150
+
151
+ # Read an InnoDB-compressed unsigned 32-bit integer.
134
152
  def get_ic_uint32
135
153
  flag = peek { get_uint8 }
136
154
 
@@ -151,15 +169,59 @@ class Innodb::Cursor
151
169
  end
152
170
  end
153
171
 
154
- # Return an InnoDB-munged signed 8-bit integer. (This is only implemented
155
- # for positive integers at the moment.)
172
+ # Read an InnoDB-munged signed 8-bit integer.
156
173
  def get_i_sint8
157
- get_uint8 ^ (1 << 7)
174
+ data = read_and_advance(1)
175
+ BinData::Int8.read(data) ^ (-1 << 7)
176
+ end
177
+
178
+ # Read an InnoDB-munged signed 16-bit integer.
179
+ def get_i_sint16
180
+ data = read_and_advance(2)
181
+ BinData::Int16be.read(data) ^ (-1 << 15)
158
182
  end
159
183
 
160
- # Return an InnoDB-munged signed 64-bit integer. (This is only implemented
161
- # for positive integers at the moment.)
184
+ # Read an InnoDB-munged signed 24-bit integer.
185
+ def get_i_sint24
186
+ data = read_and_advance(3)
187
+ BinData::Int24be.read(data) ^ (-1 << 23)
188
+ end
189
+
190
+ # Read an InnoDB-munged signed 32-bit integer.
191
+ def get_i_sint32
192
+ data = read_and_advance(4)
193
+ BinData::Int32be.read(data) ^ (-1 << 31)
194
+ end
195
+
196
+ # Read an InnoDB-munged signed 64-bit integer.
162
197
  def get_i_sint64
163
- get_uint64 ^ (1 << 63)
198
+ data = read_and_advance(8)
199
+ BinData::Int64be.read(data) ^ (-1 << 63)
200
+ end
201
+
202
+ # Read an InnoDB-munged signed integer given its size in bytes.
203
+ def get_i_sint_by_size(size)
204
+ case size
205
+ when 1
206
+ get_i_sint8
207
+ when 2
208
+ get_i_sint16
209
+ when 3
210
+ get_i_sint24
211
+ when 4
212
+ get_i_sint32
213
+ when 8
214
+ get_i_sint64
215
+ else
216
+ raise "Not implemented"
217
+ end
218
+ end
219
+
220
+ # Read an array of 1-bit integers.
221
+ def get_bit_array(num_bits)
222
+ size = (num_bits + 7) / 8
223
+ data = read_and_advance(size)
224
+ bit_array = BinData::Array.new(:type => :bit1, :initial_length => size * 8)
225
+ bit_array.read(data).to_ary
164
226
  end
165
- end
227
+ end
@@ -0,0 +1,66 @@
1
+ class Innodb::Field
2
+ attr_reader :position, :nullable, :fixed_len, :variable_len
3
+
4
+ def initialize(position, type, *properties)
5
+ @position = position
6
+ @type, @fixed_len, @variable_len = parse_data_type(type.to_s)
7
+ @nullable = (not properties.include?(:NOT_NULL))
8
+ @unsigned = properties.include?(:UNSIGNED)
9
+ end
10
+
11
+ # Parse the data type description string of a field.
12
+ def parse_data_type(data_type)
13
+ case data_type
14
+ when /^(tinyint|smallint|mediumint|int|bigint)$/i
15
+ type = data_type.upcase.to_sym
16
+ fixed_len = fixed_len_map[type]
17
+ [type, fixed_len, 0]
18
+ when /varchar\((\d+)\)$/i
19
+ [:VARCHAR, 0, $1.to_i]
20
+ else
21
+ raise "Data type '#{data_type}' is not supported"
22
+ end
23
+ end
24
+
25
+ # Maps data type to fixed storage length.
26
+ def fixed_len_map
27
+ {
28
+ :TINYINT => 1,
29
+ :SMALLINT => 2,
30
+ :MEDIUMINT => 3,
31
+ :INT => 4,
32
+ :BIGINT => 8,
33
+ }
34
+ end
35
+
36
+ # Return whether this field is NULL.
37
+ def null?(record)
38
+ case record[:format]
39
+ when :compact
40
+ header = record[:header]
41
+ header[:null_bitmap][@position]
42
+ end
43
+ end
44
+
45
+ # Return the length of this variable-length field.
46
+ def get_variable_len(record)
47
+ case record[:format]
48
+ when :compact
49
+ header = record[:header]
50
+ header[:variable_length][@position]
51
+ end
52
+ end
53
+
54
+ # Read an InnoDB encoded data field.
55
+ def read(record, cursor)
56
+ return :NULL if @nullable and null?(record)
57
+
58
+ case @type
59
+ when :TINYINT, :SMALLINT, :MEDIUMINT, :INT, :BIGINT
60
+ symbol = @unsigned ? :get_uint_by_size : :get_i_sint_by_size
61
+ cursor.send(symbol, @fixed_len)
62
+ when :VARCHAR
63
+ '\'' + cursor.get_bytes(get_variable_len(record)) + '\''
64
+ end
65
+ end
66
+ end