redis-rdb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. data/.gitignore +2 -0
  2. data/LICENSE +22 -0
  3. data/README.md +91 -0
  4. data/Rakefile +7 -0
  5. data/examples/aof_dumper.rb +8 -0
  6. data/examples/read_rdb.rb +9 -0
  7. data/lib/rdb.rb +10 -0
  8. data/lib/rdb/callbacks.rb +129 -0
  9. data/lib/rdb/constants.rb +35 -0
  10. data/lib/rdb/dumper.rb +37 -0
  11. data/lib/rdb/dumpers/aof.rb +112 -0
  12. data/lib/rdb/errors.rb +4 -0
  13. data/lib/rdb/lzf.rb +48 -0
  14. data/lib/rdb/reader-state.rb +25 -0
  15. data/lib/rdb/reader.rb +387 -0
  16. data/lib/rdb/version.rb +3 -0
  17. data/redis-rdb.gemspec +30 -0
  18. data/test/helpers.rb +377 -0
  19. data/test/rdb/database_empty.rdb +1 -0
  20. data/test/rdb/database_multiple_logical_dbs.rdb +0 -0
  21. data/test/rdb/hash_as_ziplist.rdb +0 -0
  22. data/test/rdb/hash_normal.rdb +0 -0
  23. data/test/rdb/hash_with_big_values.rdb +0 -0
  24. data/test/rdb/hash_with_compressed_strings_as_zipmap.rdb +0 -0
  25. data/test/rdb/hash_with_uncompressed_strings_as_zipmap.rdb +0 -0
  26. data/test/rdb/keys_compressed.rdb +0 -0
  27. data/test/rdb/keys_integer.rdb +0 -0
  28. data/test/rdb/keys_uncompressed.rdb +0 -0
  29. data/test/rdb/keys_with_expiration.rdb +0 -0
  30. data/test/rdb/list_normal.rdb +0 -0
  31. data/test/rdb/list_of_compressed_strings_as_ziplist.rdb +0 -0
  32. data/test/rdb/list_of_integers_as_ziplist.rdb +0 -0
  33. data/test/rdb/list_of_uncompressed_strings_as_ziplist.rdb +0 -0
  34. data/test/rdb/set_as_intset_16bits.rdb +0 -0
  35. data/test/rdb/set_as_intset_32bits.rdb +0 -0
  36. data/test/rdb/set_as_intset_64bits.rdb +0 -0
  37. data/test/rdb/set_normal.rdb +0 -0
  38. data/test/rdb/sortedset_as_ziplist.rdb +0 -0
  39. data/test/rdb/sortedset_normal.rdb +0 -0
  40. data/test/test_reader.rb +416 -0
  41. metadata +109 -0
@@ -0,0 +1,4 @@
1
module RDB
  # Error raised by the reader when an RDB stream cannot be parsed
  # (bad signature, unsupported version, invalid encodings, ...).
  class ReaderError < RuntimeError; end
end
@@ -0,0 +1,48 @@
1
module RDB
  # Pure-Ruby decoder for LZF-compressed payloads.
  module LZF
    # Raised when a compressed payload is truncated, refers to data outside
    # the output produced so far, or does not inflate to the expected size.
    class DecompressionError < RuntimeError
    end

    class << self
      # Reads +compressed_length+ bytes from +rdb+ and inflates them.
      #
      # @param rdb [#read] stream positioned at the first compressed byte
      # @param compressed_length [Integer] number of compressed bytes to consume
      # @param expected_length [Integer] size the payload must inflate to
      # @return [String] the decompressed bytes
      # @raise [DecompressionError] if the input is truncated or malformed, or
      #   if the inflated size differs from +expected_length+
      def decompress(rdb, compressed_length, expected_length)
        input = rdb.read(compressed_length)
        available = input.nil? ? 0 : input.bytesize
        if available != compressed_length
          raise DecompressionError, "Expected #{compressed_length} compressed bytes, found #{available}"
        end

        output = ' ' * expected_length
        ipos = opos = 0

        while ipos < compressed_length
          ctrl = input.getbyte(ipos)
          ipos += 1

          if ctrl < 32
            # Literal run: the next ctrl + 1 input bytes are copied verbatim.
            run = ctrl + 1
            raise DecompressionError, 'Compressed data is truncated' if ipos + run > compressed_length
            if opos + run > expected_length
              raise DecompressionError, "Decompressed data exceeds expected length #{expected_length}"
            end

            run.times do
              output.setbyte(opos, input.getbyte(ipos))
              ipos += 1
              opos += 1
            end
          else
            # Back-reference: the top 3 bits hold the length, the low 5 bits
            # the high part of the offset into the output written so far.
            length = ctrl >> 5

            if length == 7
              # A length of 7 means one more byte extends the length.
              raise DecompressionError, 'Compressed data is truncated' if ipos >= compressed_length
              length += input.getbyte(ipos)
              ipos += 1
            end

            raise DecompressionError, 'Compressed data is truncated' if ipos >= compressed_length
            reference = opos - ((ctrl & 0x1f) << 8) - input.getbyte(ipos) - 1
            ipos += 1

            # A negative index would silently read from the end of the buffer.
            if reference < 0
              raise DecompressionError, 'Back-reference points before the start of the output'
            end

            run = length + 2
            if opos + run > expected_length
              raise DecompressionError, "Decompressed data exceeds expected length #{expected_length}"
            end

            # Copied byte by byte on purpose: source and destination may overlap.
            run.times do
              output.setbyte(opos, output.getbyte(reference))
              reference += 1
              opos += 1
            end
          end
        end

        if opos != expected_length
          raise DecompressionError, "Expected length #{expected_length} does not match #{opos}"
        end

        output
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module RDB
  # Mutable bookkeeping for the key currently being processed, handed to
  # every callback invocation.
  class ReaderState
    attr_reader :callbacks
    attr_accessor :database, :info, :key, :key_type_id, :key_expiration

    # @param callbacks [Object, nil] callback handler; a fresh EmptyCallbacks
    #   instance is used when none is given
    def initialize(callbacks = nil)
      @callbacks = callbacks || EmptyCallbacks.new
    end

    # @return [Boolean] whether an expiration is recorded for the current key
    def key_expires?
      @key_expiration.nil? ? false : true
    end

    # Maps the raw type id of the current key to a logical type name.
    #
    # @return [Symbol, nil] :string, :set, :list, :sortedset or :hash; nil
    #   when the type id matches none of the known constants
    def key_type
      case @key_type_id
      when Type::STRING
        :string
      when Type::SET, Type::SET_INTSET
        :set
      when Type::LIST, Type::LIST_ZIPLIST
        :list
      when Type::ZSET, Type::ZSET_ZIPLIST
        :sortedset
      when Type::HASH, Type::HASH_ZIPMAP, Type::HASH_ZIPLIST
        :hash
      end
    end
  end
end
@@ -0,0 +1,387 @@
1
+ module RDB
2
+ class Reader
3
+ class << self
4
# Opens +rdb_file+ in binary mode and parses it with .read; the file is
# closed when parsing finishes or raises.
#
# @param rdb_file [String] path of the RDB file
# @param options [Hash] forwarded unchanged to .read
def read_file(rdb_file, options = {})
  File.open(rdb_file, 'rb') { |io| read(io, options) }
end
9
+
10
# Parses an RDB stream and reports everything found through the callbacks.
#
# @param rdb [IO] binary stream positioned at the start of the RDB data
# @param options [Hash] :callbacks may hold the callback handler
# @raise [ReaderError] on a bad signature, unsupported version or invalid data
def read(rdb, options = {})
  rdb_version = read_rdb_version(rdb)

  state = ReaderState.new(options[:callbacks])
  callbacks = state.callbacks

  callbacks.start_rdb(rdb_version)

  loop do
    state.key_type_id = rdb.readbyte
    state.info = {}
    # An expiration only applies to the key that directly follows it. Clear
    # any value left over from a key that was skipped, otherwise it would be
    # reported for the next accepted key.
    state.key_expiration = nil

    case state.key_type_id
    when Opcode::EXPIRETIME_MS
      # 8-byte little-endian value, scaled by 1000.
      state.key_expiration = rdb.read(8).unpack('Q<').first * 1000
      state.info[:precision] = :millisecond
      state.key_type_id = rdb.readbyte

    when Opcode::EXPIRETIME
      # 4-byte little-endian value, scaled by 1000000.
      state.key_expiration = rdb.read(4).unpack('V').first * 1000000
      state.info[:precision] = :second
      state.key_type_id = rdb.readbyte

    when Opcode::SELECTDB
      callbacks.end_database(state.database) unless state.database.nil?
      state.database = read_length(rdb).first
      callbacks.start_database(state.database)
      next

    when Opcode::EOF
      callbacks.end_database(state.database) unless state.database.nil?
      callbacks.end_rdb
      break
    end

    state.key = read_string(rdb)

    if callbacks.accept_key?(state)
      read_object(rdb, state)
      notify_expiration(state) if state.key_expires?
    else
      skip_object(rdb, state)
    end
  end
end
55
+
56
+ private
57
+
58
# Reports the pending expiration of the current key through the pexpireat
# callback, then clears it from the state.
#
# @param state [ReaderState] state of the key being processed
def notify_expiration(state)
  handlers = state.callbacks
  handlers.pexpireat(state.key, state.key_expiration, state)
  state.key_expiration = nil
end
62
+
63
# Reads and validates the 9-byte file header: the "REDIS" signature followed
# by the format version as four ASCII digits.
#
# @param rdb [IO] stream positioned at the very start of the RDB data
# @return [Integer] the format version, between 1 and 6
# @raise [ReaderError] if the signature is wrong (including an empty or
#   truncated stream) or the version is not supported
def read_rdb_version(rdb)
  # read returns nil at end of stream; treat that like an empty header so the
  # signature check below reports it as a ReaderError.
  rdb_header = rdb.read(9).to_s
  signature = rdb_header[0..4]
  version = rdb_header[5..8].to_i

  raise ReaderError, 'Wrong signature trying to load DB from file' if signature != 'REDIS'
  raise ReaderError, "Can't handle RDB format version #{version}" if version < 1 || version > 6

  version
end
72
+
73
# Reverses the byte order of the low 32 bits of +value+.
#
# @param value [Integer] value whose four low bytes are swapped
# @return [Integer] the byte-swapped value, between 0 and 0xffffffff
def ntohl(value)
  byte0 = value & 0xff
  byte1 = (value >> 8) & 0xff
  byte2 = (value >> 16) & 0xff
  byte3 = (value >> 24) & 0xff

  (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3
end
79
+
80
# Decodes a length field. The two most significant bits of the first byte
# select how the value is stored.
#
# @param rdb [IO] stream positioned at the length field
# @return [Array(Integer, Boolean)] the decoded value and a flag that is true
#   when the value is not a length but an identifier of a special string
#   encoding
# @raise [ReaderError] if the encoding bits match none of the known kinds
def read_length(rdb)
  first_byte = rdb.readbyte
  encoding = (first_byte & 0xC0) >> 6
  low_bits = first_byte & 0x3F

  case encoding
  when Length::BITS_6
    [low_bits, false]
  when Length::BITS_14
    [(low_bits << 8) | rdb.readbyte, false]
  when Length::BITS_32
    # Stored in network (big-endian) byte order; 'N' decodes it the same way
    # on every host, unlike a native-endian read followed by a byte swap.
    [rdb.read(4).unpack('N').first, false]
  when Length::ENCODED
    [low_bits, true]
  else
    raise ReaderError, "Invalid encoding type for length - #{encoding}"
  end
end
98
+
99
# Reads a string value. Integer-encoded strings are returned as Integer,
# LZF-compressed strings are inflated, everything else is returned as the
# raw bytes read from the stream.
#
# @param rdb [IO] stream positioned at the string's length field
# @return [String, Integer] the decoded value
# @raise [ReaderError] if the special encoding identifier is unknown
def read_string(rdb)
  length, encoded = *read_length(rdb)
  return rdb.read(length) unless encoded

  # For encoded strings +length+ identifies the encoding, not a size.
  case length
  when Encoding::INT8
    rdb.read(1).unpack('c').first
  when Encoding::INT16
    # Explicit little-endian directives keep decoding host-independent.
    rdb.read(2).unpack('s<').first
  when Encoding::INT32
    rdb.read(4).unpack('l<').first
  when Encoding::LZF
    compressed_len = read_length(rdb).first
    uncompressed_len = read_length(rdb).first
    LZF.decompress(rdb, compressed_len, uncompressed_len)
  else
    raise ReaderError, "Invalid encoding for string - #{length}"
  end
end
121
+
122
# Decodes the value of the current key and reports it through the callbacks.
#
# Plain encodings (string, list, set, sorted set, hash) are decoded here;
# compact encodings are delegated to the dedicated read_* helpers. Any other
# type id is handed to skip_object.
#
# @param rdb [IO] stream positioned right after the key name
# @param state [ReaderState] state describing the current key
def read_object(rdb, state)
  key, callbacks = state.key, state.callbacks

  case state.key_type_id
  when Type::STRING
    state.info[:encoding] = :string
    callbacks.set(key, read_string(rdb), state)

  when Type::LIST
    state.info[:encoding] = :linkedlist
    object_reader(rdb, state) do
      callbacks.rpush(key, read_string(rdb), state)
    end

  when Type::SET
    state.info[:encoding] = :hashtable
    object_reader(rdb, state) do
      callbacks.sadd(key, read_string(rdb), state)
    end

  when Type::ZSET
    state.info[:encoding] = :skiplist
    object_reader(rdb, state) do
      value = read_string(rdb)
      # The score is ASCII text prefixed by a one-byte length. The length
      # values 253, 254 and 255 are markers for special scores and carry no
      # payload, so nothing more may be consumed for them.
      score_length = rdb.readbyte
      score = case score_length
              when 253 then 'nan'
              when 254 then 'inf'
              when 255 then '-inf'
              else rdb.read(score_length)
              end
      callbacks.zadd(key, score, value, state)
    end

  when Type::HASH
    state.info[:encoding] = :hashtable
    object_reader(rdb, state) do
      # Arguments are evaluated left to right: field first, then value.
      callbacks.hset(key, read_string(rdb), read_string(rdb), state)
    end

  when Type::HASH_ZIPMAP
    read_zipmap(rdb, state)

  when Type::LIST_ZIPLIST
    read_ziplist(rdb, state)

  when Type::SET_INTSET
    read_intset(rdb, state)

  when Type::ZSET_ZIPLIST
    read_zset_from_ziplist(rdb, state)

  when Type::HASH_ZIPLIST
    read_hash_from_ziplist(rdb, state)

  else
    skip_object(rdb, state)

  end
end
176
+
177
# Drives the decoding of a length-prefixed aggregate: reads the element
# count, fires the start_<type> callback, invokes the block once per element
# and finally fires the end_<type> callback.
#
# @param rdb [IO] stream positioned at the element count
# @param state [ReaderState] state describing the current key
# @yield [rdb, state] once for every element
def object_reader(rdb, state, &block)
  count, = read_length(rdb)
  state.info[:length] = count

  handlers = state.callbacks
  handlers.send("start_#{state.key_type}", state.key, count, state)
  count.times { block.call(rdb, state) }
  handlers.send("end_#{state.key_type}", state.key, state)
end
187
+
188
# Decodes a set stored as an intset: a string blob holding a header (width in
# bytes of each member, number of members) followed by the members.
#
# @param rdb [IO] stream positioned at the blob
# @param state [ReaderState] state describing the current key
# @raise [ReaderError] if the member width is not 2, 4 or 8
def read_intset(rdb, state)
  key, callbacks = state.key, state.callbacks
  buffer = StringIO.new(read_string(rdb))

  encoding, entries = *buffer.read(8).unpack('VV')

  state.info.merge!({
    encoding: :intset,
    encoded_size: buffer.length,
    length: entries,
  })

  callbacks.start_set(key, entries, state)
  entries.times do
    # Members are signed little-endian integers; unsigned directives would
    # turn negative members into large positive numbers.
    entry = case encoding
            when 2 then buffer.read(2).unpack('s<').first
            when 4 then buffer.read(4).unpack('l<').first
            when 8 then buffer.read(8).unpack('q<').first
            else
              raise ReaderError, "Invalid encoding for intset - #{encoding}"
            end

    callbacks.sadd(key, entry, state)
  end
  callbacks.end_set(key, state)
end
214
+
215
# Decodes a list stored as a ziplist, reporting every entry through the
# rpush callback.
#
# @param rdb [IO] stream positioned at the ziplist blob
# @param state [ReaderState] state describing the current key
def read_ziplist(rdb, state)
  handlers = state.callbacks
  ziplist_reader(rdb, state) do |key, buffer|
    entry = read_ziplist_entry(buffer, state)
    handlers.rpush(key, entry, state)
  end
end
221
+
222
# Decodes a sorted set stored as a ziplist of alternating member and score
# entries, reporting each pair through the zadd callback.
#
# @param rdb [IO] stream positioned at the ziplist blob
# @param state [ReaderState] state describing the current key
def read_zset_from_ziplist(rdb, state)
  handlers = state.callbacks
  ziplist_reader_interleaved(rdb, state) do |key, buffer|
    # The member is stored first, immediately followed by its score.
    pair = [read_ziplist_entry(buffer, state), read_ziplist_entry(buffer, state)]
    handlers.zadd(key, pair.last, pair.first, state)
  end
end
230
+
231
# Decodes a hash stored as a ziplist of alternating field and value entries,
# reporting each pair through the hset callback.
#
# @param rdb [IO] stream positioned at the ziplist blob
# @param state [ReaderState] state describing the current key
def read_hash_from_ziplist(rdb, state)
  handlers = state.callbacks
  ziplist_reader_interleaved(rdb, state) do |key, buffer|
    # The field is stored first, immediately followed by its value.
    pair = [read_ziplist_entry(buffer, state), read_ziplist_entry(buffer, state)]
    handlers.hset(key, pair.first, pair.last, state)
  end
end
239
+
240
# Variant of ziplist_reader for ziplists that store pairs as two consecutive
# entries: the entry count must be even and the block runs once per pair.
#
# @param rdb [IO] stream positioned at the ziplist blob
# @param state [ReaderState] state describing the current key
# @raise [ReaderError] if the ziplist holds an odd number of entries
def ziplist_reader_interleaved(rdb, state, &block)
  pair_counter = lambda do |total|
    pairs, leftover = total.divmod(2)
    raise ReaderError, "Expected even number of elements, found #{total}" unless leftover.zero?
    pairs
  end

  ziplist_reader(rdb, state, pair_counter, &block)
end
247
+
248
# Drives the decoding of a ziplist blob: parses its header, fires the
# start_<type> callback, invokes the block once per logical element, checks
# the terminator byte and fires the end_<type> callback.
#
# @param rdb [IO] stream positioned at the ziplist blob
# @param state [ReaderState] state describing the current key
# @param check_entries [#call, nil] optional callable receiving the raw entry
#   count and returning the number of times the block must run
# @yield [key, buffer, state] once per element; the block must consume the
#   element's entries from +buffer+
# @raise [ReaderError] if the byte following the entries is not 255
def ziplist_reader(rdb, state, check_entries = nil, &block)
  key, callbacks = state.key, state.callbacks
  buffer = StringIO.new(read_string(rdb))

  # Header: two 32-bit fields (not needed here) and the 16-bit entry count,
  # all little-endian.
  _bytes, _offset, entries = *buffer.read(10).unpack('VVv')
  entries = check_entries.call(entries) unless check_entries.nil?

  state.info.merge!({
    encoding: :ziplist,
    encoded_size: buffer.length,
    length: entries,
  })

  callbacks.send("start_#{state.key_type}", key, entries, state)
  entries.times do
    block.call(key, buffer, state)
  end

  # Read the byte first, then compare: assigning inside the condition would
  # store the comparison result instead of the byte.
  ziplist_end = buffer.readbyte
  if ziplist_end != 255
    raise ReaderError, "Invalid ziplist end - #{ziplist_end}"
  end
  callbacks.send("end_#{state.key_type}", key, state)
end
271
+
272
# Decodes one ziplist entry.
#
# @param rdb [IO] buffer positioned at the start of an entry
# @param state [ReaderState] unused by this method
# @return [String, Integer] raw bytes for string entries, Integer for
#   integer-encoded entries
# @raise [ReaderError] if the entry header is not a known encoding
def read_ziplist_entry(rdb, state)
  # Every entry starts with the length of the previous entry: one byte, or
  # the marker 254 followed by four more bytes. The value is not needed for
  # forward iteration and is only consumed.
  rdb.read(4) if rdb.readbyte == 254

  header = rdb.readbyte

  # Strings: the two top bits of the header select the size of the length.
  case header >> 6
  when 0 then return rdb.read(header & 0x3F)
  when 1 then return rdb.read(((header & 0x3F) << 8) | rdb.readbyte)
  when 2 then return rdb.read(rdb.read(4).unpack('N').first) # big-endian length
  end

  # Integers: signed and little-endian.
  if header >> 4 == 12
    rdb.read(2).unpack('s<').first
  elsif header >> 4 == 13
    rdb.read(4).unpack('l<').first
  elsif header >> 4 == 14
    rdb.read(8).unpack('q<').first
  elsif header == 240
    # 24-bit integer: place the three bytes in the upper part of a 32-bit
    # value and shift back down so the sign is extended.
    "\0#{rdb.read(3)}".unpack('l<').first >> 8
  elsif header == 254
    rdb.read(1).unpack('c').first
  elsif header >= 241 && header <= 253
    # Small integers 0..12 are stored in the header itself.
    header - 241
  else
    raise ReaderError, "Invalid entry header - #{header}"
  end
end
301
+
302
# Decodes a hash stored as a zipmap blob, reporting every field/value pair
# through the hset callback between start_hash and end_hash.
#
# @param rdb [IO] stream positioned at the zipmap blob
# @param state [ReaderState] state describing the current key
def read_zipmap(rdb, state)
  key, callbacks = state.key, state.callbacks
  buffer = StringIO.new(read_string(rdb))

  # The first byte of the blob is reported as the number of entries.
  entries = buffer.readbyte
  state.info.merge!(encoding: :zipmap, encoded_size: buffer.length, length: entries)

  callbacks.start_hash(key, entries, state)
  until (field_length = read_zipmap_next_length(buffer)).nil?
    field = buffer.read(field_length)

    value_length = read_zipmap_next_length(buffer)
    break if value_length.nil?

    # The value is preceded by a count of unused bytes that trail it.
    free = buffer.readbyte
    value = buffer.read(value_length)
    buffer.seek(free, IO::SEEK_CUR)

    callbacks.hset(key, field, value, state)
  end
  callbacks.end_hash(key, state)
end
330
+
331
# Reads the length prefix of the next zipmap field or value.
#
# @param rdb [IO] buffer positioned at a length prefix
# @return [Integer, nil] the length, or nil when the end-of-map marker (255)
#   is found
def read_zipmap_next_length(rdb)
  length = rdb.readbyte
  case length
  # 0 is a legitimate length (empty field or value), not a terminator.
  when 0..253 then length
  # 254 announces a 4-byte little-endian length.
  when 254 then rdb.read(4).unpack('V').first
  end
end
339
+
340
# Skips the value of the current key without decoding it. The value is
# treated as a sequence of strings; the skip_object callback is invoked
# after each skipped string.
#
# @param rdb [IO] stream positioned right after the key name
# @param state [ReaderState] state describing the current key
# @raise [ReaderError] if the key type is unknown
def skip_object(rdb, state)
  string_count = case state.key_type_id
                 when Type::LIST, Type::SET
                   # One string per element.
                   read_length(rdb).first
                 when Type::ZSET, Type::HASH
                   # Two strings per element.
                   read_length(rdb).first * 2
                 when Type::STRING, Type::LIST_ZIPLIST, Type::SET_INTSET,
                      Type::ZSET_ZIPLIST, Type::HASH_ZIPMAP, Type::HASH_ZIPLIST
                   # Stored as one single string blob.
                   1
                 else
                   raise ReaderError, "Trying to skip an unknown object type - #{state.key_type_id}"
                 end

  handlers = state.callbacks

  string_count.times do
    skip_string(rdb)
    handlers.skip_object(state.key, state)
  end
end
363
+
364
# Advances the stream past one string value without decoding it.
#
# @param rdb [IO] seekable stream positioned at the string's length field
# @raise [ReaderError] if the special encoding identifier is unknown
def skip_string(rdb)
  value, encoded = *read_length(rdb)

  distance = value
  if encoded
    # For encoded strings +value+ identifies the encoding, not a size.
    distance = case value
               when Encoding::INT8 then 1
               when Encoding::INT16 then 2
               when Encoding::INT32 then 4
               when Encoding::LZF
                 compressed_size = read_length(rdb).first
                 # The uncompressed size must be consumed too, even though
                 # only the compressed payload is skipped.
                 read_length(rdb).first
                 compressed_size
               else
                 raise ReaderError, "Invalid encoding for string - #{value}"
               end
  end

  rdb.seek(distance, IO::SEEK_CUR)
end
385
+ end
386
+ end
387
+ end