bitcask 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,51 +5,81 @@ Utilities for reading the Bitcask file format. You can use this to recover
5
5
  deleted values (before they are compacted), recover from a backup, list keys
6
6
  to do read-repair when list-keys is malfunctioning, and so forth.
7
7
 
8
- # Open a bitcask.
9
- b = Bitcask.new '/var/lib/riak/bitcask/0'
10
-
11
- # Dump all keys and values, in cron order, excluding tombstones.
12
- # Data files go in cronological order, so this is in effect replaying history.
13
- b.data_files.each do |data_file|
14
- data_file.each do |key, value|
15
- next if value == Bitcask::TOMBSTONE
16
- puts key
17
- puts value
18
- end
19
- end
20
-
21
- # If you know the offset, you can retrieve it directly.
22
- data_file[0] # => ["key", "value"]
23
-
24
- # And step through values one by one.
25
- data_file.read # => [k1, v1]
26
- data_file.read # => [k2, v2]
27
-
28
- # Seek, rewind, and pos are also supported.
29
-
30
- # In Riak, these are erlang terms.
31
- b.data_files.each do |data_file|
32
- data_file.each do |key, value|
33
- next if value == Bitcask::TOMBSTONE
34
-
35
- bucket, key = BERT.decode key
36
- value = BERT.decode value
37
-
38
- # Store the object's value in riak
39
- o = riak[bucket][key]
40
- o.raw_data = value.last
41
- o.store
42
-
43
- # Or dump the entire value to a file for later inspection.
44
- FileUtils.mkdir_p(bucket)
45
- File.open(File.join(bucket, key), 'w') do |out|
46
- out.write value.to_json
8
+ Install
9
+ -------
10
+
11
+ $ gem install bitcask
12
+
13
+ Examples
14
+ --------
15
+
16
+ Open a bitcask.
17
+
18
+ b = Bitcask.new '/var/lib/riak/bitcask/0'
19
+
20
+ Load the keydir, using hintfiles where possible.
21
+
22
+ b.load
23
+
24
+ Get a specific entry:
25
+
26
+ b['test'] #=> 'value_of_test'
27
+
28
+ Iterate over all values:
29
+
30
+ b.each do |key, value|
31
+ puts key
32
+ puts value
33
+ end
34
+
35
+ In Riak, these are erlang terms.
36
+
37
+ b.each do |key, value|
38
+ next if value == Bitcask::TOMBSTONE
39
+
40
+ bucket, key = BERT.decode key
41
+ value = BERT.decode value
42
+
43
+ # Store the object's value in riak
44
+ o = riak[bucket][key]
45
+ o.raw_data = value.last
46
+ o.store
47
+
48
+ # Or dump the entire value to a file for later inspection.
49
+ FileUtils.mkdir_p(bucket)
50
+ File.open(File.join(bucket, key), 'w') do |out|
51
+ out.write value.to_json
52
+ end
53
+ end
54
+
55
+ You can also work directly on the data files. Here's how to dump all keys and
56
+ values, in chronological order, excluding tombstones. Data files go in chronological
57
+ order, so this is in effect replaying history since the last merge.
58
+
59
+ b.data_files.each do |data_file|
60
+ data_file.each do |entry|
61
+ next if entry.value == Bitcask::TOMBSTONE
62
+ puts entry.key
63
+ puts entry.value
64
+ end
47
65
  end
48
- end
49
- end
66
+
67
+ If you know the offset, you can retrieve it directly from a DataFile.
68
+
69
+ data_file[0] # => Struct {:key => 'key', :value => 'value'}
70
+
71
+ And step through values one by one.
72
+
73
+ data_file.read # => Struct {:key => k1, :value => v1}
74
+ data_file.read # => Struct {:key => k2, :value => v2}
75
+
76
+ Seek, rewind, and pos are also supported.
50
77
 
51
78
  You'd be surprised how fast this is. 10,000 values/sec, easy.
52
79
 
80
+ Status
81
+ ------
82
+
53
83
  Anyone who wants to expand this, feel free. I've been using it for emergency
54
84
  recovery operations, but don't plan to reimplement bitcask in Ruby myself. I
55
85
  welcome pull requests.
@@ -3,16 +3,29 @@ class Bitcask
3
3
 
4
4
  $LOAD_PATH << File.expand_path(File.dirname(__FILE__))
5
5
 
6
- # A single data file
6
+ require 'bitcask/hint_file'
7
7
  require 'bitcask/data_file'
8
+ require 'bitcask/keydir'
8
9
  require 'bitcask/errors'
9
10
  require 'bitcask/version'
10
11
 
12
+ include Enumerable
13
+
11
14
  TOMBSTONE = "bitcask_tombstone"
12
15
 
13
16
  # Opens a bitcask backed by the given directory.
17
+ attr_accessor :keydir
18
+ attr_reader :dir
14
19
  def initialize(dir)
15
20
  @dir = dir
21
+ @keydir = Bitcask::Keydir.new
22
+ end
23
+
24
+ # Looks up key in the keydir and reads its entry from the matching data file.
25
+ # Returns the entry's value, or nil when the key is not in the keydir.
26
+ def [](key)
27
+ index = @keydir[key] or return nil
28
+ @keydir.data_files[index.file_id][index.value_pos, index.value_sz].value
16
29
  end
17
30
 
18
31
  # Returns a list of all data filenames in this bitcask, sorted from oldest
@@ -29,4 +42,74 @@ class Bitcask
29
42
  Bitcask::DataFile.new filename
30
43
  end
31
44
  end
45
+
46
# Iterates over every key in the keydir, reading each entry back from its
# data file. Yields [key, value] pairs.
#
# Returns an Enumerator when called without a block, so that external
# iteration and chaining (e.g. b.each.first(10)) work instead of raising
# LocalJumpError.
def each
  return enum_for(:each) unless block_given?

  @keydir.each do |_key, index|
    # The keydir entry says which data file holds the record and the byte
    # span to read from it.
    entry = @keydir.data_files[index.file_id][index.value_pos, index.value_sz]
    yield [entry.key, entry.value]
  end
end
53
+
54
+ # Returns the keys currently held in the keydir (delegates to keydir.keys).
55
+ def keys
56
+ keydir.keys
57
+ end
58
+
59
+ # Populates the keydir from every data file: a file's hint file is loaded when it has one, otherwise the data file itself is scanned.
60
+ def load
61
+ data_files.each do |d|
62
+ if h = d.hint_file
63
+ load_hint_file h
64
+ else
65
+ load_data_file d
66
+ end
67
+ end
68
+ end
69
+
70
+ # Scans a DataFile entry by entry and records each entry's location in the keydir.
71
+ def load_data_file(data_file)
72
+ # Register the data file in the keydir's list (only once) and use its position in that list as file_id.
73
+ @keydir.data_files |= [data_file]
74
+ file_id = @keydir.data_files.index data_file
75
+
76
+ pos = 0
77
+ data_file.each do |entry|
78
+ # Index this entry unless the keydir already holds one with a strictly newer tstamp; the size and offset stored are the byte span between the previous and current cursor positions.
79
+ if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
80
+ @keydir[entry.key] = Keydir::Entry.new(
81
+ file_id,
82
+ data_file.pos - pos,
83
+ pos,
84
+ entry.tstamp
85
+ )
86
+ end
87
+
88
+ pos = data_file.pos
89
+ end
90
+ end
91
+
92
+ # Reads a HintFile entry by entry and records each entry's location in the keydir, without scanning the data file.
93
+ def load_hint_file(hint_file)
94
+ # Register the hint file's data file in the keydir's list (only once) and use its position in that list as file_id.
95
+ @keydir.data_files |= [hint_file.data_file]
96
+ file_id = @keydir.data_files.index hint_file.data_file
97
+
98
+ hint_file.each do |entry|
99
+ # Index this entry unless the keydir already holds one with a strictly newer tstamp; size and offset are taken from the hint entry as-is.
100
+ if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
101
+ @keydir[entry.key] = Keydir::Entry.new(
102
+ file_id,
103
+ entry.value_sz,
104
+ entry.value_pos,
105
+ entry.tstamp
106
+ )
107
+ end
108
+ end
109
+ end
110
+
111
+ # Returns the number of keys in the keydir.
112
+ def size
113
+ @keydir.size
114
+ end
32
115
  end
@@ -4,18 +4,22 @@ class Bitcask::DataFile
4
4
  # This is most definitely not threadsafe, but it's so cheap you might as well
5
5
  # make lots of copies.
6
6
 
7
+ Entry = Struct.new :tstamp, :key, :value
8
+
9
+ include Enumerable
10
+
7
11
  def initialize(filename)
8
12
  @file = File.open(filename)
9
13
  end
10
14
 
11
15
  # Seeks to the given offset and reads one Entry from there; size, when given, is handed to read.
12
16
  # Also advances the cursor.
13
- def [](offset)
17
+ def [](offset, size = nil)
14
18
  seek offset
15
- read
19
+ read size
16
20
  end
17
21
 
18
- # Iterates over every entry in this file, yielding the key and value.
22
+ # Iterates over every entry in this file, yielding an Entry.
19
23
  # Options:
20
24
  # :rewind (true) - Rewind the file to the beginning, instead of starting
21
25
  # right here.
@@ -43,28 +47,45 @@ class Bitcask::DataFile
43
47
  end
44
48
  end
45
49
 
50
# Returns the HintFile stored next to this data file (same path with the
# trailing ".data" replaced by ".hint"), or nil when there is none.
#
# The HintFile is built once, linked back to this data file through
# data_file=, and memoized. While no hint file has been found the lookup is
# repeated on every call.
def hint_file
  @hint_file ||= begin
    data_path = @file.path
    hint_path = data_path.sub(/\.data\z/, '.hint')

    # When the path does not end in ".data" the substitution is a no-op;
    # never treat the data file as its own hint file.
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if hint_path != data_path && File.exist?(hint_path)
      hint = Bitcask::HintFile.new hint_path
      hint.data_file = self
      hint
    end
  end
end
60
+
46
61
  def pos
47
62
  @file.pos
48
63
  end
49
64
  alias tell pos
50
65
 
51
# Returns a single Entry read from the current offset, and advances to the
# next.
#
# size - optional number of bytes to pull from the file first; the record is
#        then parsed out of that chunk instead of straight from the file.
#
# Returns nil at end of file (also when size is given; previously that case
# raised TypeError from StringIO.new(nil)).
#
# Raises Bitcask::ChecksumError when the CRC does not match or when the
# record is cut short (incomplete header, key or value).
def read(size = nil)
  io = @file
  if size
    chunk = @file.read(size)
    return unless chunk
    io = StringIO.new chunk
  end

  # Parse header: crc (32 bit), tstamp (32 bit), key size (16 bit),
  # value size (32 bit), all big-endian.
  header = io.read(14) or return
  raise Bitcask::ChecksumError if header.bytesize < 14
  crc, tstamp, ksz, value_sz = header.unpack "NNnN"

  # Read data. IO#read returns nil at EOF for a non-zero length, so coerce
  # to an empty string and let the length check below report the truncation.
  key = io.read(ksz).to_s
  value = io.read(value_sz).to_s

  # Length and CRC check. The CRC covers everything after the crc field.
  intact = key.bytesize == ksz &&
           value.bytesize == value_sz &&
           crc == Zlib.crc32(header[4..-1] + key + value)
  raise Bitcask::ChecksumError unless intact

  Entry.new tstamp, key, value
end
69
90
 
70
91
  # Rewinds the file.
@@ -0,0 +1,80 @@
1
class Bitcask::HintFile
  # A single Bitcask hint file.
  #
  # The file is read as a sequence of records. Each record is an 18-byte
  # big-endian header (tstamp, key size, value_sz, 64-bit value_pos) followed
  # by the key bytes. This reader performs no checksum verification.
  #
  # This is most definitely not threadsafe, but it's so cheap you might as well
  # make lots of copies.

  # One hint record.
  Entry = Struct.new :tstamp, :value_sz, :value_pos, :key

  # Byte length of the fixed-size record header ("NnNNN" = 4 + 2 + 4 + 4 + 4).
  HEADER_SIZE = 18

  include Enumerable

  # The data file this hint file belongs to; set by whoever creates the
  # HintFile.
  attr_accessor :data_file

  # Opens the hint file at filename for reading.
  def initialize(filename)
    # Binary mode: the content is packed binary data and must not go through
    # newline or encoding translation.
    @file = File.open(filename, 'rb')
  end

  # Reads the Entry stored at a particular offset.
  # Also advances the cursor.
  def [](offset)
    seek offset
    read
  end

  # Iterates over every entry in this file, yielding an Entry.
  # Returns self, or an Enumerator when no block is given.
  # Options:
  #   :rewind (true) - Rewind the file to the beginning, instead of starting
  #     right here.
  #   :raise_checksum (false) - Accepted for symmetry with the data file
  #     reader; unused here because read verifies no checksum.
  def each(opts = {})
    return enum_for(:each, opts) unless block_given?

    options = {
      :rewind => true,
      :raise_checksum => false
    }.merge opts

    rewind if options[:rewind]

    while (entry = read)
      yield entry
    end

    self
  end

  # Current byte offset in the file.
  def pos
    @file.pos
  end
  alias tell pos

  # Returns an Entry (tstamp, value_sz, value_pos, key) read from the current
  # offset, and advances to the next. Returns nil at end of file.
  #
  # Raises EOFError when the file ends in the middle of a record (previously
  # this surfaced as a NoMethodError on nil).
  def read
    # Parse header
    header = @file.read(HEADER_SIZE) or return
    raise EOFError, "truncated hint record header" if header.bytesize < HEADER_SIZE
    tstamp, ksz, value_sz, value_pos1, value_pos2 = header.unpack "NnNNN"

    # value_pos is an 8 byte big-endian number...
    # For reference, reverse is
    # [value_pos >> 32, value_pos & 0xFFFFFFFF].pack("NN")
    value_pos = (value_pos1 << 32) | value_pos2

    # Read key. IO#read returns nil at EOF for a non-zero length.
    key = @file.read(ksz).to_s
    raise EOFError, "truncated hint record key" if key.bytesize < ksz

    Entry.new tstamp, value_sz, value_pos, key
  end

  # Rewinds the file.
  def rewind
    @file.rewind
  end

  # Seek to a given offset.
  def seek(offset)
    @file.seek offset
  end
end
@@ -0,0 +1,10 @@
1
# In-memory index of a bitcask: a Hash from key to Entry, plus the list of
# data files that the entries' file_id values index into.
class Bitcask::Keydir < Hash
  # Location of one key's latest record: index into data_files, byte size,
  # byte offset, and timestamp.
  Entry = Struct.new :file_id, :value_sz, :value_pos, :tstamp

  # Array of data files; an Entry's file_id is a position in this array.
  attr_accessor :data_files

  # Accepts the same arguments (and optional default block) as Hash.new.
  def initialize(*a, &block)
    # Parenthesized splat avoids Ruby's "`*' interpreted as argument prefix"
    # warning; the block is forwarded explicitly rather than implicitly.
    super(*a, &block)

    @data_files = []
  end
end
@@ -1,3 +1,3 @@
1
1
  class Bitcask
2
- VERSION = '0.0.1'
2
+ VERSION = '0.1.0'
3
3
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 0
8
7
  - 1
9
- version: 0.0.1
8
+ - 0
9
+ version: 0.1.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Kyle Kingsbury
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-18 00:00:00 -07:00
17
+ date: 2011-06-11 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -31,10 +31,12 @@ files:
31
31
  - lib/bitcask/data_file.rb
32
32
  - lib/bitcask/version.rb
33
33
  - lib/bitcask/errors.rb
34
+ - lib/bitcask/hint_file.rb
35
+ - lib/bitcask/keydir.rb
34
36
  - LICENSE
35
37
  - README.markdown
36
38
  has_rdoc: true
37
- homepage: https://github.com/aphyr/bitcask
39
+ homepage: https://github.com/aphyr/bitcask-ruby
38
40
  licenses: []
39
41
 
40
42
  post_install_message: