bitcask 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,51 +5,81 @@ Utilities for reading the Bitcask file format. You can use this to recover
5
5
  deleted values (before they are compacted), recover from a backup, list keys
6
6
  to do read-repair when list-keys is malfunctioning, and so forth.
7
7
 
8
- # Open a bitcask.
9
- b = Bitcask.new '/var/lib/riak/bitcask/0'
10
-
11
- # Dump all keys and values, in cron order, excluding tombstones.
12
- # Data files go in cronological order, so this is in effect replaying history.
13
- b.data_files.each do |data_file|
14
- data_file.each do |key, value|
15
- next if value == Bitcask::TOMBSTONE
16
- puts key
17
- puts value
18
- end
19
- end
20
-
21
- # If you know the offset, you can retrieve it directly.
22
- data_file[0] # => ["key", "value"]
23
-
24
- # And step through values one by one.
25
- data_file.read # => [k1, v1]
26
- data_file.read # => [k2, v2]
27
-
28
- # Seek, rewind, and pos are also supported.
29
-
30
- # In Riak, these are erlang terms.
31
- b.data_files.each do |data_file|
32
- data_file.each do |key, value|
33
- next if value == Bitcask::TOMBSTONE
34
-
35
- bucket, key = BERT.decode key
36
- value = BERT.decode value
37
-
38
- # Store the object's value in riak
39
- o = riak[bucket][key]
40
- o.raw_data = value.last
41
- o.store
42
-
43
- # Or dump the entire value to a file for later inspection.
44
- FileUtils.mkdir_p(bucket)
45
- File.open(File.join(bucket, key), 'w') do |out|
46
- out.write value.to_json
8
+ Install
9
+ -------
10
+
11
+ $ gem install bitcask
12
+
13
+ Examples
14
+ --------
15
+
16
+ Open a bitcask.
17
+
18
+ b = Bitcask.new '/var/lib/riak/bitcask/0'
19
+
20
+ Load the keydir, using hintfiles where possible.
21
+
22
+ b.load
23
+
24
+ Get a specific entry:
25
+
26
+ b['test'] #=> 'value_of_test'
27
+
28
+ Iterate over all values:
29
+
30
+ b.each do |key, value|
31
+ puts key
32
+ puts value
33
+ end
34
+
35
+ In Riak, keys and values are Erlang terms, so decode them with BERT.
36
+
37
+ b.each do |key, value|
38
+ next if value == Bitcask::TOMBSTONE
39
+
40
+ bucket, key = BERT.decode key
41
+ value = BERT.decode value
42
+
43
+ # Store the object's value in riak
44
+ o = riak[bucket][key]
45
+ o.raw_data = value.last
46
+ o.store
47
+
48
+ # Or dump the entire value to a file for later inspection.
49
+ FileUtils.mkdir_p(bucket)
50
+ File.open(File.join(bucket, key), 'w') do |out|
51
+ out.write value.to_json
52
+ end
53
+ end
54
+
55
+ You can also work directly on the data files. Here's how to dump all keys and
56
+ values, in chronological order, excluding tombstones. Data files go in chronological
57
+ order, so this is in effect replaying history since the last merge.
58
+
59
+ b.data_files.each do |data_file|
60
+ data_file.each do |entry|
61
+ next if entry.value == Bitcask::TOMBSTONE
62
+ puts entry.key
63
+ puts entry.value
64
+ end
47
65
  end
48
- end
49
- end
66
+
67
+ If you know the offset, you can retrieve it directly from a DataFile.
68
+
69
+ data_file[0] # => Struct {:key => 'key', :value => 'value'}
70
+
71
+ And step through values one by one.
72
+
73
+ data_file.read # => Struct {:key => k1, :value => v1}
74
+ data_file.read # => Struct {:key => k2, :value => v2}
75
+
76
+ Seek, rewind, and pos are also supported.
50
77
 
51
78
  You'd be surprised how fast this is. 10,000 values/sec, easy.
52
79
 
80
+ Status
81
+ ------
82
+
53
83
  Anyone who wants to expand this, feel free. I've been using it for emergency
54
84
  recovery operations, but don't plan to reimplement bitcask in Ruby myself. I
55
85
  welcome pull requests.
@@ -3,16 +3,29 @@ class Bitcask
3
3
 
4
4
  $LOAD_PATH << File.expand_path(File.dirname(__FILE__))
5
5
 
6
- # A single data file
6
+ require 'bitcask/hint_file'
7
7
  require 'bitcask/data_file'
8
+ require 'bitcask/keydir'
8
9
  require 'bitcask/errors'
9
10
  require 'bitcask/version'
10
11
 
12
+ include Enumerable
13
+
11
14
  TOMBSTONE = "bitcask_tombstone"
12
15
 
13
16
  # Opens a bitcask backed by the given directory.
17
+ attr_accessor :keydir
18
+ attr_reader :dir
14
19
  def initialize(dir)
15
20
  @dir = dir
21
+ @keydir = Bitcask::Keydir.new
22
+ end
23
+
24
+ # Uses the keydir to get an object from the bitcask. Returns a
25
+ # value.
26
+ def [](key)
27
+ index = @keydir[key] or return nil
28
+ @keydir.data_files[index.file_id][index.value_pos, index.value_sz].value
16
29
  end
17
30
 
18
31
  # Returns a list of all data filenames in this bitcask, sorted from oldest
@@ -29,4 +42,74 @@ class Bitcask
29
42
  Bitcask::DataFile.new filename
30
43
  end
31
44
  end
45
+
46
+ # Iterates over all keys in keydir. Yields key, value pairs.
47
+ def each
48
+ @keydir.each do |key, index|
49
+ entry = @keydir.data_files[index.file_id][index.value_pos, index.value_sz]
50
+ yield [entry.key, entry.value]
51
+ end
52
+ end
53
+
54
+ # Keydir keys.
55
+ def keys
56
+ keydir.keys
57
+ end
58
+
59
+ # Populate the keydir.
60
+ def load
61
+ data_files.each do |d|
62
+ if h = d.hint_file
63
+ load_hint_file h
64
+ else
65
+ load_data_file d
66
+ end
67
+ end
68
+ end
69
+
70
+ # Load a DataFile into the keydir.
71
+ def load_data_file(data_file)
72
+ # Determine data_file index.
73
+ @keydir.data_files |= [data_file]
74
+ file_id = @keydir.data_files.index data_file
75
+
76
+ pos = 0
77
+ data_file.each do |entry|
78
+ # Check for existing newer entry in keydir
79
+ if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
80
+ @keydir[entry.key] = Keydir::Entry.new(
81
+ file_id,
82
+ data_file.pos - pos,
83
+ pos,
84
+ entry.tstamp
85
+ )
86
+ end
87
+
88
+ pos = data_file.pos
89
+ end
90
+ end
91
+
92
+ # Load a HintFile into the keydir.
93
+ def load_hint_file(hint_file)
94
+ # Determine data_file index.
95
+ @keydir.data_files |= [hint_file.data_file]
96
+ file_id = @keydir.data_files.index hint_file.data_file
97
+
98
+ hint_file.each do |entry|
99
+ # Check for existing newer entry in keydir
100
+ if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
101
+ @keydir[entry.key] = Keydir::Entry.new(
102
+ file_id,
103
+ entry.value_sz,
104
+ entry.value_pos,
105
+ entry.tstamp
106
+ )
107
+ end
108
+ end
109
+ end
110
+
111
+ # Keydir size.
112
+ def size
113
+ @keydir.size
114
+ end
32
115
  end
@@ -4,18 +4,22 @@ class Bitcask::DataFile
4
4
  # This is most definitely not threadsafe, but it's so cheap you might as well
5
5
  # make lots of copies.
6
6
 
7
+ Entry = Struct.new :tstamp, :key, :value
8
+
9
+ include Enumerable
10
+
7
11
  def initialize(filename)
8
12
  @file = File.open(filename)
9
13
  end
10
14
 
11
15
  # Reads an Entry from a particular offset.
12
16
  # Also advances the cursor.
13
- def [](offset)
17
+ def [](offset, size = nil)
14
18
  seek offset
15
- read
19
+ read size
16
20
  end
17
21
 
18
- # Iterates over every entry in this file, yielding the key and value.
22
+ # Iterates over every entry in this file, yielding an Entry.
19
23
  # Options:
20
24
  # :rewind (true) - Rewind the file to the beginning, instead of starting
21
25
  # right here.
@@ -43,28 +47,45 @@ class Bitcask::DataFile
43
47
  end
44
48
  end
45
49
 
50
+ def hint_file
51
+ @hint_file ||= begin
52
+ path = @file.path.sub(/\.data$/, '.hint')
53
+ if File.exists? path
54
+ h = Bitcask::HintFile.new path
55
+ h.data_file = self
56
+ h
57
+ end
58
+ end
59
+ end
60
+
46
61
  def pos
47
62
  @file.pos
48
63
  end
49
64
  alias tell pos
50
65
 
51
- # Returns a single [key, value] pair read from the current offset,
52
- # and advances to the next.
66
+ # Returns a single Entry read from the current offset, and advances to the
67
+ # next.
53
68
  #
54
69
  # Can raise Bitcask::ChecksumError
55
- def read
70
+ def read(size = nil)
71
+ if size
72
+ f = StringIO.new @file.read(size)
73
+ else
74
+ f = @file
75
+ end
76
+
56
77
  # Parse header
57
- header = @file.read(14) or return
78
+ header = f.read(14) or return
58
79
  crc, tstamp, ksz, value_sz = header.unpack "NNnN"
59
80
 
60
81
  # Read data
61
- key = @file.read ksz
62
- value = @file.read value_sz
82
+ key = f.read ksz
83
+ value = f.read value_sz
63
84
 
64
85
  # CRC check
65
86
  raise Bitcask::ChecksumError unless crc == Zlib.crc32(header[4..-1] + key + value)
66
87
 
67
- [key, value]
88
+ Entry.new tstamp, key, value
68
89
  end
69
90
 
70
91
  # Rewinds the file.
@@ -0,0 +1,80 @@
1
+ class Bitcask::HintFile
2
+ # A single Bitcask hint file.
3
+ #
4
+ # This is most definitely not threadsafe, but it's so cheap you might as well
5
+ # make lots of copies.
6
+
7
+ Entry = Struct.new :tstamp, :value_sz, :value_pos, :key
8
+
9
+ include Enumerable
10
+
11
+ attr_accessor :data_file
12
+ def initialize(filename)
13
+ @file = File.open(filename)
14
+ end
15
+
16
+ # Reads an Entry from a particular offset.
17
+ # Also advances the cursor.
18
+ def [](offset)
19
+ seek offset
20
+ read
21
+ end
22
+
23
+ # Iterates over every entry in this file, yielding an Entry.
24
+ # Options:
25
+ # :rewind (true) - Rewind the file to the beginning, instead of starting
26
+ # right here.
27
+ # :raise_checksum (false) - Raise Bitcask::ChecksumError on crc failure,
28
+ # instead of silently continuing.
29
+ def each(opts = {})
30
+ options = {
31
+ :rewind => true,
32
+ :raise_checksum => false
33
+ }.merge opts
34
+
35
+ rewind if options[:rewind]
36
+
37
+ loop do
38
+ o = read
39
+ if o
40
+ yield o
41
+ else
42
+ return self
43
+ end
44
+ end
45
+ end
46
+
47
+ def pos
48
+ @file.pos
49
+ end
50
+ alias tell pos
51
+
52
+ # Returns an Entry (tstamp, value_sz, value_pos, key) read from the current
53
+ # offset, and advances to the next.
54
+ #
55
+ # Performs no checksum verification: the header parsed below has no CRC field.
56
+ def read
57
+ # Parse header
58
+ header = @file.read(18) or return
59
+ tstamp, ksz, value_sz, value_pos1, value_pos2 = header.unpack "NnNNN"
60
+
61
+ # value_pos is an 8 byte big-endian number...
62
+ # For reference, reverse is [value_pos >> 32, value_pos & 0xFFFFFFFF].pack("NN")
63
+ value_pos = (value_pos1 << 32) | value_pos2
64
+
65
+ # Read key
66
+ key = @file.read ksz
67
+
68
+ Entry.new tstamp, value_sz, value_pos, key
69
+ end
70
+
71
+ # Rewinds the file.
72
+ def rewind
73
+ @file.rewind
74
+ end
75
+
76
+ # Seek to a given offset.
77
+ def seek(offset)
78
+ @file.seek offset
79
+ end
80
+ end
@@ -0,0 +1,10 @@
1
+ class Bitcask::Keydir < Hash
2
+ Entry = Struct.new :file_id, :value_sz, :value_pos, :tstamp
3
+
4
+ attr_accessor :data_files
5
+ def initialize(*a)
6
+ super *a
7
+
8
+ @data_files = []
9
+ end
10
+ end
@@ -1,3 +1,3 @@
1
1
  class Bitcask
2
- VERSION = '0.0.1'
2
+ VERSION = '0.1.0'
3
3
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 0
8
7
  - 1
9
- version: 0.0.1
8
+ - 0
9
+ version: 0.1.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Kyle Kingsbury
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-18 00:00:00 -07:00
17
+ date: 2011-06-11 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -31,10 +31,12 @@ files:
31
31
  - lib/bitcask/data_file.rb
32
32
  - lib/bitcask/version.rb
33
33
  - lib/bitcask/errors.rb
34
+ - lib/bitcask/hint_file.rb
35
+ - lib/bitcask/keydir.rb
34
36
  - LICENSE
35
37
  - README.markdown
36
38
  has_rdoc: true
37
- homepage: https://github.com/aphyr/bitcask
39
+ homepage: https://github.com/aphyr/bitcask-ruby
38
40
  licenses: []
39
41
 
40
42
  post_install_message: