bitcask 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +71 -41
- data/lib/bitcask.rb +84 -1
- data/lib/bitcask/data_file.rb +31 -10
- data/lib/bitcask/hint_file.rb +80 -0
- data/lib/bitcask/keydir.rb +10 -0
- data/lib/bitcask/version.rb +1 -1
- metadata +6 -4
data/README.markdown
CHANGED
@@ -5,51 +5,81 @@ Utilities for reading the Bitcask file format. You can use this to recover
|
|
5
5
|
deleted values (before they are compacted), recover from a backup, list keys
|
6
6
|
to do read-repair when list-keys is malfunctioning, and so forth.
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
8
|
+
Install
|
9
|
+
-------
|
10
|
+
|
11
|
+
$ gem install bitcask
|
12
|
+
|
13
|
+
Examples
|
14
|
+
--------
|
15
|
+
|
16
|
+
Open a bitcask.
|
17
|
+
|
18
|
+
b = Bitcask.new '/var/lib/riak/bitcask/0'
|
19
|
+
|
20
|
+
Load the keydir, using hintfiles where possible.
|
21
|
+
|
22
|
+
b.load
|
23
|
+
|
24
|
+
Get a specific entry:
|
25
|
+
|
26
|
+
b['test'] #=> 'value_of_test'
|
27
|
+
|
28
|
+
Iterate over all values:
|
29
|
+
|
30
|
+
b.each do |key, value|
|
31
|
+
puts key
|
32
|
+
puts value
|
33
|
+
end
|
34
|
+
|
35
|
+
In Riak, these are Erlang terms.
|
36
|
+
|
37
|
+
b.each do |key, value|
|
38
|
+
next if value == Bitcask::TOMBSTONE
|
39
|
+
|
40
|
+
bucket, key = BERT.decode key
|
41
|
+
value = BERT.decode value
|
42
|
+
|
43
|
+
# Store the object's value in riak
|
44
|
+
o = riak[bucket][key]
|
45
|
+
o.raw_data = value.last
|
46
|
+
o.store
|
47
|
+
|
48
|
+
# Or dump the entire value to a file for later inspection.
|
49
|
+
FileUtils.mkdir_p(bucket)
|
50
|
+
File.open(File.join(bucket, key), 'w') do |out|
|
51
|
+
out.write value.to_json
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
You can also work directly on the data files. Here's how to dump all keys and
|
56
|
+
values, in chronological order, excluding tombstones. Data files go in chronological
|
57
|
+
order, so this is in effect replaying history since the last merge.
|
58
|
+
|
59
|
+
b.data_files.each do |data_file|
|
60
|
+
data_file.each do |entry|
|
61
|
+
next if entry.value == Bitcask::TOMBSTONE
|
62
|
+
puts entry.key
|
63
|
+
puts entry.value
|
64
|
+
end
|
47
65
|
end
|
48
|
-
|
49
|
-
|
66
|
+
|
67
|
+
If you know the offset, you can retrieve it directly from a DataFile.
|
68
|
+
|
69
|
+
data_file[0] # => Struct {:key => 'key', :value => 'value'}
|
70
|
+
|
71
|
+
And step through values one by one.
|
72
|
+
|
73
|
+
data_file.read # => [k1, v1]
|
74
|
+
data_file.read # => [k2, v2]
|
75
|
+
|
76
|
+
Seek, rewind, and pos are also supported.
|
50
77
|
|
51
78
|
You'd be surprised how fast this is. 10,000 values/sec, easy.
|
52
79
|
|
80
|
+
Status
|
81
|
+
------
|
82
|
+
|
53
83
|
Anyone who wants to expand this, feel free. I've been using it for emergency
|
54
84
|
recovery operations, but don't plan to reimplement bitcask in Ruby myself. I
|
55
85
|
welcome pull requests.
|
data/lib/bitcask.rb
CHANGED
@@ -3,16 +3,29 @@ class Bitcask
|
|
3
3
|
|
4
4
|
$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
|
5
5
|
|
6
|
-
|
6
|
+
require 'bitcask/hint_file'
|
7
7
|
require 'bitcask/data_file'
|
8
|
+
require 'bitcask/keydir'
|
8
9
|
require 'bitcask/errors'
|
9
10
|
require 'bitcask/version'
|
10
11
|
|
12
|
+
include Enumerable
|
13
|
+
|
11
14
|
TOMBSTONE = "bitcask_tombstone"
|
12
15
|
|
13
16
|
# Opens a bitcask backed by the given directory.
|
17
|
+
attr_accessor :keydir
|
18
|
+
attr_reader :dir
|
14
19
|
def initialize(dir)
|
15
20
|
@dir = dir
|
21
|
+
@keydir = Bitcask::Keydir.new
|
22
|
+
end
|
23
|
+
|
24
|
+
# Uses the keydir to get an object from the bitcask. Returns a
|
25
|
+
# value.
|
26
|
+
def [](key)
|
27
|
+
index = @keydir[key] or return nil
|
28
|
+
@keydir.data_files[index.file_id][index.value_pos, index.value_sz].value
|
16
29
|
end
|
17
30
|
|
18
31
|
# Returns a list of all data filenames in this bitcask, sorted from oldest
|
@@ -29,4 +42,74 @@ class Bitcask
|
|
29
42
|
Bitcask::DataFile.new filename
|
30
43
|
end
|
31
44
|
end
|
45
|
+
|
46
|
+
# Iterates over all keys in keydir. Yields key, value pairs.
|
47
|
+
def each
|
48
|
+
@keydir.each do |key, index|
|
49
|
+
entry = @keydir.data_files[index.file_id][index.value_pos, index.value_sz]
|
50
|
+
yield [entry.key, entry.value]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Keydir keys.
|
55
|
+
def keys
|
56
|
+
keydir.keys
|
57
|
+
end
|
58
|
+
|
59
|
+
# Populate the keydir.
|
60
|
+
def load
|
61
|
+
data_files.each do |d|
|
62
|
+
if h = d.hint_file
|
63
|
+
load_hint_file h
|
64
|
+
else
|
65
|
+
load_data_file d
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
# Load a DataFile into the keydir.
|
71
|
+
def load_data_file(data_file)
|
72
|
+
# Determine data_file index.
|
73
|
+
@keydir.data_files |= [data_file]
|
74
|
+
file_id = @keydir.data_files.index data_file
|
75
|
+
|
76
|
+
pos = 0
|
77
|
+
data_file.each do |entry|
|
78
|
+
# Check for existing newer entry in keydir
|
79
|
+
if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
|
80
|
+
@keydir[entry.key] = Keydir::Entry.new(
|
81
|
+
file_id,
|
82
|
+
data_file.pos - pos,
|
83
|
+
pos,
|
84
|
+
entry.tstamp
|
85
|
+
)
|
86
|
+
end
|
87
|
+
|
88
|
+
pos = data_file.pos
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Load a HintFile into the keydir.
|
93
|
+
def load_hint_file(hint_file)
|
94
|
+
# Determine data_file index.
|
95
|
+
@keydir.data_files |= [hint_file.data_file]
|
96
|
+
file_id = @keydir.data_files.index hint_file.data_file
|
97
|
+
|
98
|
+
hint_file.each do |entry|
|
99
|
+
# Check for existing newer entry in keydir
|
100
|
+
if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
|
101
|
+
@keydir[entry.key] = Keydir::Entry.new(
|
102
|
+
file_id,
|
103
|
+
entry.value_sz,
|
104
|
+
entry.value_pos,
|
105
|
+
entry.tstamp
|
106
|
+
)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# Keydir size.
|
112
|
+
def size
|
113
|
+
@keydir.size
|
114
|
+
end
|
32
115
|
end
|
data/lib/bitcask/data_file.rb
CHANGED
@@ -4,18 +4,22 @@ class Bitcask::DataFile
|
|
4
4
|
# This is most definitely not threadsafe, but it's so cheap you might as well
|
5
5
|
# make lots of copies.
|
6
6
|
|
7
|
+
Entry = Struct.new :tstamp, :key, :value
|
8
|
+
|
9
|
+
include Enumerable
|
10
|
+
|
7
11
|
def initialize(filename)
|
8
12
|
@file = File.open(filename)
|
9
13
|
end
|
10
14
|
|
11
15
|
# Reads [key, value] from a particular offset.
|
12
16
|
# Also advances the cursor.
|
13
|
-
def [](offset)
|
17
|
+
def [](offset, size = nil)
|
14
18
|
seek offset
|
15
|
-
read
|
19
|
+
read size
|
16
20
|
end
|
17
21
|
|
18
|
-
# Iterates over every entry in this file, yielding
|
22
|
+
# Iterates over every entry in this file, yielding an Entry.
|
19
23
|
# Options:
|
20
24
|
# :rewind (true) - Rewind the file to the beginning, instead of starting
|
21
25
|
# right here.
|
@@ -43,28 +47,45 @@ class Bitcask::DataFile
|
|
43
47
|
end
|
44
48
|
end
|
45
49
|
|
50
|
+
def hint_file
|
51
|
+
@hint_file ||= begin
|
52
|
+
path = @file.path.sub(/\.data$/, '.hint')
|
53
|
+
if File.exists? path
|
54
|
+
h = Bitcask::HintFile.new path
|
55
|
+
h.data_file = self
|
56
|
+
h
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
46
61
|
def pos
|
47
62
|
@file.pos
|
48
63
|
end
|
49
64
|
alias tell pos
|
50
65
|
|
51
|
-
# Returns a single
|
52
|
-
#
|
66
|
+
# Returns a single Entry read from the current offset, and advances to the
|
67
|
+
# next.
|
53
68
|
#
|
54
69
|
# Can raise Bitcask::ChecksumError
|
55
|
-
def read
|
70
|
+
def read(size = nil)
|
71
|
+
if size
|
72
|
+
f = StringIO.new @file.read(size)
|
73
|
+
else
|
74
|
+
f = @file
|
75
|
+
end
|
76
|
+
|
56
77
|
# Parse header
|
57
|
-
header =
|
78
|
+
header = f.read(14) or return
|
58
79
|
crc, tstamp, ksz, value_sz = header.unpack "NNnN"
|
59
80
|
|
60
81
|
# Read data
|
61
|
-
key =
|
62
|
-
value =
|
82
|
+
key = f.read ksz
|
83
|
+
value = f.read value_sz
|
63
84
|
|
64
85
|
# CRC check
|
65
86
|
raise Bitcask::ChecksumError unless crc == Zlib.crc32(header[4..-1] + key + value)
|
66
87
|
|
67
|
-
|
88
|
+
Entry.new tstamp, key, value
|
68
89
|
end
|
69
90
|
|
70
91
|
# Rewinds the file.
|
@@ -0,0 +1,80 @@
|
|
1
|
+
class Bitcask::HintFile
|
2
|
+
# A single Bitcask hint file.
|
3
|
+
#
|
4
|
+
# This is most definitely not threadsafe, but it's so cheap you might as well
|
5
|
+
# make lots of copies.
|
6
|
+
|
7
|
+
Entry = Struct.new :tstamp, :value_sz, :value_pos, :key
|
8
|
+
|
9
|
+
include Enumerable
|
10
|
+
|
11
|
+
attr_accessor :data_file
|
12
|
+
def initialize(filename)
|
13
|
+
@file = File.open(filename)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Reads an Entry from a particular offset.
|
17
|
+
# Also advances the cursor.
|
18
|
+
def [](offset)
|
19
|
+
seek offset
|
20
|
+
read
|
21
|
+
end
|
22
|
+
|
23
|
+
# Iterates over every entry in this file, yielding an Entry.
|
24
|
+
# Options:
|
25
|
+
# :rewind (true) - Rewind the file to the beginning, instead of starting
|
26
|
+
# right here.
|
27
|
+
# :raise_checksum (false) - Raise Bitcask::ChecksumError on crc failure,
|
28
|
+
# instead of silently continuing.
|
29
|
+
def each(opts = {})
|
30
|
+
options = {
|
31
|
+
:rewind => true,
|
32
|
+
:raise_checksum => false
|
33
|
+
}.merge opts
|
34
|
+
|
35
|
+
rewind if options[:rewind]
|
36
|
+
|
37
|
+
loop do
|
38
|
+
o = read
|
39
|
+
if o
|
40
|
+
yield o
|
41
|
+
else
|
42
|
+
return self
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def pos
|
48
|
+
@file.pos
|
49
|
+
end
|
50
|
+
alias tell pos
|
51
|
+
|
52
|
+
# Returns an Entry (tstamp, value_sz, value_pos, key) read from the current
|
53
|
+
# offset, and advances to the next.
|
54
|
+
#
|
55
|
+
# Can raise Bitcask::ChecksumError
|
56
|
+
def read
|
57
|
+
# Parse header
|
58
|
+
header = @file.read(18) or return
|
59
|
+
tstamp, ksz, value_sz, value_pos1, value_pos2 = header.unpack "NnNNN"
|
60
|
+
|
61
|
+
# value_pos is an 8 byte big-endian number...
|
62
|
+
# For reference, the reverse is [value_pos >> 32, value_pos & 0xFFFFFFFF].pack("NN")
|
63
|
+
value_pos = (value_pos1 << 32) | value_pos2
|
64
|
+
|
65
|
+
# Read key
|
66
|
+
key = @file.read ksz
|
67
|
+
|
68
|
+
Entry.new tstamp, value_sz, value_pos, key
|
69
|
+
end
|
70
|
+
|
71
|
+
# Rewinds the file.
|
72
|
+
def rewind
|
73
|
+
@file.rewind
|
74
|
+
end
|
75
|
+
|
76
|
+
# Seek to a given offset.
|
77
|
+
def seek(offset)
|
78
|
+
@file.seek offset
|
79
|
+
end
|
80
|
+
end
|
data/lib/bitcask/version.rb
CHANGED
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 0
|
8
7
|
- 1
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Kyle Kingsbury
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-06-11 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -31,10 +31,12 @@ files:
|
|
31
31
|
- lib/bitcask/data_file.rb
|
32
32
|
- lib/bitcask/version.rb
|
33
33
|
- lib/bitcask/errors.rb
|
34
|
+
- lib/bitcask/hint_file.rb
|
35
|
+
- lib/bitcask/keydir.rb
|
34
36
|
- LICENSE
|
35
37
|
- README.markdown
|
36
38
|
has_rdoc: true
|
37
|
-
homepage: https://github.com/aphyr/bitcask
|
39
|
+
homepage: https://github.com/aphyr/bitcask-ruby
|
38
40
|
licenses: []
|
39
41
|
|
40
42
|
post_install_message:
|