bitcask 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +71 -41
- data/lib/bitcask.rb +84 -1
- data/lib/bitcask/data_file.rb +31 -10
- data/lib/bitcask/hint_file.rb +80 -0
- data/lib/bitcask/keydir.rb +10 -0
- data/lib/bitcask/version.rb +1 -1
- metadata +6 -4
data/README.markdown
CHANGED
@@ -5,51 +5,81 @@ Utilities for reading the Bitcask file format. You can use this to recover
|
|
5
5
|
deleted values (before they are compacted), recover from a backup, list keys
|
6
6
|
to do read-repair when list-keys is malfunctioning, and so forth.
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
8
|
+
Install
|
9
|
+
-------
|
10
|
+
|
11
|
+
$ gem install bitcask
|
12
|
+
|
13
|
+
Examples
|
14
|
+
--------
|
15
|
+
|
16
|
+
Open a bitcask.
|
17
|
+
|
18
|
+
b = Bitcask.new '/var/lib/riak/bitcask/0'
|
19
|
+
|
20
|
+
Load the keydir, using hintfiles where possible.
|
21
|
+
|
22
|
+
b.load
|
23
|
+
|
24
|
+
Get a specific entry:
|
25
|
+
|
26
|
+
b['test'] #=> 'value_of_test'
|
27
|
+
|
28
|
+
Iterate over all values:
|
29
|
+
|
30
|
+
b.each do |key, value|
|
31
|
+
puts key
|
32
|
+
puts value
|
33
|
+
end
|
34
|
+
|
35
|
+
In Riak, these are erlang terms.
|
36
|
+
|
37
|
+
b.each do |key, value|
|
38
|
+
next if value == Bitcask::TOMBSTONE
|
39
|
+
|
40
|
+
bucket, key = BERT.decode key
|
41
|
+
value = BERT.decode value
|
42
|
+
|
43
|
+
# Store the object's value in riak
|
44
|
+
o = riak[bucket][key]
|
45
|
+
o.raw_data = value.last
|
46
|
+
o.store
|
47
|
+
|
48
|
+
# Or dump the entire value to a file for later inspection.
|
49
|
+
FileUtils.mkdir_p(bucket)
|
50
|
+
File.open(File.join(bucket, key), 'w') do |out|
|
51
|
+
out.write value.to_json
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
You can also work directly on the data files. Here's how to dump all keys and
|
56
|
+
values, in chronological order, excluding tombstones. Data files go in chronological
|
57
|
+
order, so this is in effect replaying history since the last merge.
|
58
|
+
|
59
|
+
b.data_files.each do |data_file|
|
60
|
+
data_file.each do |entry|
|
61
|
+
next if entry.value == Bitcask::TOMBSTONE
|
62
|
+
puts entry.key
|
63
|
+
puts entry.value
|
64
|
+
end
|
47
65
|
end
|
48
|
-
|
49
|
-
|
66
|
+
|
67
|
+
If you know the offset, you can retrieve it directly from a DataFile.
|
68
|
+
|
69
|
+
data_file[0] # => Struct {:key => 'key', :value => 'value'}
|
70
|
+
|
71
|
+
And step through values one by one.
|
72
|
+
|
73
|
+
data_file.read # => [k1, v1]
|
74
|
+
data_file.read # => [k2, v2]
|
75
|
+
|
76
|
+
Seek, rewind, and pos are also supported.
|
50
77
|
|
51
78
|
You'd be surprised how fast this is. 10,000 values/sec, easy.
|
52
79
|
|
80
|
+
Status
|
81
|
+
------
|
82
|
+
|
53
83
|
Anyone who wants to expand this, feel free. I've been using it for emergency
|
54
84
|
recovery operations, but don't plan to reimplement bitcask in Ruby myself. I
|
55
85
|
welcome pull requests.
|
data/lib/bitcask.rb
CHANGED
@@ -3,16 +3,29 @@ class Bitcask
|
|
3
3
|
|
4
4
|
$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
|
5
5
|
|
6
|
-
|
6
|
+
require 'bitcask/hint_file'
|
7
7
|
require 'bitcask/data_file'
|
8
|
+
require 'bitcask/keydir'
|
8
9
|
require 'bitcask/errors'
|
9
10
|
require 'bitcask/version'
|
10
11
|
|
12
|
+
include Enumerable
|
13
|
+
|
11
14
|
TOMBSTONE = "bitcask_tombstone"
|
12
15
|
|
13
16
|
# Opens a bitcask backed by the given directory.
|
17
|
+
attr_accessor :keydir
|
18
|
+
attr_reader :dir
|
14
19
|
def initialize(dir)
|
15
20
|
@dir = dir
|
21
|
+
@keydir = Bitcask::Keydir.new
|
22
|
+
end
|
23
|
+
|
24
|
+
# Uses the keydir to get an object from the bitcask. Returns a
|
25
|
+
# value.
|
26
|
+
def [](key)
|
27
|
+
index = @keydir[key] or return nil
|
28
|
+
@keydir.data_files[index.file_id][index.value_pos, index.value_sz].value
|
16
29
|
end
|
17
30
|
|
18
31
|
# Returns a list of all data filenames in this bitcask, sorted from oldest
|
@@ -29,4 +42,74 @@ class Bitcask
|
|
29
42
|
Bitcask::DataFile.new filename
|
30
43
|
end
|
31
44
|
end
|
45
|
+
|
46
|
+
# Iterates over all keys in keydir. Yields key, value pairs.
|
47
|
+
def each
|
48
|
+
@keydir.each do |key, index|
|
49
|
+
entry = @keydir.data_files[index.file_id][index.value_pos, index.value_sz]
|
50
|
+
yield [entry.key, entry.value]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Keydir keys.
|
55
|
+
def keys
|
56
|
+
keydir.keys
|
57
|
+
end
|
58
|
+
|
59
|
+
# Populate the keydir.
|
60
|
+
def load
|
61
|
+
data_files.each do |d|
|
62
|
+
if h = d.hint_file
|
63
|
+
load_hint_file h
|
64
|
+
else
|
65
|
+
load_data_file d
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
# Load a DataFile into the keydir.
|
71
|
+
def load_data_file(data_file)
|
72
|
+
# Determine data_file index.
|
73
|
+
@keydir.data_files |= [data_file]
|
74
|
+
file_id = @keydir.data_files.index data_file
|
75
|
+
|
76
|
+
pos = 0
|
77
|
+
data_file.each do |entry|
|
78
|
+
# Check for existing newer entry in keydir
|
79
|
+
if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
|
80
|
+
@keydir[entry.key] = Keydir::Entry.new(
|
81
|
+
file_id,
|
82
|
+
data_file.pos - pos,
|
83
|
+
pos,
|
84
|
+
entry.tstamp
|
85
|
+
)
|
86
|
+
end
|
87
|
+
|
88
|
+
pos = data_file.pos
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Load a HintFile into the keydir.
|
93
|
+
def load_hint_file(hint_file)
|
94
|
+
# Determine data_file index.
|
95
|
+
@keydir.data_files |= [hint_file.data_file]
|
96
|
+
file_id = @keydir.data_files.index hint_file.data_file
|
97
|
+
|
98
|
+
hint_file.each do |entry|
|
99
|
+
# Check for existing newer entry in keydir
|
100
|
+
if (cur = @keydir[entry.key]).nil? or entry.tstamp >= cur.tstamp
|
101
|
+
@keydir[entry.key] = Keydir::Entry.new(
|
102
|
+
file_id,
|
103
|
+
entry.value_sz,
|
104
|
+
entry.value_pos,
|
105
|
+
entry.tstamp
|
106
|
+
)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# Keydir size.
|
112
|
+
def size
|
113
|
+
@keydir.size
|
114
|
+
end
|
32
115
|
end
|
data/lib/bitcask/data_file.rb
CHANGED
@@ -4,18 +4,22 @@ class Bitcask::DataFile
|
|
4
4
|
# This is most definitely not threadsafe, but it's so cheap you might as well
|
5
5
|
# make lots of copies.
|
6
6
|
|
7
|
+
Entry = Struct.new :tstamp, :key, :value
|
8
|
+
|
9
|
+
include Enumerable
|
10
|
+
|
7
11
|
def initialize(filename)
|
8
12
|
@file = File.open(filename)
|
9
13
|
end
|
10
14
|
|
11
15
|
# Reads [key, value] from a particular offset.
|
12
16
|
# Also advances the cursor.
|
13
|
-
def [](offset)
|
17
|
+
def [](offset, size = nil)
|
14
18
|
seek offset
|
15
|
-
read
|
19
|
+
read size
|
16
20
|
end
|
17
21
|
|
18
|
-
# Iterates over every entry in this file, yielding
|
22
|
+
# Iterates over every entry in this file, yielding an Entry.
|
19
23
|
# Options:
|
20
24
|
# :rewind (true) - Rewind the file to the beginning, instead of starting
|
21
25
|
# right here.
|
@@ -43,28 +47,45 @@ class Bitcask::DataFile
|
|
43
47
|
end
|
44
48
|
end
|
45
49
|
|
50
|
+
def hint_file
|
51
|
+
@hint_file ||= begin
|
52
|
+
path = @file.path.sub(/\.data$/, '.hint')
|
53
|
+
if File.exists? path
|
54
|
+
h = Bitcask::HintFile.new path
|
55
|
+
h.data_file = self
|
56
|
+
h
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
46
61
|
def pos
|
47
62
|
@file.pos
|
48
63
|
end
|
49
64
|
alias tell pos
|
50
65
|
|
51
|
-
# Returns a single
|
52
|
-
#
|
66
|
+
# Returns a single Entry read from the current offset, and advances to the
|
67
|
+
# next.
|
53
68
|
#
|
54
69
|
# Can raise Bitcask::ChecksumError
|
55
|
-
def read
|
70
|
+
def read(size = nil)
|
71
|
+
if size
|
72
|
+
f = StringIO.new @file.read(size)
|
73
|
+
else
|
74
|
+
f = @file
|
75
|
+
end
|
76
|
+
|
56
77
|
# Parse header
|
57
|
-
header =
|
78
|
+
header = f.read(14) or return
|
58
79
|
crc, tstamp, ksz, value_sz = header.unpack "NNnN"
|
59
80
|
|
60
81
|
# Read data
|
61
|
-
key =
|
62
|
-
value =
|
82
|
+
key = f.read ksz
|
83
|
+
value = f.read value_sz
|
63
84
|
|
64
85
|
# CRC check
|
65
86
|
raise Bitcask::ChecksumError unless crc == Zlib.crc32(header[4..-1] + key + value)
|
66
87
|
|
67
|
-
|
88
|
+
Entry.new tstamp, key, value
|
68
89
|
end
|
69
90
|
|
70
91
|
# Rewinds the file.
|
@@ -0,0 +1,80 @@
|
|
1
|
+
class Bitcask::HintFile
|
2
|
+
# A single Bitcask hint file.
|
3
|
+
#
|
4
|
+
# This is most definitely not threadsafe, but it's so cheap you might as well
|
5
|
+
# make lots of copies.
|
6
|
+
|
7
|
+
Entry = Struct.new :tstamp, :value_sz, :value_pos, :key
|
8
|
+
|
9
|
+
include Enumerable
|
10
|
+
|
11
|
+
attr_accessor :data_file
|
12
|
+
def initialize(filename)
|
13
|
+
@file = File.open(filename)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Reads a single Entry starting at a particular offset.
|
17
|
+
# Also advances the cursor.
|
18
|
+
def [](offset)
|
19
|
+
seek offset
|
20
|
+
read
|
21
|
+
end
|
22
|
+
|
23
|
+
# Iterates over every entry in this file, yielding an Entry.
|
24
|
+
# Options:
|
25
|
+
# :rewind (true) - Rewind the file to the beginning, instead of starting
|
26
|
+
# right here.
|
27
|
+
# :raise_checksum (false) - Raise Bitcask::ChecksumError on crc failure,
|
28
|
+
# instead of silently continuing.
|
29
|
+
def each(opts = {})
|
30
|
+
options = {
|
31
|
+
:rewind => true,
|
32
|
+
:raise_checksum => false
|
33
|
+
}.merge opts
|
34
|
+
|
35
|
+
rewind if options[:rewind]
|
36
|
+
|
37
|
+
loop do
|
38
|
+
o = read
|
39
|
+
if o
|
40
|
+
yield o
|
41
|
+
else
|
42
|
+
return self
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def pos
|
48
|
+
@file.pos
|
49
|
+
end
|
50
|
+
alias tell pos
|
51
|
+
|
52
|
+
# Returns an Entry (tstamp, value_sz, value_pos, key) read from the current
|
53
|
+
# offset, and advances to the next.
|
54
|
+
#
|
55
|
+
# Performs no CRC check, so it does not raise Bitcask::ChecksumError.
|
56
|
+
def read
|
57
|
+
# Parse header
|
58
|
+
header = @file.read(18) or return
|
59
|
+
tstamp, ksz, value_sz, value_pos1, value_pos2 = header.unpack "NnNNN"
|
60
|
+
|
61
|
+
# value_pos is an 8 byte big-endian number...
|
62
|
+
# For reference, reverse is [value_pos >> 32, value_pos & 0xFFFFFFFF].pack("NN")
|
63
|
+
value_pos = (value_pos1 << 32) | value_pos2
|
64
|
+
|
65
|
+
# Read key
|
66
|
+
key = @file.read ksz
|
67
|
+
|
68
|
+
Entry.new tstamp, value_sz, value_pos, key
|
69
|
+
end
|
70
|
+
|
71
|
+
# Rewinds the file.
|
72
|
+
def rewind
|
73
|
+
@file.rewind
|
74
|
+
end
|
75
|
+
|
76
|
+
# Seek to a given offset.
|
77
|
+
def seek(offset)
|
78
|
+
@file.seek offset
|
79
|
+
end
|
80
|
+
end
|
data/lib/bitcask/version.rb
CHANGED
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 0
|
8
7
|
- 1
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Kyle Kingsbury
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-06-11 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -31,10 +31,12 @@ files:
|
|
31
31
|
- lib/bitcask/data_file.rb
|
32
32
|
- lib/bitcask/version.rb
|
33
33
|
- lib/bitcask/errors.rb
|
34
|
+
- lib/bitcask/hint_file.rb
|
35
|
+
- lib/bitcask/keydir.rb
|
34
36
|
- LICENSE
|
35
37
|
- README.markdown
|
36
38
|
has_rdoc: true
|
37
|
-
homepage: https://github.com/aphyr/bitcask
|
39
|
+
homepage: https://github.com/aphyr/bitcask-ruby
|
38
40
|
licenses: []
|
39
41
|
|
40
42
|
post_install_message:
|