bitcask 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,62 @@
1
+ Bitcask
2
+ =======
3
+
4
+ Utilities for reading the Bitcask file format. You can use this to recover
5
+ deleted values (before they are compacted), recover from a backup, list keys
6
+ to do read-repair when list-keys is malfunctioning, and so forth.
7
+
8
+ # Open a bitcask.
9
+ b = Bitcask.new '/var/lib/riak/bitcask/0'
10
+
11
+ # Dump all keys and values, in chronological order, excluding tombstones.
12
+ # Data files go in chronological order, so this is in effect replaying history.
13
+ b.data_files.each do |data_file|
14
+ data_file.each do |key, value|
15
+ next if value == Bitcask::TOMBSTONE
16
+ puts key
17
+ puts value
18
+ end
19
+ end
20
+
21
+ # If you know the offset, you can retrieve it directly.
22
+ data_file[0] # => ["key", "value"]
23
+
24
+ # And step through values one by one.
25
+ data_file.read # => [k1, v1]
26
+ data_file.read # => [k2, v2]
27
+
28
+ # Seek, rewind, and pos are also supported.
29
+
30
+ # In Riak, these are erlang terms.
31
+ b.data_files.each do |data_file|
32
+ data_file.each do |key, value|
33
+ next if value == Bitcask::TOMBSTONE
34
+
35
+ bucket, key = BERT.decode key
36
+ value = BERT.decode value
37
+
38
+ # Store the object's value in riak
39
+ o = riak[bucket][key]
40
+ o.raw_data = value.last
41
+ o.store
42
+
43
+ # Or dump the entire value to a file for later inspection.
44
+ FileUtils.mkdir_p(bucket)
45
+ File.open(File.join(bucket, key), 'w') do |out|
46
+ out.write value.to_json
47
+ end
48
+ end
49
+ end
50
+
51
+ You'd be surprised how fast this is. 10,000 values/sec, easy.
52
+
53
+ Anyone who wants to expand this, feel free. I've been using it for emergency
54
+ recovery operations, but don't plan to reimplement bitcask in Ruby myself. I
55
+ welcome pull requests.
56
+
57
+ License
58
+ -------
59
+
60
+ This software was written by Kyle Kingsbury <aphyr@aphyr.com>, at Remixation,
61
+ Inc., for their iPad social video app "Showyou". Released under the MIT
62
+ license.
@@ -0,0 +1,32 @@
1
# Read-only view of a Bitcask directory: locates the directory's *.data files
# and wraps each one in a Bitcask::DataFile.
class Bitcask
  require 'zlib'

  $LOAD_PATH << File.expand_path(File.dirname(__FILE__))

  # A single data file
  require 'bitcask/data_file'
  require 'bitcask/errors'
  require 'bitcask/version'

  # Marker value of a tombstone entry; callers compare an entry's value against
  # this constant to skip tombstones.
  TOMBSTONE = "bitcask_tombstone"

  # Opens a bitcask backed by the given directory.
  #
  # dir - path of the directory that holds the *.data files.
  def initialize(dir)
    @dir = dir
  end

  # Returns a list of all data filenames in this bitcask, sorted from oldest
  # to newest.
  #
  # Ordering uses the integer at the start of each file's *base name*
  # (presumably a creation timestamp -- confirm against the writer). The
  # directory part must be stripped first: String#to_i on a full path such as
  # "/var/lib/.../123.data" is always 0, which would leave the list unsorted.
  # The path itself is the tie-breaker so the result is deterministic.
  def data_file_names
    Dir.glob(File.join(@dir, '*.data')).sort_by do |path|
      [File.basename(path).to_i, path]
    end
  end

  # Returns a list of Bitcask::DataFiles in chronological order (the order
  # produced by #data_file_names). Each call opens fresh DataFile objects.
  def data_files
    data_file_names.map do |filename|
      Bitcask::DataFile.new filename
    end
  end
end
@@ -0,0 +1,79 @@
1
class Bitcask
  # Needed for the CRC check in DataFile#read; required here so this file can
  # be loaded on its own.
  require 'zlib'

  # A single Bitcask data file.
  #
  # This is most definitely not threadsafe, but it's so cheap you might as well
  # make lots of copies.
  class DataFile
    # Size in bytes of the fixed entry header, as unpacked with "NNnN":
    # crc (4), timestamp (4), key size (2), value size (4).
    HEADER_SIZE = 14

    # Opens the data file at +filename+ for reading.
    # Binary mode keeps the bytes untouched on every platform.
    def initialize(filename)
      @file = File.open(filename, 'rb')
    end

    # Reads [key, value] from a particular offset.
    # Also advances the cursor.
    def [](offset)
      seek offset
      read
    end

    # Closes the underlying file handle. Safe to call more than once.
    def close
      @file.close unless @file.closed?
    end

    # Iterates over every entry in this file, yielding the key and value.
    # Returns self once the end of the file is reached.
    # Options:
    #   :rewind (true) - Rewind the file to the beginning, instead of starting
    #                    right here.
    #   :raise_checksum (false) - Raise Bitcask::ChecksumError on crc failure,
    #                             instead of silently continuing.
    #   :raise - older spelling of :raise_checksum, still honored because it
    #            was the key this method actually checked.
    def each(opts = {})
      options = {
        :rewind => true,
        :raise_checksum => false
      }.merge opts

      raise_on_crc = options[:raise_checksum] || options[:raise]

      rewind if options[:rewind]

      loop do
        begin
          entry = read
          return self unless entry
          yield entry
        rescue Bitcask::ChecksumError
          # The cursor is already past the bad entry, so continuing simply
          # moves on to the next one.
          raise if raise_on_crc
        end
      end
    end

    # Current byte offset of the cursor within the file.
    def pos
      @file.pos
    end
    alias tell pos

    # Returns a single [key, value] pair read from the current offset,
    # and advances to the next. Returns nil at end of file.
    #
    # Can raise Bitcask::ChecksumError, either because the stored crc does not
    # match the entry or because the entry is cut short by the end of the file.
    def read
      # Parse header
      header = @file.read(HEADER_SIZE)
      return unless header
      if header.size < HEADER_SIZE
        raise Bitcask::ChecksumError, 'truncated entry header'
      end
      crc, _tstamp, ksz, value_sz = header.unpack "NNnN"

      # Read data. IO#read returns nil when asked for bytes at end of file.
      key = @file.read ksz
      value = @file.read value_sz
      unless key && value
        raise Bitcask::ChecksumError, 'truncated entry body'
      end

      # CRC check: covers everything after the 4-byte crc field itself.
      unless crc == Zlib.crc32(header[4..-1] + key + value)
        raise Bitcask::ChecksumError
      end

      [key, value]
    end

    # Rewinds the file.
    def rewind
      @file.rewind
    end

    # Seek to a given offset.
    def seek(offset)
      @file.seek offset
    end
  end
end
@@ -0,0 +1,5 @@
1
# Error classes for the Bitcask reader. Defined in nested form so this file
# loads on its own; the compact `class Bitcask::Error` form raises NameError
# unless Bitcask has already been defined.
class Bitcask
  # Base class for the errors defined by this library.
  class Error < RuntimeError
  end

  # Raised by Bitcask::DataFile#read when an entry's stored crc does not match
  # the crc computed over its contents.
  class ChecksumError < Error
  end
end
@@ -0,0 +1,3 @@
1
class Bitcask
  # Release number of this library, as a dotted version string.
  VERSION = "0.0.1"
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bitcask
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Kyle Kingsbury
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-05-18 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description:
22
+ email: aphyr@aphyr.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - lib/bitcask.rb
31
+ - lib/bitcask/data_file.rb
32
+ - lib/bitcask/version.rb
33
+ - lib/bitcask/errors.rb
34
+ - LICENSE
35
+ - README.markdown
36
+ has_rdoc: true
37
+ homepage: https://github.com/aphyr/bitcask
38
+ licenses: []
39
+
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ segments:
50
+ - 1
51
+ - 8
52
+ - 6
53
+ version: 1.8.6
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project: bitcask
64
+ rubygems_version: 1.3.6
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: An (incomplete) interface to the Bitcask storage system
68
+ test_files: []
69
+