bitcask 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,62 @@
1
+ Bitcask
2
+ =======
3
+
4
+ Utilities for reading the Bitcask file format. You can use this to recover
5
+ deleted values (before they are compacted), recover from a backup, list keys
6
+ to do read-repair when list-keys is malfunctioning, and so forth.
7
+
8
+ # Open a bitcask.
9
+ b = Bitcask.new '/var/lib/riak/bitcask/0'
10
+
11
+ # Dump all keys and values, in chronological order, excluding tombstones.
12
+ # Data files go in chronological order, so this is in effect replaying history.
13
+ b.data_files.each do |data_file|
14
+ data_file.each do |key, value|
15
+ next if value == Bitcask::TOMBSTONE
16
+ puts key
17
+ puts value
18
+ end
19
+ end
20
+
21
+ # If you know the offset, you can retrieve it directly.
22
+ data_file[0] # => ["key", "value"]
23
+
24
+ # And step through values one by one.
25
+ data_file.read # => [k1, v1]
26
+ data_file.read # => [k2, v2]
27
+
28
+ # Seek, rewind, and pos are also supported.
29
+
30
+ # In Riak, the keys and values are Erlang terms.
31
+ b.data_files.each do |data_file|
32
+ data_file.each do |key, value|
33
+ next if value == Bitcask::TOMBSTONE
34
+
35
+ bucket, key = BERT.decode key
36
+ value = BERT.decode value
37
+
38
+ # Store the object's value in riak
39
+ o = riak[bucket][key]
40
+ o.raw_data = value.last
41
+ o.store
42
+
43
+ # Or dump the entire value to a file for later inspection.
44
+ FileUtils.mkdir_p(bucket)
45
+ File.open(File.join(bucket, key), 'w') do |out|
46
+ out.write value.to_json
47
+ end
48
+ end
49
+ end
50
+
51
+ You'd be surprised how fast this is. 10,000 values/sec, easy.
52
+
53
+ Anyone who wants to expand this, feel free. I've been using it for emergency
54
+ recovery operations, but don't plan to reimplement bitcask in Ruby myself. I
55
+ welcome pull requests.
56
+
57
+ License
58
+ -------
59
+
60
+ This software was written by Kyle Kingsbury <aphyr@aphyr.com>, at Remixation,
61
+ Inc., for their iPad social video app "Showyou". Released under the MIT
62
+ license.
@@ -0,0 +1,32 @@
1
# Read-only view of a bitcask directory, i.e. a folder containing "*.data"
# files. Each data file is wrapped in a Bitcask::DataFile for reading.
class Bitcask
  require 'zlib'

  # Make the sibling bitcask/* files loadable without the gem being installed.
  $LOAD_PATH << File.expand_path(File.dirname(__FILE__))

  # A single data file
  require 'bitcask/data_file'
  require 'bitcask/errors'
  require 'bitcask/version'

  # Value stored in place of a deleted entry; compare read values against
  # this constant to skip deletions.
  TOMBSTONE = "bitcask_tombstone"

  # Opens a bitcask backed by the given directory.
  #
  # Only the directory path is remembered here; nothing is read from disk
  # until data_file_names or data_files is called.
  def initialize(dir)
    @dir = dir
  end

  # Returns a list of all data filenames in this bitcask, sorted from oldest
  # to newest.
  #
  # Ordering uses the integer prefix of each file's *basename*. Calling to_i
  # on the full path would yield 0 for any path that does not itself start
  # with digits (e.g. "/var/lib/..."), which makes every comparison a tie and
  # leaves the order undefined. The full path is used as a tie-breaker so the
  # result is deterministic.
  def data_file_names
    Dir.glob(File.join(@dir, '*.data')).sort_by do |path|
      [File.basename(path).to_i, path]
    end
  end

  # Returns a list of Bitcask::DataFiles in chronological order.
  #
  # Every call opens a fresh DataFile (and therefore a fresh file handle) per
  # data file.
  def data_files
    data_file_names.map do |filename|
      Bitcask::DataFile.new filename
    end
  end
end
@@ -0,0 +1,79 @@
1
class Bitcask
  # read uses Zlib.crc32; requiring it here keeps this file loadable on its own.
  require 'zlib'

  # A single Bitcask data file.
  #
  # Each record starts with a 14-byte big-endian header -- crc32 (4 bytes),
  # timestamp (4), key size (2), value size (4) -- followed by the key bytes
  # and then the value bytes. The CRC covers everything after the crc field.
  #
  # This is most definitely not threadsafe, but it's so cheap you might as well
  # make lots of copies.
  class DataFile
    # Number of bytes in a record header.
    HEADER_SIZE = 14

    # String#unpack format for the header: crc, timestamp, key size, value size.
    HEADER_FORMAT = 'NNnN'.freeze

    # Opens +filename+ for reading. Binary mode is used so that no newline or
    # encoding translation is applied to the raw record bytes.
    def initialize(filename)
      @file = File.open(filename, 'rb')
    end

    # Reads [key, value] from a particular offset.
    # Also advances the cursor.
    #
    # Can raise Bitcask::ChecksumError.
    def [](offset)
      seek offset
      read
    end

    # Iterates over every entry in this file, yielding the key and value.
    # Returns self once the end of the file is reached.
    #
    # Options:
    #   :rewind (true) - Rewind the file to the beginning, instead of starting
    #                    right here.
    #   :raise_checksum (false) - Raise Bitcask::ChecksumError on crc failure,
    #                             instead of silently continuing.
    #   :raise - legacy spelling of :raise_checksum, still honoured for
    #            callers that relied on it.
    def each(opts = {})
      options = {
        :rewind => true,
        :raise_checksum => false
      }.merge opts
      raise_on_checksum = options[:raise_checksum] || options[:raise]

      rewind if options[:rewind]

      loop do
        begin
          entry = read
        rescue Bitcask::ChecksumError
          # Only failures from read are handled here; errors raised by the
          # caller's block are never swallowed.
          raise if raise_on_checksum
          next
        end

        return self unless entry
        yield entry
      end
    end

    # Current byte offset of the cursor within the file.
    def pos
      @file.pos
    end
    alias tell pos

    # Returns a single [key, value] pair read from the current offset,
    # and advances to the next. Returns nil at end of file.
    #
    # Raises Bitcask::ChecksumError when the stored CRC does not match, or when
    # the record is cut short (incomplete header, key or value), e.g. after an
    # interrupted write.
    def read
      header = @file.read(HEADER_SIZE)
      return if header.nil?

      # The file is opened in binary mode, so String#size counts bytes.
      if header.size < HEADER_SIZE
        raise Bitcask::ChecksumError, 'truncated record header'
      end

      crc, _tstamp, ksz, value_sz = header.unpack(HEADER_FORMAT)

      key = @file.read(ksz)
      value = @file.read(value_sz)

      # IO#read returns nil (or a short string) when the file ends early.
      unless key && value && key.size == ksz && value.size == value_sz
        raise Bitcask::ChecksumError, 'truncated record body'
      end

      # The CRC is computed over the header minus the 4-byte crc field itself.
      unless crc == Zlib.crc32(header[4..-1] + key + value)
        raise Bitcask::ChecksumError, 'crc mismatch'
      end

      [key, value]
    end

    # Rewinds the file.
    def rewind
      @file.rewind
    end

    # Seek to a given offset.
    def seek(offset)
      @file.seek offset
    end

    # Closes the underlying file handle. Safe to call more than once.
    def close
      @file.close unless @file.closed?
    end
  end
end
@@ -0,0 +1,5 @@
1
# Root of the Bitcask error hierarchy; rescue this to catch any error class
# defined under Bitcask.
class Bitcask::Error < RuntimeError; end
3
+
4
# Raised when a record's stored CRC does not match the CRC computed from the
# bytes that were read.
class Bitcask::ChecksumError < Bitcask::Error; end
@@ -0,0 +1,3 @@
1
# Reopens Bitcask solely to attach the library's version string.
class Bitcask
  # Version of this library, as a dotted string.
  VERSION = '0.0.1'
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bitcask
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Kyle Kingsbury
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-05-18 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description:
22
+ email: aphyr@aphyr.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - lib/bitcask.rb
31
+ - lib/bitcask/data_file.rb
32
+ - lib/bitcask/version.rb
33
+ - lib/bitcask/errors.rb
34
+ - LICENSE
35
+ - README.markdown
36
+ has_rdoc: true
37
+ homepage: https://github.com/aphyr/bitcask
38
+ licenses: []
39
+
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ segments:
50
+ - 1
51
+ - 8
52
+ - 6
53
+ version: 1.8.6
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project: bitcask
64
+ rubygems_version: 1.3.6
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: An (incomplete) interface to the Bitcask storage system
68
+ test_files: []
69
+