cdb-ruby 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/cdb/constants.rb +5 -0
- data/lib/cdb/reader.rb +119 -0
- data/lib/cdb/writer.rb +77 -0
- data/lib/cdb.rb +46 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5ff7193e71ba8c7b0859abad844985f4cc5d674ccb9f93801a8c63fdfd24438e
|
4
|
+
data.tar.gz: f5b7b2a5e85ab59666230b95ab9761b969695764cf8678eadff029b8948dc5b3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b50bcd22e9574d7347c7990f8323724330ea97e1454f419aef0d2ad2a67b19fe7678ccbdc25519f079e72d77438cf9a249ceadbd10059222eb9c980f3f7595bb
|
7
|
+
data.tar.gz: e6ea469d9b0cd09ea680a18bdd4a965918a212ada615ff60e4870aacafcbe40450d7c19bded4901e2d94d5b6779fddac903a0cc837fc45f05649a4ebc0d86b6f
|
data/lib/cdb/reader.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
module Cdb
|
2
|
+
# Provides read-only access to a cdb.
#
# The wrapped object only needs to respond to +seek+ and +read+. Every
# access seeks to an absolute offset first, so the current position of the
# file does not matter.
class Reader
  # file - a seekable, readable file-like object containing a cdb.
  def initialize(file)
    @file = file
  end

  # Fetches the value associated with the given key.
  #
  # Returns nil if the key doesn't exist in the cdb.
  def [](key)
    hash = Cdb.hash(key)
    table = tables[hash % Cdb::NUM_HASHTABLES]
    return nil if table.empty?
    key_from_table(table, key, hash)
  end

  private

  # Linearly probes +table+ for +key+, starting at the slot derived from
  # +hash+.
  #
  # The search stops at the first empty slot (offset zero) or after every
  # slot has been visited once. The upper bound matters: a table without any
  # empty slot would otherwise make a lookup for a missing key spin forever.
  def key_from_table(table, key, hash)
    start = (hash / Cdb::NUM_HASHTABLES) % table.length
    table.length.times do |step|
      entry_hash, offset = table[(start + step) % table.length]
      return nil if offset.zero?
      next unless entry_hash == hash
      value = maybe_read_value(offset, key)
      return value unless value.nil?
    end
    nil
  end

  # Reads the record stored at +offset+ and returns its value if the stored
  # key equals +key+; returns nil otherwise.
  #
  # A sized read yields a binary string, so the key is compared byte-wise
  # (+key.b+). Comparing against a UTF-8 key directly would never match
  # once the key contains non-ASCII bytes.
  def maybe_read_value(offset, key)
    @file.seek(offset)
    key_length, value_length = @file.read(8).unpack('VV')
    return nil unless @file.read(key_length) == key.b
    @file.read(value_length)
  end

  # Lazily loaded, memoized list of hash tables (one per header entry).
  def tables
    @tables ||= load_tables
  end

  # Parses the header -- NUM_HASHTABLES pairs of little-endian 32-bit
  # integers (table offset, slot count) -- and loads each table it names.
  def load_tables
    read_at(0, Cdb::NUM_HASHTABLES * 8)
      .unpack('V*')
      .each_slice(2)
      .map { |offset, capacity| load_table(offset, capacity) }
  end

  # Loads one table as an array of [hash, record offset] pairs.
  def load_table(offset, cap)
    read_at(offset, cap * 8).unpack('V*').each_slice(2).to_a
  end

  # Reads +len+ bytes starting at absolute position +offset+.
  def read_at(offset, len)
    @file.seek(offset)
    @file.read(len)
  end
end
|
57
|
+
|
58
|
+
# Provides write-only access to a cdb.
#
# Records are appended to the file as they are assigned. The hash tables are
# kept in memory and only written out, together with the header, by #close.
class Writer
  # Initializes an empty cdb for writing to the given file-like object.
  #
  # The file is truncated and rewound before the placeholder header is
  # written; truncating alone leaves the position untouched, which would
  # place the header at a non-zero offset on a previously used handle.
  def self.create(file)
    file.truncate(0)
    file.rewind
    file.write(empty_header)
    new(file)
  end

  # Writes a key/value pair to the cdb.
  #
  # Attempting to write the same key twice will cause an error.
  def []=(key, value)
    offset = append(key, value)
    index(key, offset)
  end

  # Finish writing the cdb.
  #
  # This flushes the hash table structure to disk: every table is written
  # at the current file position, then the header at the start of the file
  # is overwritten with each table's (offset, slot count) pair.
  def close
    lookups = @tables.flat_map { |table| write_table(table) }
    @file.rewind
    @file.write(lookups.pack('V*'))
  end

  # Returns an empty header -- NUM_HASHTABLES pairs of 32-bit integers, all
  # containing zero.
  def self.empty_header
    "\0" * (Cdb::NUM_HASHTABLES * 8)
  end

  private

  def initialize(file)
    @file = file
    @tables = Array.new(Cdb::NUM_HASHTABLES) { HashTable.new }
  end

  # Appends one record (key length, value length, key, value) at the current
  # file position and returns the offset at which it starts.
  #
  # Lengths are byte sizes, not character counts, so that multibyte keys and
  # values are described correctly on disk.
  def append(key, value)
    offset = @file.pos
    @file.write([key.bytesize, value.bytesize, key, value].pack('VVa*a*'))
    offset
  end

  # Registers the record at +offset+ in the in-memory table for its hash.
  def index(key, offset)
    hash = Cdb.hash(key)
    table_for_hash(hash).put(HashTableEntry.new(hash, key, offset))
  end

  # Writes one table at the current position and returns its header entry,
  # [offset, slot count].
  def write_table(table)
    return [0, 0] if table.nil?
    offset = @file.pos
    @file.write(table.bytes)
    [offset, table.capacity]
  end

  def table_for_hash(hash)
    @tables[hash % Cdb::NUM_HASHTABLES]
  end
end
|
119
|
+
end
|
data/lib/cdb/writer.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module Cdb
|
2
|
+
# In-memory hash table structure. Indexes key/value pairs in a Writer.
#
# Open addressing with linear probing. The table doubles in size whenever
# adding an entry would push its load factor above
# Cdb::HASHTABLE_MAX_FULLNESS.
class HashTable
  # Creates an empty hash table.
  def initialize
    @count = 0
    @slots = []
  end

  # Adds a hash table entry to the table.
  #
  # Raises a RuntimeError if an entry with the same key is already present.
  def put(entry)
    grow if should_grow?
    @slots[find_slot(entry)] = entry
    @count += 1
  end

  # Returns the on-disk representation of a hash table (a serialized array
  # of 32-bit integers: the hash and the record offset of each slot, with
  # empty slots written as two zeros).
  def bytes
    @slots
      .flat_map { |slot| slot.nil? ? [0, 0] : [slot.hash, slot.offset] }
      .pack('V*')
  end

  # Returns the number of slots in the table.
  def capacity
    @slots.length
  end

  private

  # True when the table has no slots yet, or when storing one more entry
  # would exceed the maximum load factor.
  #
  # The division is done in floating point; integer division would yield 0
  # until the table is completely full. Counting the pending entry keeps the
  # table at or below the limit after the insertion rather than before it.
  def should_grow?
    return true if @slots.empty?
    (@count + 1).to_f / @slots.length > Cdb::HASHTABLE_MAX_FULLNESS
  end

  # Doubles the number of slots (starting at 2) and re-hashes the existing
  # entries. @count is left alone: the number of stored entries does not
  # change when they are merely moved.
  def grow
    entries = @slots.compact
    @slots = empty_slots(capacity.zero? ? 2 : capacity * 2)
    entries.each { |entry| @slots[find_slot(entry)] = entry }
  end

  # Returns the index of the first free slot on the entry's probe sequence.
  def find_slot(entry)
    index = initial_search_index(entry)
    until @slots[index].nil?
      raise "Duplicate key [#{entry.key}]" if @slots[index].key == entry.key
      index = (index + 1) % capacity
    end
    index
  end

  def empty_slots(count)
    Array.new(count)
  end

  def initial_search_index(entry)
    (entry.hash / Cdb::NUM_HASHTABLES) % capacity
  end
end
|
66
|
+
|
67
|
+
# A single slot's worth of data for a HashTable: the hash of a key, the key
# itself, and the file offset of the record it points at.
#
# Note that the +hash+ reader takes the place of Object#hash on instances.
class HashTableEntry
  attr_reader :hash
  attr_reader :key
  attr_reader :offset

  def initialize(hash, key, offset)
    @hash, @key, @offset = hash, key, offset
  end
end
|
77
|
+
end
|
data/lib/cdb.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'cdb/constants'
|
2
|
+
require 'cdb/reader'
|
3
|
+
require 'cdb/writer'
|
4
|
+
|
5
|
+
# Cdb is a lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
# (https://cr.yp.to/cdb.html).
#
# Author:: Olly Smith
# License:: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
#
# Cdbs are fast, immutable, on-disk hashtables. They're great for storing
# modest (up to 4GB) amounts of arbitrary key-value pairs. They allow random
# lookup, but no enumeration or traversal.
#
#   File.open('table.cdb', 'w+b') do |file|
#     Cdb.create(file) do |cdb|
#       cdb['key1'] = 'value1'
#       cdb['key2'] = 'value2'
#       # ...
#     end
#     reader = Cdb.open(file)
#     reader['key1']
#     # => "value1"
#   end
module Cdb
  # Write data to a cdb in a file-like object.
  #
  # Yields a Writer to the given block and closes it (writing the hash
  # tables and header) once the block returns normally. The file is
  # truncated first; it must be writable and seekable.
  def self.create(file)
    writer = Writer.create(file)
    yield(writer)
    writer.close
  end

  # Open a cdb for reading.
  #
  # Returns a Cdb::Reader wrapping the given file-like object.
  def self.open(file)
    Cdb::Reader.new(file)
  end

  # Calculate a cdb hash value.
  #
  # The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting
  # hash of 5381. The running value is truncated to 32 bits after each
  # multiplication step; since every +c+ is a single byte, the result also
  # fits in 32 bits.
  def self.hash(key)
    key.bytes.inject(Cdb::INITIAL_HASH) do |h, c|
      (((h << 5) + h) & 0xffffffff) ^ c
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cdb-ruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Olly Smith
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-03-23 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email: olly.smith@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/cdb.rb
|
20
|
+
- lib/cdb/constants.rb
|
21
|
+
- lib/cdb/reader.rb
|
22
|
+
- lib/cdb/writer.rb
|
23
|
+
homepage: https://github.com/oesmith/cdb-ruby
|
24
|
+
licenses:
|
25
|
+
- Apache-2.0
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 2.7.3
|
44
|
+
signing_key:
|
45
|
+
specification_version: 4
|
46
|
+
summary: A lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
|
47
|
+
test_files: []
|