cdb-ruby 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/cdb/constants.rb +5 -0
- data/lib/cdb/reader.rb +119 -0
- data/lib/cdb/writer.rb +77 -0
- data/lib/cdb.rb +46 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5ff7193e71ba8c7b0859abad844985f4cc5d674ccb9f93801a8c63fdfd24438e
|
4
|
+
data.tar.gz: f5b7b2a5e85ab59666230b95ab9761b969695764cf8678eadff029b8948dc5b3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b50bcd22e9574d7347c7990f8323724330ea97e1454f419aef0d2ad2a67b19fe7678ccbdc25519f079e72d77438cf9a249ceadbd10059222eb9c980f3f7595bb
|
7
|
+
data.tar.gz: e6ea469d9b0cd09ea680a18bdd4a965918a212ada615ff60e4870aacafcbe40450d7c19bded4901e2d94d5b6779fddac903a0cc837fc45f05649a4ebc0d86b6f
|
data/lib/cdb/reader.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
module Cdb
|
2
|
+
# Provides read-only access to a cdb.
|
3
|
+
class Reader
  # Wraps a file-like object containing a cdb.
  #
  # +file+ must respond to +seek+ and +read+; nothing is read until the
  # first lookup.
  def initialize(file)
    @file = file
  end

  # Fetches the value associated with the given key.
  #
  # The key is compared byte-for-byte, independent of its String encoding,
  # because the bytes read back from the file carry no encoding of their own.
  #
  # Returns the stored value as read from the file, or nil if the key
  # doesn't exist in the cdb.
  def [](key)
    key = key.b
    hash = Cdb.hash(key)
    table = tables[hash % Cdb::NUM_HASHTABLES]
    return nil if table.empty?

    key_from_table(table, key, hash)
  end

  private

  # Probes +table+ (an array of [hash, offset] pairs) linearly, starting at
  # the slot derived from +hash+, until the key is found or an empty slot
  # (offset zero) ends the chain.
  #
  # At most table.length slots are examined, so a table without any empty
  # slot cannot cause an endless loop when the key is absent.
  def key_from_table(table, key, hash)
    start = (hash / Cdb::NUM_HASHTABLES) % table.length
    table.length.times do |step|
      entry_hash, offset = table[(start + step) % table.length]
      return nil if offset.zero?
      next unless entry_hash == hash

      value = maybe_read_value(offset, key)
      return value unless value.nil?
    end
    nil
  end

  # Reads the record at +offset+ and returns its value if the record's key
  # equals +key+ (expected to be a binary String); returns nil otherwise.
  #
  # A record is two little-endian 32-bit lengths followed by the key bytes
  # and the value bytes.
  def maybe_read_value(offset, key)
    @file.seek(offset)
    key_length, value_length = @file.read(8).unpack('VV')
    # A different length can never match, so skip reading the key at all.
    return nil unless key_length == key.bytesize
    return nil unless @file.read(key_length) == key

    @file.read(value_length)
  end

  # Lazily loaded, memoized list of all hash tables in the file.
  def tables
    @tables ||= load_tables
  end

  # Reads the header at the start of the file: NUM_HASHTABLES pairs of
  # (table offset, slot count), and loads each referenced table.
  def load_tables
    read_at(0, Cdb::NUM_HASHTABLES * 8)
      .unpack('V*')
      .each_slice(2)
      .map { |offset, capacity| load_table(offset, capacity) }
  end

  # Loads one table of +cap+ slots at +offset+ as an array of
  # [hash, record offset] pairs.
  def load_table(offset, cap)
    read_at(offset, cap * 8).unpack('V*').each_slice(2).to_a
  end

  # Reads +len+ bytes starting at absolute position +offset+.
  def read_at(offset, len)
    @file.seek(offset)
    @file.read(len)
  end
end
|
57
|
+
|
58
|
+
# Provides write-only access to a cdb.
|
59
|
+
class Writer
  # Initializes an empty cdb for writing to the given file-like object.
  #
  # The file is emptied and rewound before the placeholder header is
  # written: truncating does not move the current position, so without the
  # rewind a file that had already been read or written would receive its
  # header at the wrong offset.
  def self.create(file)
    file.truncate(0)
    file.rewind
    file.write(empty_header)
    Writer.new(file)
  end

  # Writes a key/value pair to the cdb.
  #
  # The record is appended to the file first and then indexed in memory.
  # Attempting to write the same key twice will cause an error (raised by
  # the in-memory hash table while indexing).
  def []=(key, value)
    offset = append(key, value)
    index(key, offset)
  end

  # Finish writing the cdb.
  #
  # This flushes the hash table structure to disk: every table is written
  # at the current file position, then the header at the start of the file
  # is overwritten with each table's (offset, slot count) pair.
  def close
    lookups = @tables.map { |t| write_table(t) }
    @file.rewind
    @file.write(lookups.flatten.pack('V*'))
  end

  # Returns an empty header -- NUM_HASHTABLES pairs of 32-bit integers, all
  # containing zero.
  def self.empty_header
    "\0" * (Cdb::NUM_HASHTABLES * 8)
  end

  private

  # Use Writer.create to obtain an instance for a fresh file.
  def initialize(file)
    @file = file
    @tables = Array.new(Cdb::NUM_HASHTABLES) { Cdb::HashTable.new }
  end

  # Appends one record (key length, value length, key bytes, value bytes)
  # at the current file position and returns the offset it was written at.
  #
  # Lengths are measured in bytes, not characters, so multibyte keys and
  # values are recorded with the length the reader will actually consume.
  def append(key, value)
    offset = @file.pos
    @file.write([key.bytesize, value.bytesize, key, value].pack('VVa*a*'))
    offset
  end

  # Registers the record at +offset+ under +key+ in the matching table.
  def index(key, offset)
    hash = Cdb.hash(key)
    table_for_hash(hash).put(Cdb::HashTableEntry.new(hash, key, offset))
  end

  # Writes a table at the current file position and returns the
  # [offset, slot count] pair that goes into the header for it.
  def write_table(table)
    return [0, 0] if table.nil?

    offset = @file.pos
    @file.write(table.bytes)
    [offset, table.capacity]
  end

  # Selects the table responsible for +hash+.
  def table_for_hash(hash)
    @tables[hash % Cdb::NUM_HASHTABLES]
  end
end
|
119
|
+
end
|
data/lib/cdb/writer.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module Cdb
|
2
|
+
# In-memory hash table structure. Indexes key/value pairs in a Writer.
|
3
|
+
class HashTable
  # Creates an empty hash table.
  def initialize
    @count = 0
    @slots = []
  end

  # Adds a hash table entry to the table, growing the table first when the
  # insertion would make it too full.
  #
  # Raises a RuntimeError if an entry with the same key is already present.
  # Returns the number of entries stored after the insertion.
  def put(entry)
    grow if should_grow?
    insert(entry)
  end

  # Returns the on-disk representation of a hash table: for every slot a
  # pair of little-endian 32-bit integers, (hash, record offset) for an
  # occupied slot and (0, 0) for an empty one.
  def bytes
    @slots.flat_map { |slot| slot ? [slot.hash, slot.offset] : [0, 0] }
          .pack('V*')
  end

  # Returns the number of slots in the table.
  def capacity
    @slots.length
  end

  private

  # Places +entry+ in its slot and counts it, without any growth check.
  def insert(entry)
    @slots[find_slot(entry)] = entry
    @count += 1
  end

  # True when storing one more entry would fill the last free slot or push
  # the load factor above Cdb::HASHTABLE_MAX_FULLNESS.
  #
  # The check is made against the state *after* the pending insertion and
  # uses floating-point division, so the table always keeps at least one
  # empty slot. Linear probing for an absent key relies on reaching an
  # empty slot to stop.
  def should_grow?
    return true if @slots.empty?

    projected = @count + 1
    projected >= capacity ||
      projected.to_f / capacity > Cdb::HASHTABLE_MAX_FULLNESS
  end

  # Doubles the number of slots (starting at 2) and re-inserts the existing
  # entries. The entry count is rebuilt from zero so that re-inserted
  # entries are not counted twice.
  def grow
    entries = @slots.compact
    @slots = empty_slots(capacity.zero? ? 2 : capacity * 2)
    @count = 0
    entries.each { |entry| insert(entry) }
  end

  # Linear probe from the entry's initial index to the first empty slot.
  # Raises if an entry with an equal key is met on the way.
  def find_slot(entry)
    index = initial_search_index(entry)
    until @slots[index].nil?
      raise "Duplicate key [#{entry.key}]" if @slots[index].key == entry.key

      index = (index + 1) % capacity
    end
    index
  end

  # Returns an array of +count+ empty (nil) slots.
  def empty_slots(count)
    [nil] * count
  end

  # Slot at which probing for +entry+ starts.
  def initial_search_index(entry)
    (entry.hash / Cdb::NUM_HASHTABLES) % capacity
  end
end
|
66
|
+
|
67
|
+
# Value class for an entry in a hash table.
|
68
|
+
class HashTableEntry
  # hash::   the cdb hash of the key (note: this reader replaces Object#hash
  #          for instances of this class).
  # key::    the key the entry was created for.
  # offset:: the offset value supplied by the creator of the entry.
  attr_reader :hash, :key, :offset

  # Builds an immutable entry.
  #
  # The key is snapshotted (copied and frozen unless it already is frozen)
  # so that later mutation of the caller's String cannot change what this
  # entry compares as.
  def initialize(hash, key, offset)
    @hash = hash
    @key = key.frozen? ? key : key.dup.freeze
    @offset = offset
    freeze
  end
end
|
77
|
+
end
|
data/lib/cdb.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'cdb/constants'
|
2
|
+
require 'cdb/reader'
|
3
|
+
require 'cdb/writer'
|
4
|
+
|
5
|
+
# Cdb is a lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
|
6
|
+
# (https://cr.yp.to/cdb.html).
|
7
|
+
#
|
8
|
+
# Author:: Olly Smith
|
9
|
+
# License:: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
|
10
|
+
#
|
11
|
+
# Cdbs are fast, immutable, on-disk hashtables. They're great for storing
|
12
|
+
# modest (up to 4GB) amounts of arbitrary key-value pairs. They allow random
|
13
|
+
# lookup, but no enumeration or traversal.
|
14
|
+
#
|
15
|
+
# file = File.new('table.cdb', 'w+')
|
16
|
+
# Cdb.create(file) do |cdb|
|
17
|
+
# cdb['key1'] = 'value1'
|
18
|
+
# cdb['key2'] = 'value2'
|
19
|
+
# # ...
|
20
|
+
# end
|
21
|
+
# reader = Cdb.open(file)
|
22
|
+
# reader['key1']
|
23
|
+
# # => "value1"
|
24
|
+
module Cdb
  # Write data to a cdb in a file-like object.
  #
  # Yields a Writer for the (emptied) file and finishes the cdb once the
  # block returns. Returns the result of Writer#close.
  #
  # Raises LocalJumpError when called without a block. The check is made
  # before the file is touched, so a mistaken call cannot wipe an existing
  # file.
  def self.create(file)
    raise LocalJumpError, 'no block given (yield)' unless block_given?

    writer = Writer.create(file)
    yield(writer)
    writer.close
  end

  # Open a cdb for reading. Returns a Cdb::Reader wrapping +file+.
  def self.open(file)
    Cdb::Reader.new(file)
  end

  # Calculate a cdb hash value.
  #
  # The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting
  # hash of Cdb::INITIAL_HASH, applied to every byte of the key and kept
  # within 32 bits.
  #
  # Note that this singleton method takes the place of the module object's
  # own zero-argument #hash.
  def self.hash(key)
    key.bytes.inject(Cdb::INITIAL_HASH) do |h, c|
      # Mask first, then mix in the byte; c is a single byte, so the result
      # stays within 32 bits.
      (((h << 5) + h) & 0xffffffff) ^ c
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cdb-ruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Olly Smith
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-03-23 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email: olly.smith@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/cdb.rb
|
20
|
+
- lib/cdb/constants.rb
|
21
|
+
- lib/cdb/reader.rb
|
22
|
+
- lib/cdb/writer.rb
|
23
|
+
homepage: https://github.com/oesmith/cdb-ruby
|
24
|
+
licenses:
|
25
|
+
- Apache-2.0
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 2.7.3
|
44
|
+
signing_key:
|
45
|
+
specification_version: 4
|
46
|
+
summary: A lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
|
47
|
+
test_files: []
|