cdb-ruby 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5ff7193e71ba8c7b0859abad844985f4cc5d674ccb9f93801a8c63fdfd24438e
4
+ data.tar.gz: f5b7b2a5e85ab59666230b95ab9761b969695764cf8678eadff029b8948dc5b3
5
+ SHA512:
6
+ metadata.gz: b50bcd22e9574d7347c7990f8323724330ea97e1454f419aef0d2ad2a67b19fe7678ccbdc25519f079e72d77438cf9a249ceadbd10059222eb9c980f3f7595bb
7
+ data.tar.gz: e6ea469d9b0cd09ea680a18bdd4a965918a212ada615ff60e4870aacafcbe40450d7c19bded4901e2d94d5b6779fddac903a0cc837fc45f05649a4ebc0d86b6f
@@ -0,0 +1,5 @@
1
# Constants shared by the cdb reader, writer and hash function.
module Cdb
  # Number of hash tables in a cdb; also the number of (offset, capacity)
  # pairs stored in the file header.
  NUM_HASHTABLES = 256

  # Seed value the cdb hash function starts from.
  INITIAL_HASH = 5381

  # Fullness threshold an in-memory hash table is compared against when
  # deciding whether to grow.
  HASHTABLE_MAX_FULLNESS = 0.75
end
data/lib/cdb/reader.rb ADDED
@@ -0,0 +1,119 @@
1
+ module Cdb
2
# Provides read-only access to a cdb.
#
# The wrapped file-like object only needs to respond to +seek+ and +read+.
# All hash tables are loaded into memory on the first lookup and cached.
class Reader
  # file:: a file-like object positioned anywhere; it is always accessed
  #        through absolute seeks.
  def initialize(file)
    @file = file
  end

  # Fetches the value associated with the given key.
  #
  # Returns the value as a string of raw bytes, or nil if the key doesn't
  # exist in the cdb (or the file is too short to contain it).
  def [](key)
    hash = Cdb.hash(key)
    table = tables[hash % Cdb::NUM_HASHTABLES]
    # table is nil when the header itself is truncated.
    return nil if table.nil? || table.empty?
    key_from_table(table, key, hash)
  end

  private

  # Linear-probes +table+ for +key+, starting at the slot derived from
  # +hash+. Probing is bounded by the table length so that a table without
  # any empty slot cannot cause an endless loop for a missing key.
  def key_from_table(table, key, hash)
    start = (hash / Cdb::NUM_HASHTABLES) % table.length
    table.length.times do |step|
      entry_hash, offset = table[(start + step) % table.length]
      # An offset of zero marks an empty slot: the key is not present.
      return nil if offset.nil? || offset.zero?
      next unless entry_hash == hash
      value = maybe_read_value(offset, key)
      return value unless value.nil?
    end
    nil
  end

  # Reads the record at +offset+ and returns its value if the stored key
  # equals +key+ byte-for-byte; returns nil otherwise.
  def maybe_read_value(offset, key)
    lengths = read_at(offset, 8)
    return nil if lengths.nil? || lengths.bytesize < 8
    key_length, value_length = lengths.unpack('VV')
    return nil unless key_length == key.bytesize
    # Compare as binary: bytes read from the file never carry the caller's
    # encoding, and strings with different encodings compare unequal as
    # soon as they contain non-ASCII bytes.
    return nil unless @file.read(key_length).to_s.b == key.b
    @file.read(value_length)
  end

  def tables
    @tables ||= load_tables
  end

  # Parses the header (NUM_HASHTABLES pairs of 32-bit little-endian
  # integers: table offset, table capacity) and loads every table.
  def load_tables
    read_at(0, Cdb::NUM_HASHTABLES * 8)
      .to_s
      .unpack('V*')
      .each_slice(2)
      .map { |offset, capacity| load_table(offset, capacity) }
  end

  # Loads one table as an array of [hash, record offset] pairs.
  def load_table(offset, cap)
    return [] if cap.nil? || cap.zero?
    read_at(offset, cap * 8).to_s.unpack('V*').each_slice(2).to_a
  end

  def read_at(offset, len)
    @file.seek(offset)
    @file.read(len)
  end
end
57
+
58
# Provides write-only access to a cdb.
#
# Records are appended to the file as they are assigned. The hash tables
# are kept in memory and only written, together with the real header, by
# #close.
class Writer
  # Initializes an empty cdb for writing to the given file-like object.
  #
  # The file is emptied and a placeholder header is written; the header is
  # overwritten with the real table locations by #close.
  def self.create(file)
    file.truncate(0)
    # truncate does not move the file position, so rewind explicitly.
    # Otherwise a handle that was not at offset 0 would get the header
    # (and every record) at the wrong place.
    file.rewind
    file.write(empty_header)
    new(file)
  end

  # Writes a key/value pair to the cdb.
  #
  # Attempting to write the same key twice will cause an error. The key is
  # indexed before the record is appended, so a rejected duplicate leaves
  # the file untouched.
  def []=(key, value)
    index(key, @file.pos)
    append(key, value)
  end

  # Finish writing the cdb.
  #
  # This flushes the hash table structure to disk: every table is appended
  # after the records, then the header at the start of the file is
  # overwritten with each table's (offset, capacity) pair.
  def close
    lookups = @tables.flat_map { |table| write_table(table) }
    @file.rewind
    @file.write(lookups.pack('V*'))
  end

  # Returns an empty header -- NUM_HASHTABLES pairs of 32-bit integers, all
  # containing zero.
  def self.empty_header
    "\0" * (Cdb::NUM_HASHTABLES * 8)
  end

  private

  def initialize(file)
    @file = file
    @tables = Array.new(Cdb::NUM_HASHTABLES) { HashTable.new }
  end

  # Appends one record (key length, value length, key bytes, value bytes)
  # at the current file position and returns the offset it was written at.
  def append(key, value)
    offset = @file.pos
    # Lengths are byte counts, not character counts, so multibyte keys and
    # values round-trip correctly.
    @file.write([key.bytesize, value.bytesize].pack('VV') + key.b + value.b)
    offset
  end

  # Registers the record at +offset+ in the hash table selected by the
  # key's hash.
  def index(key, offset)
    hash = Cdb.hash(key)
    table_for_hash(hash).put(HashTableEntry.new(hash, key, offset))
  end

  # Writes one table at the current file position and returns its header
  # entry: [offset, capacity].
  def write_table(table)
    offset = @file.pos
    @file.write(table.bytes)
    [offset, table.capacity]
  end

  def table_for_hash(hash)
    @tables[hash % Cdb::NUM_HASHTABLES]
  end
end
119
+ end
data/lib/cdb/writer.rb ADDED
@@ -0,0 +1,77 @@
1
+ module Cdb
2
# In-memory hash table structure. Indexes key/value pairs in a Writer.
#
# Collisions are resolved by linear probing. The table doubles in size
# whenever storing one more entry would push its fullness above
# Cdb::HASHTABLE_MAX_FULLNESS, so at least one slot always stays empty.
class HashTable
  # Creates an empty hash table.
  def initialize
    @count = 0
    @slots = []
  end

  # Adds a hash table entry to the table.
  #
  # Raises a RuntimeError if an entry with the same key is already stored.
  def put(entry)
    grow if should_grow?
    @slots[find_slot(entry)] = entry
    @count += 1
  end

  # Returns the on-disk representation of a hash table: one pair of 32-bit
  # little-endian integers per slot, holding the entry's hash and the
  # offset of its key/value record. Empty slots are written as (0, 0).
  def bytes
    @slots
      .flat_map { |slot| slot.nil? ? [0, 0] : [slot.hash, slot.offset] }
      .pack('V*')
  end

  # Returns the number of slots in the table.
  def capacity
    @slots.length
  end

  private

  # True when the table has no slots yet, or when adding one more entry
  # would exceed the maximum fullness. Uses float division: integer
  # division would yield 0 until the table is completely full.
  def should_grow?
    return true if @slots.empty?
    (@count + 1).fdiv(@slots.length) > Cdb::HASHTABLE_MAX_FULLNESS
  end

  # Doubles the capacity (starting at 2) and re-places the existing
  # entries. Entries are placed directly rather than through #put so that
  # @count is not incremented a second time for them.
  def grow
    entries = @slots.compact
    @slots = empty_slots(capacity.zero? ? 2 : capacity * 2)
    entries.each { |entry| @slots[find_slot(entry)] = entry }
  end

  # Returns the index of the first free slot on the entry's probe sequence.
  def find_slot(entry)
    index = initial_search_index(entry)
    until @slots[index].nil?
      raise "Duplicate key [#{entry.key}]" if @slots[index].key == entry.key
      index = (index + 1) % capacity
    end
    index
  end

  def empty_slots(count)
    Array.new(count)
  end

  def initial_search_index(entry)
    (entry.hash / Cdb::NUM_HASHTABLES) % capacity
  end
end
66
+
67
# Value object describing one hash table entry: the key, the key's cdb
# hash, and the file offset of the key/value record.
#
# Note that the +hash+ reader replaces Object#hash for instances.
class HashTableEntry
  attr_reader :hash
  attr_reader :key
  attr_reader :offset

  def initialize(hash, key, offset)
    @hash, @key, @offset = hash, key, offset
  end
end
77
+ end
data/lib/cdb.rb ADDED
@@ -0,0 +1,46 @@
1
+ require 'cdb/constants'
2
+ require 'cdb/reader'
3
+ require 'cdb/writer'
4
+
5
# Cdb is a lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
# (https://cr.yp.to/cdb.html).
#
# Author:: Olly Smith
# License:: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
#
# Cdbs are fast, immutable, on-disk hashtables. They're great for storing
# modest (up to 4GB) amounts of arbitrary key-value pairs. They allow random
# lookup, but no enumeration or traversal.
#
#     File.open('table.cdb', 'w+b') do |file|
#       Cdb.create(file) do |cdb|
#         cdb['key1'] = 'value1'
#         cdb['key2'] = 'value2'
#         # ...
#       end
#       reader = Cdb.open(file)
#       reader['key1']
#       # => "value1"
#     end
module Cdb
  # Write data to a cdb in a file-like object.
  #
  # Yields a Cdb::Writer to the block and closes it once the block returns.
  # If the block raises, the writer is not closed.
  def self.create(file)
    writer = Writer.create(file)
    yield(writer)
    writer.close
  end

  # Open a cdb for reading.
  #
  # Returns a Cdb::Reader wrapping the given file-like object.
  def self.open(file)
    Cdb::Reader.new(file)
  end

  # Calculate a cdb hash value.
  #
  # The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting
  # hash of 5381. The running value is truncated to 32 bits at every step.
  def self.hash(key)
    key.each_byte.reduce(Cdb::INITIAL_HASH) do |h, c|
      (((h << 5) + h) & 0xffffffff) ^ c
    end
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cdb-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Olly Smith
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-03-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: olly.smith@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/cdb.rb
20
+ - lib/cdb/constants.rb
21
+ - lib/cdb/reader.rb
22
+ - lib/cdb/writer.rb
23
+ homepage: https://github.com/oesmith/cdb-ruby
24
+ licenses:
25
+ - Apache-2.0
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 2.7.3
44
+ signing_key:
45
+ specification_version: 4
46
+ summary: A lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
47
+ test_files: []