cdb-ruby 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5ff7193e71ba8c7b0859abad844985f4cc5d674ccb9f93801a8c63fdfd24438e
4
+ data.tar.gz: f5b7b2a5e85ab59666230b95ab9761b969695764cf8678eadff029b8948dc5b3
5
+ SHA512:
6
+ metadata.gz: b50bcd22e9574d7347c7990f8323724330ea97e1454f419aef0d2ad2a67b19fe7678ccbdc25519f079e72d77438cf9a249ceadbd10059222eb9c980f3f7595bb
7
+ data.tar.gz: e6ea469d9b0cd09ea680a18bdd4a965918a212ada615ff60e4870aacafcbe40450d7c19bded4901e2d94d5b6779fddac903a0cc837fc45f05649a4ebc0d86b6f
@@ -0,0 +1,5 @@
1
module Cdb
  # Number of hash tables in a cdb. A key is routed to table
  # `hash % NUM_HASHTABLES`, and the file header holds one
  # (offset, capacity) pair per table.
  NUM_HASHTABLES = 256

  # Starting value fed into the cdb hash function (see Cdb.hash).
  INITIAL_HASH = 5381

  # Fullness threshold that an in-memory HashTable compares itself against
  # when deciding whether to grow.
  HASHTABLE_MAX_FULLNESS = 0.75
end
data/lib/cdb/reader.rb ADDED
@@ -0,0 +1,119 @@
1
module Cdb
  # Provides read-only access to a cdb.
  class Reader
    # Wraps a file-like object holding a cdb. The object must support
    # `seek` and `read`.
    def initialize(file)
      @file = file
    end

    # Fetches the value associated with the given key.
    #
    # Returns nil if the key doesn't exist in the cdb.
    def [](key)
      hash = Cdb.hash(key)
      table = tables[hash % Cdb::NUM_HASHTABLES]
      return nil if table.empty?
      key_from_table(table, key, hash)
    end

    private

    # Linearly probes `table` for `key`, starting at the slot derived from
    # `hash`. Returns the stored value, or nil when the key is absent.
    #
    # The probe is capped at one full pass over the table: a table with no
    # empty slot would otherwise make a lookup for a missing key spin
    # forever.
    def key_from_table(table, key, hash)
      slot_count = table.length
      start = (hash / Cdb::NUM_HASHTABLES) % slot_count
      slot_count.times do |step|
        entry_hash, offset = table[(start + step) % slot_count]
        # An offset of zero marks an empty slot, which ends the probe chain.
        return nil if offset.zero?
        next unless entry_hash == hash
        value = maybe_read_value(offset, key)
        return value unless value.nil?
      end
      nil
    end

    # Reads the record at `offset` and returns its value if the record's key
    # matches `key`; returns nil otherwise.
    def maybe_read_value(offset, key)
      @file.seek(offset)
      key_length, value_length = @file.read(8).unpack('VV')
      # `read(length)` yields a binary string. Compare against the binary
      # view of the key so that non-ASCII keys in other encodings can match.
      return nil unless @file.read(key_length) == key.b
      @file.read(value_length)
    end

    # Lazily loaded list of NUM_HASHTABLES tables; each table is an array of
    # [hash, record offset] pairs.
    def tables
      @tables ||= load_tables
    end

    # Parses the header -- NUM_HASHTABLES (offset, capacity) pairs of 32-bit
    # little-endian integers -- and loads every table it points to.
    def load_tables
      read_at(0, Cdb::NUM_HASHTABLES * 8)
        .unpack('V*')
        .each_slice(2)
        .map { |offset, capacity| load_table(offset, capacity) }
    end

    # Loads `cap` slots of 8 bytes each, starting at `offset`.
    def load_table(offset, cap)
      read_at(offset, cap * 8).unpack('V*').each_slice(2).to_a
    end

    # Reads `len` bytes starting at absolute position `offset`.
    def read_at(offset, len)
      @file.seek(offset)
      @file.read(len)
    end
  end

  # Provides write-only access to a cdb.
  class Writer
    # Initializes an empty cdb for writing to the given file-like object.
    #
    # The object is truncated and rewound first, so the placeholder header
    # always starts at position zero.
    def self.create(file)
      file.truncate(0)
      file.rewind
      file.write(empty_header)
      Writer.new(file)
    end

    # Writes a key/value pair to the cdb.
    #
    # Attempting to write the same key twice will cause an error.
    def []=(key, value)
      offset = append(key, value)
      index(key, offset)
    end

    # Finish writing the cdb.
    #
    # This flushes the hash table structure to disk: every table is written
    # at the current file position, then the header at the start of the file
    # is overwritten with each table's (offset, capacity) pair.
    def close
      lookups = @tables.map { |t| write_table(t) }
      @file.rewind
      @file.write(lookups.flatten.pack('V*'))
    end

    # Returns an empty header -- NUM_HASHTABLES pairs of 32-bit integers, all
    # containing zero.
    def self.empty_header
      "\0" * (Cdb::NUM_HASHTABLES * 8)
    end

    private

    def initialize(file)
      @file = file
      @tables = (0...Cdb::NUM_HASHTABLES).map { HashTable.new }
    end

    # Appends a record (key length, value length, key bytes, value bytes) at
    # the current file position and returns the offset it was written at.
    #
    # Lengths are byte counts, not character counts: the reader consumes
    # exactly that many bytes, so `length` would break multibyte strings.
    def append(key, value)
      offset = @file.pos
      @file.write([key.bytesize, value.bytesize, key, value].pack('VVa*a*'))
      offset
    end

    # Registers the record at `offset` in the hash table that owns `key`.
    def index(key, offset)
      hash = Cdb.hash(key)
      table_for_hash(hash).put(HashTableEntry.new(hash, key, offset))
    end

    # Writes one table at the current file position and returns the
    # [offset, capacity] pair that goes into the header.
    def write_table(table)
      return [0, 0] if table.nil?
      offset = @file.pos
      @file.write(table.bytes)
      [offset, table.capacity]
    end

    def table_for_hash(hash)
      @tables[hash % Cdb::NUM_HASHTABLES]
    end
  end
end
data/lib/cdb/writer.rb ADDED
@@ -0,0 +1,77 @@
1
module Cdb
  # In-memory hash table structure. Indexes key/value pairs in a Writer.
  #
  # Slots are probed linearly. The table grows before an insert that would
  # push it past Cdb::HASHTABLE_MAX_FULLNESS, so a non-empty table always
  # keeps at least one empty slot.
  class HashTable
    # Creates an empty hash table.
    def initialize
      @count = 0
      @slots = []
    end

    # Adds a hash table entry to the table.
    #
    # Raises a RuntimeError if an entry with the same key is already present.
    def put(entry)
      grow if should_grow?
      insert(entry)
    end

    # Returns the on-disk representation of a hash table (a serialized array
    # of 32-bit integers: one (hash, record offset) pair per slot, with
    # (0, 0) standing for an empty slot).
    def bytes
      @slots
        .flat_map { |slot| slot.nil? ? [0, 0] : [slot.hash, slot.offset] }
        .pack('V*')
    end

    # Returns the number of slots in the table.
    def capacity
      @slots.length
    end

    private

    # Stores the entry in its slot and counts it. Used both for new entries
    # and when re-inserting existing entries after a resize.
    def insert(entry)
      @slots[find_slot(entry)] = entry
      @count += 1
    end

    # Fraction of slots that would be occupied with `count` entries. A table
    # with no slots reports itself as completely full.
    def fullness(count = @count)
      return 1.0 if @slots.empty?
      # fdiv: plain Integer#/ truncates to 0 or 1 and defeats the threshold.
      count.fdiv(@slots.length)
    end

    # True when inserting one more entry would exceed the fullness limit.
    def should_grow?
      fullness(@count + 1) > Cdb::HASHTABLE_MAX_FULLNESS
    end

    # Doubles the capacity (or allocates two slots for an empty table) and
    # re-inserts every existing entry.
    def grow
      entries = @slots.compact
      new_cap = capacity.zero? ? 2 : capacity * 2
      @slots = empty_slots(new_cap)
      # Re-inserting counts each entry again, so start from zero.
      @count = 0
      entries.each { |entry| insert(entry) }
    end

    # Returns the index of the first free slot on the entry's probe chain.
    def find_slot(entry)
      index = initial_search_index(entry)
      until @slots[index].nil?
        raise "Duplicate key [#{entry.key}]" if @slots[index].key == entry.key
        index = (index + 1) % capacity
      end
      index
    end

    def empty_slots(count)
      [nil] * count
    end

    def initial_search_index(entry)
      (entry.hash / Cdb::NUM_HASHTABLES) % capacity
    end
  end

  # Value class for an entry in a hash table.
  class HashTableEntry
    attr_reader :hash, :key, :offset

    def initialize(hash, key, offset)
      @hash = hash
      @key = key
      @offset = offset
    end
  end
end
data/lib/cdb.rb ADDED
@@ -0,0 +1,46 @@
1
+ require 'cdb/constants'
2
+ require 'cdb/reader'
3
+ require 'cdb/writer'
4
+
5
+ # Cdb is a lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
6
+ # (https://cr.yp.to/cdb.html).
7
+ #
8
+ # Author:: Olly Smith
9
+ # License:: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
10
+ #
11
+ # Cdbs are fast, immutable, on-disk hashtables. They're great for storing
12
+ # modest (up to 4GB) amounts of arbitrary key-value pairs. They allow random
13
+ # lookup, but no enumeration or traversal.
14
+ #
15
+ #   file = File.new('table.cdb', 'w+b')
16
+ #   Cdb.create(file) do |cdb|
17
+ #     cdb['key1'] = 'value1'
18
+ #     cdb['key2'] = 'value2'
19
+ #     # ...
20
+ #   end
21
+ #   reader = Cdb.open(file)
22
+ # reader['key1']
23
+ # # => "value1"
24
module Cdb
  # Builds a cdb inside a file-like object.
  #
  # Yields a Cdb::Writer to the given block and closes the writer once the
  # block has returned.
  def self.create(file)
    Writer.create(file).tap { |writer| yield(writer) }.close
  end

  # Wraps a file-like object in a Cdb::Reader for lookups.
  def self.open(file)
    Reader.new(file)
  end

  # Computes the cdb hash of a key.
  #
  # Starting from Cdb::INITIAL_HASH, every byte of the key is folded in with
  # ``h = ((h << 5) + h) ^ c'', where the shifted sum is masked to 32 bits
  # before the xor.
  def self.hash(key)
    key.bytes.reduce(Cdb::INITIAL_HASH) do |acc, byte|
      times_thirty_three = (acc << 5) + acc
      (times_thirty_three & 0xffffffff) ^ byte
    end
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cdb-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Olly Smith
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-03-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: olly.smith@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/cdb.rb
20
+ - lib/cdb/constants.rb
21
+ - lib/cdb/reader.rb
22
+ - lib/cdb/writer.rb
23
+ homepage: https://github.com/oesmith/cdb-ruby
24
+ licenses:
25
+ - Apache-2.0
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 2.7.3
44
+ signing_key:
45
+ specification_version: 4
46
+ summary: A lightweight, pure-ruby reader/writer for DJ Bernstein's cdb format
47
+ test_files: []