daybreak 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,52 @@
module Daybreak
  # Database format serializer and deserializer. You can create
  # your own implementation of this class's methods and define
  # your own database format!
  # @api public
  class Format
    # Read database header from input stream
    # @param [#read] input the input stream
    # @raise [RuntimeError] if the magic string or the version does not match
    def read_header(input)
      raise 'Not a Daybreak database' if input.read(MAGIC.bytesize) != MAGIC
      # `read` returns nil at end of stream; `to_s` turns that into a
      # version mismatch instead of a NoMethodError.
      ver = input.read(2).to_s.unpack('n').first
      raise "Expected database version #{VERSION}, got #{ver.inspect}" if ver != VERSION
    end

    # Return database header as string
    # @return [String] the magic string followed by the 16 bit version
    def header
      MAGIC + [VERSION].pack('n')
    end

    # Serialize record and return string
    # @param [Array] record an array with [key, value] or [key] if the record is
    #   deleted
    # @return [String] sizes, key, optional value and trailing CRC32
    def dump(record)
      # Work on binary copies: appending a non-ASCII UTF-8 string to the
      # packed (binary) size prefix raises Encoding::CompatibilityError.
      key = binary(record[0])
      data =
        if record.size == 1
          [key.bytesize, DELETE].pack('NN') << key
        else
          value = binary(record[1])
          [key.bytesize, value.bytesize].pack('NN') << key << value
        end
      data << crc32(data)
    end

    # Deserialize record from buffer. The parsed record is removed from
    # the front of the buffer.
    # @param [String] buf the buffer to read from
    # @return [Array] [key, value], or [key] for a deletion record
    # @raise [RuntimeError] on a truncated size header or a CRC mismatch
    def parse(buf)
      key_size, value_size = buf[0, 8].unpack('NN')
      raise 'Truncated record header' unless value_size
      deleted = value_size == DELETE
      data = buf.slice!(0, 8 + key_size + (deleted ? 0 : value_size))
      raise 'CRC mismatch' unless buf.slice!(0, 4) == crc32(data)
      deleted ? [data[8, key_size]] : [data[8, key_size], data[8 + key_size, value_size]]
    end

    protected

    # Magic string at the start of every database file
    MAGIC = 'DAYBREAK'
    # Format version written after the magic string
    VERSION = 1
    # Value size marking a deletion record (largest 32 bit unsigned integer)
    DELETE = (1 << 32) - 1

    # CRC32 checksum of a string, packed as a 32 bit big-endian integer
    def crc32(s)
      [Zlib.crc32(s, 0)].pack('N')
    end

    # Binary-encoded copy of a string; the argument itself is left untouched
    def binary(s)
      s.dup.force_encoding(Encoding::BINARY)
    end
  end
end
@@ -0,0 +1,107 @@
module Daybreak
  # Thread safe job queue
  # @api private
  class Queue
    # HACK: Dangerous optimization on MRI which has a
    # global interpreter lock and makes the @queue array
    # thread safe.
    if !defined?(RUBY_ENGINE) || RUBY_ENGINE == 'ruby'
      def initialize
        # @full holds threads blocked in #next, @empty threads blocked in #flush
        @queue, @full, @empty = [], [], []
        @stop = false
        @heartbeat = Thread.new(&method(:heartbeat))
        @heartbeat.priority = -9
      end

      # Append a job and wake up the first thread waiting in #next
      # @return [Queue] self
      def <<(x)
        @queue << x
        thread = @full.first
        thread.wakeup if thread
        self
      end

      # Remove the first job and wake up the first thread waiting in #flush
      # once the queue is drained
      # @return the removed job, nil if the queue was empty
      def pop
        item = @queue.shift
        if @queue.empty?
          thread = @empty.first
          thread.wakeup if thread
        end
        item
      end

      # Block until a job is available and return it without removing it
      def next
        while @queue.empty?
          begin
            @full << Thread.current
            # If a push happens before Thread.stop, the thread won't be woken up
            Thread.stop while @queue.empty?
          ensure
            @full.delete(Thread.current)
          end
        end
        @queue.first
      end

      # Block until the queue is empty
      def flush
        until @queue.empty?
          begin
            @empty << Thread.current
            # If a pop happens before Thread.stop, the thread won't be woken up
            Thread.stop until @queue.empty?
          ensure
            @empty.delete(Thread.current)
          end
        end
      end

      # Stop the heartbeat thread and wait until it has terminated
      def stop
        @stop = true
        @heartbeat.join
      end

      private

      # Check threads 10 times per second to avoid deadlocks
      # since there is a race condition in #next and #flush
      def heartbeat
        until @stop
          @empty.each(&:wakeup)
          @full.each(&:wakeup)
          sleep 0.1
        end
      end
    else
      def initialize
        @mutex = Mutex.new
        @full = ConditionVariable.new
        @empty = ConditionVariable.new
        @queue = []
      end

      # Append a job and signal one thread waiting in #next
      # @return [Queue] self
      def <<(x)
        @mutex.synchronize do
          @queue << x
          @full.signal
        end
        self
      end

      # Remove the first job and signal one thread waiting in #flush
      # once the queue is drained
      # @return the removed job, nil if the queue was empty
      def pop
        @mutex.synchronize do
          item = @queue.shift
          @empty.signal if @queue.empty?
          item
        end
      end

      # Block until a job is available and return it without removing it
      def next
        @mutex.synchronize do
          @full.wait(@mutex) while @queue.empty?
          @queue.first
        end
      end

      # Block until the queue is empty
      def flush
        @mutex.synchronize do
          @empty.wait(@mutex) until @queue.empty?
        end
      end

      # No background thread exists in this variant; defined so both
      # variants of the class respond to the same methods.
      def stop
      end
    end
  end
end
@@ -0,0 +1,39 @@
module Daybreak
  module Serializer
    # Default serializer which converts
    # keys to strings and marshals values
    # @api public
    class Default
      # Return the value of the key to insert into the database
      # @return [String] the key converted with to_s
      def key_for(key)
        key.to_s
      end

      # Serialize a value
      # @return [String] the marshalled value
      def dump(value)
        Marshal.dump(value)
      end

      # Parse a value
      # NOTE(review): Marshal.load can instantiate arbitrary objects; only
      # use this serializer with data from a trusted source.
      def load(value)
        Marshal.load(value)
      end
    end

    # Serializer which does nothing
    # @api public
    class None
      # Return the key unchanged
      def key_for(key)
        key
      end

      # Return the value unchanged
      def dump(value)
        value
      end

      # Return the value unchanged
      def load(value)
        value
      end
    end
  end
end
@@ -1,4 +1,5 @@
module Daybreak
  # Version string updated using SemVer
  # @api public
  VERSION = '0.2.0'.freeze
end
data/script/bench ADDED
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path(File.dirname(__FILE__)) + '/../test/test_helper.rb'
4
+ require 'benchmark'
5
+
# Number of timed runs per benchmark
RUNS = 20

# Time a workload RUNS times and return the wall clock samples.
# Without a block the default workload stores every DATA entry in
# +instance+ and reads it back, counting mismatches in $errors.
# @param instance the store under test; must respond to []=, [] and clear
# @yield optional workload replacing the default one
# @return [Array<Float>] one sample per run, in milliseconds
def measure(instance, &block)
  $errors = 0
  # Pick the workload once instead of re-checking on every run
  workload = block || proc do
    DATA.each { |i| instance[i] = i }
    DATA.each { |i| $errors += 1 unless instance[i] == i }
  end
  samples = Array.new(RUNS) { Benchmark.measure(&workload).real * 1000 }
  puts "#{$errors} ERRORS" if $errors > 0
  instance.clear
  samples
end
29
+
# Characters the random benchmark strings are built from
DICT = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'.freeze

# Build 1000 random strings over DICT whose lengths are drawn
# uniformly from min...max (max excluded).
# @return [Array<String>]
def uniform
  min, max = 3, 1024
  Array.new(1000) do
    # Offset by min; adding max would produce lengths of 1024 and above
    n = rand(max - min) + min
    Array.new(n) { DICT[rand(DICT.length)] }.join
  end
end
# Shared data set for all benchmark runs
DATA = uniform
39
+
# Benchmark one store and print its report.
# @param instance the store under test
# @param [String] message text appended to the headline
# @yield optional workload handed on to measure
def run(instance, message = '', &blk)
  puts "Running benchmarks for #{instance.class.name} #{message}"
  # The samples of this first pass are discarded (presumably a warm-up)
  measure instance, &blk
  report measure(instance, &blk)
  puts '=' * 64
ensure
  # Stores without a close method, such as Hash, are left alone
  instance.close if instance.respond_to? :close
end
48
+
# Print sample count, mean, standard deviation and the sample found at
# the 95% position of the sorted timings.
# @param [Array<Numeric>] samples timings in milliseconds; not modified
def report(samples)
  if samples.empty?
    puts '0 samples'
    return
  end
  # Sort a copy so the caller's array keeps its order
  sorted = samples.sort
  count = sorted.length
  # fdiv keeps the fraction even when the samples are integers
  mean = sorted.inject(:+).fdiv(count)
  stddev = Math.sqrt(sorted.inject(0) { |sum, s| sum + (s - mean)**2 } / count)
  puts '%d samples, average time: %.4f ms, std. dev: %.4f ms' % [count, mean, stddev]
  puts '95%% < %.4f ms' % sorted[(count * 0.95).to_i]
end
57
+
# DBM and GDBM are optional: a missing library (LoadError) or a failing
# run is reported and skipped. Interrupt and SystemExit are not rescued,
# so the script can still be aborted.
begin
  require 'dbm'
  run DBM.new(DB_PATH + '.dbm')
rescue LoadError, StandardError => ex
  puts "DBM not benchmarked: #{ex.message}"
end

begin
  require 'gdbm'
  run GDBM.new(DB_PATH + '.gdbm')
rescue LoadError, StandardError => ex
  puts "GDBM not benchmarked: #{ex.message}"
end

run Hash.new
run Daybreak::DB.new DB_PATH

# Every single write and read wrapped in db.lock
db = Daybreak::DB.new DB_PATH
run db, 'with lock' do
  DATA.each do |i|
    db.lock { db[i] = i }
  end
  DATA.each do |i|
    db.lock { $errors += 1 unless db[i] == i }
  end
end

# db.sync called after every single write and read
db = Daybreak::DB.new DB_PATH
run db, 'with sync' do
  DATA.each do |i|
    db[i] = i
    db.sync
  end
  DATA.each do |i|
    $errors += 1 unless db[i] == i
    db.sync
  end
end
data/script/converter ADDED
@@ -0,0 +1,390 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
4
+ require 'daybreak'
5
+
# Copy every key/value pair of an old format database into a new one.
# Both databases are closed even if the copy fails half way.
# @param [String] oldfile path of the old format database
# @param [String] newfile path of the database to write
def convert(oldfile, newfile)
  olddb = Daybreak1::DB.new(oldfile)
  begin
    newdb = Daybreak::DB.new(newfile)
    begin
      olddb.each do |key, value|
        newdb[key] = value
      end
    ensure
      newdb.close
    end
  ensure
    olddb.close!
  end
end
15
+
16
+ module Daybreak1
# Daybreak::DB contains the public api for Daybreak, you may extend it like
# any other Ruby class (i.e. to overwrite serialize and parse). It includes
# Enumerable for functional goodies like map, each, reduce and friends.
class DB
  include Enumerable

  # Create a new Daybreak::DB. The second argument is the default value
  # to store when accessing a previously unset key, this follows the
  # Hash standard.
  # @param [String] file the path to the db file
  # @param default the default value to store and return when a key is
  #  not yet in the database.
  # @yield [key] a block that will return the default value to store.
  # @yieldparam [String] key the key to be stored.
  def initialize(file, default=nil, &blk)
    @table = {}
    @file_name = file
    @writer = Writer.new(@file_name)
    @default = block_given? ? blk : default
    read!
  end

  # Set a key in the database to be written at some future date. If the data
  # needs to be persisted immediately, call <tt>db.set(key, value, true)</tt>.
  # @param [#to_s] key the key of the storage slot in the database
  # @param value the value to store
  # @param [Boolean] sync if true, sync this value immediately
  def []=(key, value, sync = false)
    key = key.to_s
    write key, value, sync
    @table[key] = value
  end
  alias_method :set, :"[]="

  # set! flushes data immediately to disk.
  # @param [#to_s] key the key of the storage slot in the database
  # @param value the value to store
  def set!(key, value)
    set key, value, true
  end

  # Delete a key from the database
  # @param [#to_s] key the key of the storage slot in the database
  # @param [Boolean] sync if true, sync this deletion immediately
  def delete(key, sync = false)
    key = key.to_s
    write key, '', sync, true
    @table.delete key
  end

  # delete! immediately deletes the key on disk.
  # @param [#to_s] key the key of the storage slot in the database
  def delete!(key)
    delete key, true
  end

  # Retrieve a value at key from the database. If the default value was specified
  # when this database was created, that value will be set and returned. Aliased
  # as <tt>get</tt>.
  # @param [#to_s] key the value to retrieve from the database.
  def [](key)
    key = key.to_s
    if @table.has_key? key
      @table[key]
    elsif default?
      set key, Proc === @default ? @default.call(key) : @default
    end
  end
  alias_method :get, :"[]"

  # Iterate over the key, value pairs in the database.
  # @yield [key, value] blk the iterator for each key value pair.
  # @yieldparam [String] key the key.
  # @yieldparam value the value from the database.
  # @return [Enumerator] if no block is given
  def each
    return enum_for(:each) unless block_given?
    keys.each { |k| yield(k, get(k)) }
  end

  # Does this db have a default value.
  def default?
    !@default.nil?
  end

  # Does this db have a value for this key?
  # @param [key#to_s] key the key to check if the DB has a key.
  def has_key?(key)
    @table.has_key? key.to_s
  end

  # Return the keys in the db.
  # @return [Array<String>]
  def keys
    @table.keys
  end

  # Return the number of stored items.
  # @return [Integer]
  def length
    @table.size
  end
  alias_method :size, :length

  # Serialize the data for writing to disk, if you don't want to use <tt>Marshal</tt>
  # overwrite this method.
  # @param value the value to be serialized
  # @return [String]
  def serialize(value)
    Marshal.dump(value)
  end

  # Parse the serialized value from disk, like serialize if you want to use a
  # different serialization method overwrite this method.
  # @param [String] value the serialized value to be parsed
  # @return the deserialized object
  def parse(value)
    Marshal.load(value)
  end

  # Empty the database file.
  def empty!
    @writer.truncate!
    @table.clear
    read!
  end
  alias_method :clear, :empty!

  # Force all queued commits to be written to disk.
  def flush!
    @writer.flush!
  end

  # Close the database for reading and writing.
  def close!
    @writer.close!
  end

  # Compact the database to remove stale commits and reduce the file size.
  def compact!
    # Create a new temporary database
    tmp_file = @file_name + "-#{$$}-#{Thread.current.object_id}"
    copy_db = self.class.new tmp_file

    # Copy the database key by key into the temporary table
    each do |key, value|
      copy_db.set(key, value)
    end
    copy_db.close!

    close!

    # Move the copy into place
    File.rename tmp_file, @file_name

    # Reopen this database
    @writer = Writer.new(@file_name)
    @table.clear
    read!
  end

  # Read all values from the log file. If you want to check for changed data
  # call this again.
  def read!
    buf = nil
    # Read the whole file while holding a shared lock
    File.open(@file_name, 'rb') do |fd|
      fd.flock(File::LOCK_SH)
      buf = fd.read
    end
    # Record.deserialize removes one record from the front of buf per call
    until buf.empty?
      key, data, deleted = Record.deserialize(buf)
      if deleted
        @table.delete key
      else
        @table[key] = parse(data)
      end
    end
  end

  private

  # Hand a [key, serialized value, delete flag] record to the writer
  def write(key, value, sync = false, delete = false)
    @writer.write([key, serialize(value), delete])
    flush! if sync
  end
end
201
+
# Records define how data is serialized and read from disk.
module Record
  # Thrown when either key or data is missing
  class UnnacceptableDataError < StandardError; end

  # Thrown when there is a CRC mismatch between the data from the disk
  # and what was written to disk previously, or when a record is cut short.
  class CorruptDataError < StandardError; end

  extend self

  # The mask a record uses to check for deletion.
  DELETION_MASK = 1 << 31

  # The serialized representation of the key value pair plus the CRC.
  # @param [Array] record an array of [key, data, deleted]
  # @return [String]
  # @raise [UnnacceptableDataError] if key or data is nil or false
  def serialize(record)
    raise UnnacceptableDataError, 'key and data must be defined' unless record[0] && record[1]
    s = key_data_string(record)
    s << crc_string(s)
  end

  # Read one record from the front of a buffer. The bytes of the record
  # are removed from the buffer.
  # @param [String] buf the buffer to read from
  # @return [Array] an array of [key, data, deleted]
  # @raise [CorruptDataError] on a truncated record or a CRC mismatch
  def deserialize(buf)
    masked = read32(buf)
    # Read the record's key bytes; the top bit of the size is the deletion flag
    key = buf.slice!(0, masked & (DELETION_MASK - 1))
    # Read the record's value bytes
    data = buf.slice!(0, read32(buf))
    record = [key, data, (masked & DELETION_MASK) != 0]
    raise CorruptDataError, 'CRC mismatch' unless buf.slice!(0, 4) == crc_string(key_data_string(record))
    record
  end

  private

  # Return the deletion flag plus two length prefixed cells
  def key_data_string(record)
    # Binary copies avoid Encoding::CompatibilityError when a non-ASCII
    # UTF-8 key is joined with the packed sizes and the serialized value.
    key = binary(record[0])
    data = binary(record[1])
    part(key, key.bytesize + (record[2] ? DELETION_MASK : 0)) << part(data, data.bytesize)
  end

  # CRC32 of a string packed as a 32 bit big-endian integer
  def crc_string(s)
    [Zlib.crc32(s, 0)].pack('N')
  end

  # A 32 bit big-endian length followed by the data
  def part(data, length)
    [length].pack('N') << data
  end

  # Remove a 32 bit big-endian integer from the front of the buffer
  def read32(buf)
    value = buf.slice!(0, 4).unpack('N')[0]
    raise CorruptDataError, 'Truncated record' unless value
    value
  end

  # Binary-encoded copy of a string; the argument itself is left untouched
  def binary(s)
    s.dup.force_encoding(Encoding::BINARY)
  end
end
258
+
# Writers handle the actually fiddly task of committing data to disk.
# They have a Worker instance that writes in a select loop.
class Writer
  # Open up the file, ready it for binary and nonblocking writing.
  # @param [String] file the path of the file to append to
  def initialize(file)
    @file = file
    open!
    @worker = Worker.new(@fd)
  end

  # Send a record to the workers queue.
  def write(record)
    @worker.enqueue record
  end

  # Finish writing
  def finish!
    @worker.finish!
  end

  # Flush pending commits, and restart the worker.
  def flush!
    @worker.flush!
  end

  # Finish writing and close the file descriptor.
  def close!
    finish!
    @fd.close
  end

  # Truncate the file.
  def truncate!
    # flush! instead of finish!: finish! leaves no worker thread running,
    # so records written afterwards would never reach the file.
    flush!
    @fd.truncate(0)
    @fd.pos = 0
  end

  private

  # Open the file for binary appending; switch it to nonblocking mode
  # when Fcntl is loaded.
  def open!
    @fd = File.open @file, 'ab'

    if defined?(Fcntl::O_NONBLOCK)
      f = @fd.fcntl(Fcntl::F_GETFL, 0)
      @fd.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK | f)
    end
  end

  # Workers handle the actual fiddly bits of asynchronous io and
  # handle background writes.
  class Worker
    def initialize(fd)
      @queue = Queue.new
      @fd = fd
      @thread = Thread.new { work }
      at_exit { finish! }
    end

    # Queue up a write to be committed later.
    def enqueue(record)
      @queue << record
    end

    # Loop and block if we don't have work to do or if
    # the file isn't ready for another write just yet.
    def work
      buf, finished = String.new, false
      until finished && buf.empty?
        # Once the terminating nil has arrived only the rest of the buffer
        # is written out; another pop would block forever.
        unless finished
          record = @queue.pop
          if record
            buf << Record.serialize(record)
          else
            finished = true
          end
        end
        _read, write = IO.select [], [@fd]
        if write && (fd = write.first)
          lock(fd) { buf = try_write fd, buf }
        end
      end
      @fd.flush
    end

    # Try and write the buffer to the file via non blocking file writes.
    # If the write fails try again.
    # @return [String] the part of the buffer that still has to be written
    def try_write(fd, buf)
      written =
        if defined?(Fcntl::O_NONBLOCK)
          fd.write_nonblock(buf)
        else
          fd.write(buf)
        end
      # write returns a byte count, so compare and cut by bytes
      written < buf.bytesize ? buf.byteslice(written..-1) : String.new
    rescue Errno::EAGAIN
      buf
    end

    # Run the block while holding an exclusive lock on the file
    def lock(fd)
      fd.flock File::LOCK_EX
      begin
        yield
      ensure
        fd.flock File::LOCK_UN
      end
    end

    # finish! and start up another worker thread.
    def flush!
      finish!
      @thread = Thread.new { work }
      true
    end

    # Push a nil through the queue and block until the write loop is finished.
    def finish!
      @queue.push nil
      @thread.join
    end
  end
end
384
+ end
385
+
# Command line entry point: convert when exactly an old and a new
# database path are given, otherwise print the usage line.
if ARGV.size == 2
  convert(ARGV[0], ARGV[1])
else
  puts "Usage: #{$0} olddb newdb"
end