daybreak 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
module Daybreak
  # Database format serializer and deserializer. You can create
  # your own implementation of this class's methods and define
  # your own database format!
  #
  # Record layout produced by {#dump} and consumed by {#parse}:
  #   4 bytes  key size   (big-endian unsigned)
  #   4 bytes  value size (big-endian unsigned), or DELETE for a deletion
  #   key bytes, then value bytes (absent for a deletion)
  #   4 bytes  CRC32 of everything above
  # @api public
  class Format
    # Read and validate the database header from an input stream.
    # @param [#read] input the input stream
    # @raise [RuntimeError] if the magic string or the version do not match
    def read_header(input)
      raise 'Not a Daybreak database' if input.read(MAGIC.bytesize) != MAGIC
      # read returns nil at end of stream; treat that as "no version"
      # instead of failing with NoMethodError.
      ver = input.read(2).to_s.unpack('n').first
      raise "Expected database version #{VERSION}, got #{ver.inspect}" if ver != VERSION
    end

    # Return database header as string
    # @return [String] magic string followed by the 16 bit version
    def header
      MAGIC + [VERSION].pack('n')
    end

    # Serialize record and return string
    # @param [Array] record an array with [key, value] or [key] if the record is
    #   deleted
    # @return [String] binary record including the trailing CRC32
    def dump(record)
      # The packed sizes are a binary string. Appending a non-ASCII string in
      # another encoding to it raises Encoding::CompatibilityError, so key and
      # value are treated as raw bytes.
      key = binary(record[0])
      data =
        if record.size == 1
          [key.bytesize, DELETE].pack('NN') << key
        else
          value = binary(record[1])
          [key.bytesize, value.bytesize].pack('NN') << key << value
        end
      data << crc32(data)
    end

    # Deserialize record from buffer. The parsed record is removed from the
    # front of the buffer.
    # @param [String] buf the (binary) buffer to read from, modified in place
    # @return [Array] [key, value], or [key] for a deletion record
    # @raise [RuntimeError] if the record is truncated or its CRC does not match
    def parse(buf)
      raise 'Truncated record' if buf.bytesize < 8
      key_size, value_size = buf[0, 8].unpack('NN')
      deleted = value_size == DELETE
      data = buf.slice!(0, 8 + key_size + (deleted ? 0 : value_size))
      raise 'CRC mismatch' unless buf.slice!(0, 4) == crc32(data)
      key = data[8, key_size]
      deleted ? [key] : [key, data[8 + key_size, value_size]]
    end

    protected

    # Magic string at the start of every database file
    MAGIC = 'DAYBREAK'
    # Database format version stored in the header
    VERSION = 1
    # Value size marker used by deletion records
    DELETE = (1 << 32) - 1

    # @param [String] s the data to checksum
    # @return [String] CRC32 of s as 4 big-endian bytes
    def crc32(s)
      [Zlib.crc32(s, 0)].pack('N')
    end

    # Return the bytes of str as a binary string without touching str itself.
    # Strings without encoding support (Ruby 1.8) are returned unchanged.
    def binary(str)
      str.respond_to?(:force_encoding) ? str.dup.force_encoding('BINARY') : str
    end
  end
end
@@ -0,0 +1,107 @@
1
module Daybreak
  # Thread safe job queue
  #
  # Two implementations with the same interface are provided; which one is
  # defined is decided once, when the class body is evaluated.
  # @api private
  class Queue
    # HACK: Dangerous optimization on MRI which has a
    # global interpreter lock and makes the @queue array
    # thread safe.
    if !defined?(RUBY_ENGINE) || RUBY_ENGINE == 'ruby'
      def initialize
        # @full holds threads blocked in #next, @empty threads blocked in #flush
        @queue, @full, @empty = [], [], []
        @stop = false
        @heartbeat = Thread.new(&method(:heartbeat))
        @heartbeat.priority = -9
      end

      # Append an item and wake up the first thread waiting in #next.
      def <<(x)
        @queue << x
        thread = @full.first
        thread.wakeup if thread
      end

      # Remove the first item; when the queue becomes empty, wake up the
      # first thread waiting in #flush.
      def pop
        @queue.shift
        if @queue.empty?
          thread = @empty.first
          thread.wakeup if thread
        end
      end

      # Block until the queue has an item and return it without removing it.
      def next
        while @queue.empty?
          begin
            @full << Thread.current
            # If a push happens before Thread.stop, the thread won't be woken up
            Thread.stop while @queue.empty?
          ensure
            @full.delete(Thread.current)
          end
        end
        @queue.first
      end

      # Block until the queue is empty.
      def flush
        until @queue.empty?
          begin
            @empty << Thread.current
            # If a pop happens before Thread.stop, the thread won't be woken up
            Thread.stop until @queue.empty?
          ensure
            @empty.delete(Thread.current)
          end
        end
      end

      # Ask the heartbeat thread to terminate and wait for it.
      def stop
        @stop = true
        @heartbeat.join
      end

      private

      # Check threads 10 times per second to avoid deadlocks
      # since there is a race condition below
      def heartbeat
        until @stop
          @empty.each(&:wakeup)
          @full.each(&:wakeup)
          sleep 0.1
        end
      end
    else
      def initialize
        @mutex = Mutex.new
        # @full is signalled when an item is pushed, @empty when the queue drains
        @full = ConditionVariable.new
        @empty = ConditionVariable.new
        @queue = []
      end

      # Append an item and signal one thread waiting in #next.
      def <<(x)
        @mutex.synchronize do
          @queue << x
          @full.signal
        end
      end

      # Remove the first item; when the queue becomes empty, signal one
      # thread waiting in #flush.
      def pop
        @mutex.synchronize do
          @queue.shift
          @empty.signal if @queue.empty?
        end
      end

      # Block until the queue has an item and return it without removing it.
      def next
        @mutex.synchronize do
          @full.wait(@mutex) while @queue.empty?
          @queue.first
        end
      end

      # Block until the queue is empty.
      def flush
        @mutex.synchronize do
          @empty.wait(@mutex) until @queue.empty?
        end
      end

      # This implementation has no background thread, so there is nothing to
      # stop. Defined so both implementations answer the same messages.
      def stop
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
module Daybreak
  module Serializer
    # Default serializer: keys become strings, values are
    # run through Marshal.
    # @api public
    class Default
      # Key under which a record is stored in the database
      # @return [String] the key converted with to_s
      def key_for(key)
        key.to_s
      end

      # Turn a value into its Marshal representation
      # @return [String]
      def dump(value)
        Marshal.dump value
      end

      # Rebuild a value from its Marshal representation.
      # NOTE: Marshal.load can instantiate arbitrary objects, so only
      # load data from database files you trust.
      def load(value)
        Marshal.load value
      end
    end

    # Pass-through serializer: keys and values are used exactly as given
    # @api public
    class None
      # Return the argument untouched
      def key_for(key)
        key
      end

      # dump and load are the same identity operation as key_for
      alias_method :dump, :key_for
      alias_method :load, :key_for
    end
  end
end
@@ -1,4 +1,5 @@
1
1
  module Daybreak
2
- # Updated using SemVer
3
- VERSION = "0.1.3"
2
+ # Version string updated using SemVer
3
+ # @api public
4
+ VERSION = '0.2.0'
4
5
  end
data/script/bench ADDED
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path(File.dirname(__FILE__)) + '/../test/test_helper.rb'
4
+ require 'benchmark'
5
+
6
+ RUNS = 20
7
+
8
# Time RUNS executions of a workload and return the wall clock samples.
#
# Without a block the default workload stores every DATA entry in instance
# under itself and reads it back, counting mismatches in $errors.
# @param instance the store under test (needs []=, [] and clear)
# @yield optional custom workload, timed instead of the default one
# @return [Array<Float>] one sample per run, in milliseconds
def measure(instance, &block)
  $errors = 0
  workload = block || lambda do
    DATA.each { |item| instance[item] = item }
    DATA.each { |item| $errors += 1 unless instance[item] == item }
  end
  samples = Array.new(RUNS) { Benchmark.measure(&workload).real * 1000 }
  puts "#{$errors} ERRORS" if $errors > 0
  instance.clear
  samples
end
29
+
30
# Alphabet the random benchmark strings are drawn from
DICT = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'.freeze

# Build the benchmark data set: 1000 random alphanumeric strings whose
# lengths are picked from min...max.
# @return [Array<String>]
def uniform
  min, max = 3, 1024
  1000.times.map do
    # Offset by min, not max: adding max produced lengths of 1024..2044 and
    # left min without any effect.
    n = rand(max - min) + min
    (1..n).map { DICT[rand(DICT.length)] }.join
  end
end
38
+ DATA = uniform
39
+
40
# Benchmark one store: announce it, do a warm-up measurement whose samples
# are thrown away, then measure again and print the report.
# The store is closed afterwards if it responds to close, even on failure.
# @param instance the store under test
# @param [String] message extra text for the announcement line
# @yield optional custom workload handed on to measure
def run(instance, message = '', &blk)
  begin
    puts "Running benchmarks for #{instance.class.name} #{message}"
    measure(instance, &blk)
    samples = measure(instance, &blk)
    report(samples)
    puts('=' * 64)
  ensure
    instance.close if instance.respond_to?(:close)
  end
end
48
+
49
# Print sample count, mean, standard deviation and the value found at the
# 95% position of the sorted samples.
# @param [Array<Numeric>] samples timings in milliseconds; sorted in place
def report(samples)
  samples.sort!
  count = samples.length
  mean = samples.inject(:+) / count
  variance = samples.inject(0) { |acc, sample| acc + (sample - mean) ** 2 } / count
  puts format('%d samples, average time: %.4f ms, std. dev: %.4f ms', count, mean, Math.sqrt(variance))
  puts format('95%% < %.4f ms', samples[(count * 0.95).to_i])
end
57
+
58
# DBM and GDBM are optional: a missing library (LoadError) or a failure to
# open/benchmark the store is reported and the remaining benchmarks still run.
# Exception is deliberately not rescued, so that Ctrl-C and exit still
# terminate the script.
begin
  require 'dbm'
  run DBM.new(DB_PATH + '.dbm')
rescue LoadError, StandardError => ex
  puts "DBM not benchmarked: #{ex.message}"
end

begin
  require 'gdbm'
  run GDBM.new(DB_PATH + '.gdbm')
rescue LoadError, StandardError => ex
  puts "GDBM not benchmarked: #{ex.message}"
end

# Plain Hash for comparison, then Daybreak with the default workload
run Hash.new
run Daybreak::DB.new DB_PATH

# Every single write and read wrapped in db.lock
db = Daybreak::DB.new DB_PATH
run db, 'with lock' do
  DATA.each do |i|
    db.lock { db[i] = i }
  end
  DATA.each do |i|
    db.lock { $errors += 1 unless db[i] == i }
  end
end

# db.sync called after every single write and read
db = Daybreak::DB.new DB_PATH
run db, 'with sync' do
  DATA.each do |i|
    db[i] = i
    db.sync
  end
  DATA.each do |i|
    $errors += 1 unless db[i] == i
    db.sync
  end
end
data/script/converter ADDED
@@ -0,0 +1,390 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
4
+ require 'daybreak'
5
+
6
# Copy every key/value pair of an old format database into a new database.
# Both databases are closed when the copy is done, and also when opening the
# new database or copying a record raises.
# @param [String] oldfile path of the database in the old format
# @param [String] newfile path of the database to write
def convert(oldfile, newfile)
  olddb = Daybreak1::DB.new(oldfile)
  newdb = nil
  begin
    newdb = Daybreak::DB.new(newfile)
    olddb.each do |key, value|
      newdb[key] = value
    end
  ensure
    begin
      olddb.close!
    ensure
      newdb.close if newdb
    end
  end
end
15
+
16
+ module Daybreak1
17
# Old format database class, kept in this script so that existing files can
# be read. It behaves like a string keyed hash whose changes are appended to
# a log file, and it mixes in Enumerable (map, each, reduce and friends).
class DB
  include Enumerable

  # Open the database and load all records of the file into memory.
  # @param [String] file the path to the db file
  # @param default the default value to store and return when a key is
  #  not yet in the database.
  # @yield [key] a block that will return the default value to store.
  # @yieldparam [String] key the key to be stored.
  def initialize(file, default=nil, &blk)
    @file_name = file
    @table = {}
    @writer = Writer.new(@file_name)
    # a block wins over the plain default value
    @default = blk || default
    read!
  end

  # Store a value under a key. The write is queued; pass sync = true
  # (<tt>db.set(key, value, true)</tt>) to have it flushed right away.
  # @param [#to_s] key the key of the storage slot in the database
  # @param value the value to store
  # @param [Boolean] sync if true, sync this value immediately
  def []=(key, value, sync = false)
    name = key.to_s
    write(name, value, sync)
    @table[name] = value
  end
  alias_method :set, :[]=

  # Store a value and flush it to disk immediately.
  # @param [#to_s] key the key of the storage slot in the database
  # @param value the value to store
  def set!(key, value)
    set(key, value, true)
  end

  # Remove a key. A deletion record with an empty value is written.
  # @param [#to_s] key the key of the storage slot in the database
  # @param [Boolean] sync if true, sync this deletion immediately
  def delete(key, sync = false)
    name = key.to_s
    write(name, '', sync, true)
    @table.delete(name)
  end

  # Remove a key and flush the deletion to disk immediately.
  # @param [#to_s] key the key of the storage slot in the database
  def delete!(key)
    delete(key, true)
  end

  # Look up the value stored under a key. For a missing key the default
  # (the value, or the result of calling the default block with the key) is
  # stored and returned; without a default nil is returned.
  # Aliased as <tt>get</tt>.
  # @param [#to_s] key the key to retrieve from the database.
  def [](key)
    name = key.to_s
    return @table[name] if @table.has_key?(name)
    return unless default?
    set(name, Proc === @default ? @default.call(name) : @default)
  end
  alias_method :get, :[]

  # Yield every key together with its value.
  # @yield [key, value] blk the iterator for each key value pair.
  # @yieldparam [String] key the key.
  # @yieldparam value the value from the database.
  def each
    keys.each do |name|
      yield name, get(name)
    end
  end

  # Was a default value or default block given?
  def default?
    !@default.nil?
  end

  # Is there a value stored under this key?
  # @param [key#to_s] key the key to check if the DB has a key.
  def has_key?(key)
    @table.has_key?(key.to_s)
  end

  # All keys of the database.
  # @return [Array<String>]
  def keys
    @table.keys
  end

  # Number of stored items.
  # @return [Integer]
  def length
    @table.size
  end
  alias_method :size, :length

  # Convert a value into the string written to disk. Override this method
  # (together with parse) to use something other than <tt>Marshal</tt>.
  # @param value the value to be serialized
  # @return [String]
  def serialize(value)
    Marshal.dump value
  end

  # Convert a string read from disk back into a value; the counterpart of
  # serialize.
  # @param value the serialized value to be parsed
  # @return the value Marshal.load rebuilds from it
  def parse(value)
    Marshal.load value
  end

  # Truncate the database file, drop all in-memory entries and read the
  # file again.
  def empty!
    @writer.truncate!
    @table.clear
    read!
  end
  alias_method :clear, :empty!

  # Force all queued commits to be written to disk.
  def flush!
    @writer.flush!
  end

  # Close the database writer.
  def close!
    @writer.close!
  end

  # Rewrite the database file so that it only contains the current entries.
  def compact!
    # The copy goes into a temporary file named after process and thread
    tmp_file = @file_name + "-#{$$}-#{Thread.current.object_id}"
    copy_db = self.class.new(tmp_file)
    each { |name, value| copy_db.set(name, value) }
    copy_db.close!

    close!

    # Put the compacted copy in place of the original file
    File.rename(tmp_file, @file_name)

    # Reopen the database on the new file
    @writer = Writer.new(@file_name)
    @table.clear
    read!
  end

  # Read the whole log file under a shared lock and replay its records into
  # the in-memory table. Call it again to pick up changed data.
  def read!
    buf = File.open(@file_name, 'rb') do |fd|
      fd.flock(File::LOCK_SH)
      fd.read
    end
    # Record.deserialize consumes one record from the front of buf
    until buf.empty?
      name, data, deleted = Record.deserialize(buf)
      if deleted
        @table.delete(name)
      else
        @table[name] = parse(data)
      end
    end
  end

  private

  # Hand a [key, serialized value, deletion flag] record to the writer and
  # flush if requested.
  def write(key, value, sync = false, delete = false)
    @writer.write([key, serialize(value), delete])
    flush! if sync
  end
end
201
+
202
# Records define how data is serialized and read from disk.
#
# A record is [key, data, deleted]. On disk it is the key length (32 bit
# big-endian, highest bit set for a deletion), the key, the data length, the
# data and a CRC32 over all of that.
module Record
  # Thrown when either key or data is missing
  class UnnacceptableDataError < Exception; end

  # Thrown when a record is truncated or there is a CRC mismatch between the
  # data from the disk and what was written to disk previously.
  class CorruptDataError < Exception; end

  extend self

  # The mask a record uses to check for deletion.
  DELETION_MASK = 1 << 31

  # The serialized representation of the key value pair plus the CRC.
  # @param [Array] record [key, data] or [key, data, deleted]
  # @return [String]
  # @raise [UnnacceptableDataError] if key or data is nil or false
  def serialize(record)
    raise UnnacceptableDataError, 'key and data must be defined' unless record[0] && record[1]
    s = key_data_string(record)
    s << crc_string(s)
  end

  # Read one record from the front of a buffer. The bytes of the record are
  # removed from the buffer.
  # @param [String] buf binary buffer to read from, modified in place
  # @return [Array] [key, data, deleted]
  # @raise [CorruptDataError] if the record is truncated or its CRC is wrong
  def deserialize(buf)
    masked = read32(buf)
    # The lower 31 bits are the key length, the highest bit the deletion flag
    key = buf.slice!(0, masked & (DELETION_MASK - 1))
    data = buf.slice!(0, read32(buf))
    record = [key, data, (masked & DELETION_MASK) != 0]
    raise CorruptDataError, 'CRC mismatch' unless buf.slice!(0, 4) == crc_string(key_data_string(record))
    record
  end

  private

  # Return the deletion flag plus two length prefixed cells
  def key_data_string(record)
    part(record[0], record[0].bytesize + (record[2] ? DELETION_MASK : 0)) << part(record[1], record[1].bytesize)
  end

  # CRC32 of s as 4 big-endian bytes
  def crc_string(s)
    [Zlib.crc32(s, 0)].pack('N')
  end

  # Length prefix followed by the raw bytes of data. The packed prefix is a
  # binary string; appending non-ASCII text in another encoding to it raises
  # Encoding::CompatibilityError as soon as the prefix contains a byte above
  # 0x7f (length of 128 or more, or deletion flag set), so data is appended
  # as binary.
  def part(data, length)
    [length].pack('N') << binary(data)
  end

  # Remove a 32 bit big-endian number from the front of buf and return it.
  def read32(buf)
    word = buf.slice!(0, 4)
    raise CorruptDataError, 'Truncated record' if word.nil? || word.bytesize < 4
    word.unpack('N')[0]
  end

  # Return the bytes of str as a binary string without touching str itself.
  # Strings without encoding support (Ruby 1.8) are returned unchanged.
  def binary(str)
    str.respond_to?(:force_encoding) ? str.dup.force_encoding('BINARY') : str
  end
end
258
+
259
# Writers handle the actual fiddly task of committing data to disk.
# They have a Worker instance that writes in a select loop.
class Writer
  # Open up the file, ready it for binary and nonblocking writing.
  # @param [String] file path of the file to append to
  def initialize(file)
    @file = file
    open!
    @worker = Worker.new(@fd)
  end

  # Send a record to the workers queue.
  def write(record)
    @worker.enqueue record
  end

  # Finish writing
  def finish!
    @worker.finish!
  end

  # Flush pending commits, and restart the worker.
  def flush!
    @worker.flush!
  end

  # Finish writing and close the file descriptor.
  def close!
    finish!
    @fd.close
  end

  # Finish writing, then truncate the file.
  def truncate!
    finish!
    @fd.truncate(0)
    @fd.pos = 0
  end

  private

  # Open the file for binary appending and, where Fcntl is loaded and offers
  # O_NONBLOCK, switch the descriptor to nonblocking mode.
  def open!
    @fd = File.open @file, 'ab'

    if defined?(Fcntl::O_NONBLOCK)
      f = @fd.fcntl(Fcntl::F_GETFL, 0)
      @fd.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK | f)
    end
  end

  # Workers handle the actual fiddly bits of asynchronous io
  # and handle background writes.
  class Worker
    # Start the background thread writing to fd. finish! is also registered
    # to run at process exit.
    def initialize(fd)
      @queue = Queue.new
      @fd = fd
      @thread = Thread.new { work }
      at_exit { finish! }
    end

    # Queue up a write to be committed later.
    def enqueue(record)
      @queue << record
    end

    # Loop and block if we don't have work to do or if
    # the file isn't ready for another write just yet.
    # A nil in the queue ends the loop once the buffer has been written.
    def work
      # Binary buffer, so that sizes and slices below are in bytes
      buf, finished = String.new, false
      until finished && buf.empty?
        # After the terminating nil nothing more is pushed, so popping again
        # would block forever while a partial write is still buffered.
        unless finished
          record = @queue.pop
          if record
            buf << Record.serialize(record).dup.force_encoding(Encoding::BINARY)
          else
            finished = true
          end
        end
        _, writable = IO.select [], [@fd]
        fd = writable && writable.first
        lock(fd) { buf = try_write fd, buf } if fd
      end
      @fd.flush
    end

    # Try and write the buffer to the file via non blocking file writes.
    # @return [String] the bytes that still have to be written; the whole
    #   buffer if the write would block
    def try_write(fd, buf)
      written =
        if defined?(Fcntl::O_NONBLOCK)
          fd.write_nonblock(buf)
        else
          fd.write(buf)
        end
      # written is a byte count, so compare and cut in bytes as well
      if written < buf.bytesize
        buf.byteslice(written, buf.bytesize - written) # didn't finish
      else
        String.new
      end
    rescue Errno::EAGAIN
      buf
    end

    # Run the block while holding an exclusive lock on fd
    def lock(fd)
      fd.flock File::LOCK_EX
      begin
        yield
      ensure
        fd.flock File::LOCK_UN
      end
    end

    # finish! and start up another worker thread.
    def flush!
      finish!
      @thread = Thread.new { work }
      true
    end

    # Push a nil through the queue and block until the write loop is finished.
    def finish!
      @queue.push nil
      @thread.join
    end
  end
end
384
+ end
385
+
386
# Entry point: convert when exactly two paths were given on the command
# line, otherwise print how the script is called.
if ARGV.size == 2
  old_path, new_path = ARGV
  convert(old_path, new_path)
else
  puts "Usage: #{$0} olddb newdb"
end