daybreak 0.1.3 → 0.2.0
- data/.gitignore +1 -0
- data/.travis.yml +5 -0
- data/LICENSE +2 -2
- data/README +1 -3
- data/Rakefile +6 -9
- data/daybreak.gemspec +4 -4
- data/lib/daybreak.rb +3 -5
- data/lib/daybreak/db.rb +308 -114
- data/lib/daybreak/format.rb +52 -0
- data/lib/daybreak/queue.rb +107 -0
- data/lib/daybreak/serializer.rb +39 -0
- data/lib/daybreak/version.rb +3 -2
- data/script/bench +95 -0
- data/script/converter +390 -0
- data/test/test.rb +251 -57
- data/test/test_helper.rb +0 -3
- metadata +12 -11
- data/lib/daybreak/record.rb +0 -62
- data/lib/daybreak/writer.rb +0 -127
- data/test/bench.rb +0 -28
- data/test/compare.rb +0 -47
data/lib/daybreak/format.rb ADDED
@@ -0,0 +1,52 @@
+module Daybreak
+  # Database format serializer and deserializer. You can create
+  # your own implementations of this classes method and define
+  # your own database format!
+  # @api public
+  class Format
+    # Read database header from input stream
+    # @param [#read] input the input stream
+    def read_header(input)
+      raise 'Not a Daybreak database' if input.read(MAGIC.bytesize) != MAGIC
+      ver = input.read(2).unpack('n').first
+      raise "Expected database version #{VERSION}, got #{ver}" if ver != VERSION
+    end
+
+    # Return database header as string
+    def header
+      MAGIC + [VERSION].pack('n')
+    end
+
+    # Serialize record and return string
+    # @param [Array] record an array with [key, value] or [key] if the record is
+    #   deleted
+    def dump(record)
+      data =
+        if record.size == 1
+          [record[0].bytesize, DELETE].pack('NN') << record[0]
+        else
+          [record[0].bytesize, record[1].bytesize].pack('NN') << record[0] << record[1]
+        end
+      data << crc32(data)
+    end
+
+    # Deserialize record from buffer
+    # @param [String] buf the buffer to read from
+    def parse(buf)
+      key_size, value_size = buf[0, 8].unpack('NN')
+      data = buf.slice!(0, 8 + key_size + (value_size == DELETE ? 0 : value_size))
+      raise 'CRC mismatch' unless buf.slice!(0, 4) == crc32(data)
+      value_size == DELETE ? [data[8, key_size]] : [data[8, key_size], data[8 + key_size, value_size]]
+    end
+
+    protected
+
+    MAGIC = 'DAYBREAK'
+    VERSION = 1
+    DELETE = (1 << 32) - 1
+
+    def crc32(s)
+      [Zlib.crc32(s, 0)].pack('N')
+    end
+  end
+end
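
A rough round-trip sketch of the record format above (assuming the released gem is installed, that Format can be instantiated directly as shown, and that zlib is available for the CRC):

require 'zlib'
require 'daybreak'

format = Daybreak::Format.new
record = format.dump(['answer', '42'])   # [key, value] -> length prefixes, payload, CRC
p format.parse(record.dup)               # => ["answer", "42"]

tombstone = format.dump(['answer'])      # a one-element record marks a deletion
p format.parse(tombstone.dup)            # => ["answer"]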
data/lib/daybreak/queue.rb ADDED
@@ -0,0 +1,107 @@
+module Daybreak
+  # Thread safe job queue
+  # @api private
+  class Queue
+    # HACK: Dangerous optimization on MRI which has a
+    # global interpreter lock and makes the @queue array
+    # thread safe.
+    if !defined?(RUBY_ENGINE) || RUBY_ENGINE == 'ruby'
+      def initialize
+        @queue, @full, @empty = [], [], []
+        @stop = false
+        @heartbeat = Thread.new(&method(:heartbeat))
+        @heartbeat.priority = -9
+      end
+
+      def <<(x)
+        @queue << x
+        thread = @full.first
+        thread.wakeup if thread
+      end
+
+      def pop
+        @queue.shift
+        if @queue.empty?
+          thread = @empty.first
+          thread.wakeup if thread
+        end
+      end
+
+      def next
+        while @queue.empty?
+          begin
+            @full << Thread.current
+            # If a push happens before Thread.stop, the thread won't be woken up
+            Thread.stop while @queue.empty?
+          ensure
+            @full.delete(Thread.current)
+          end
+        end
+        @queue.first
+      end
+
+      def flush
+        until @queue.empty?
+          begin
+            @empty << Thread.current
+            # If a pop happens before Thread.stop, the thread won't be woken up
+            Thread.stop until @queue.empty?
+          ensure
+            @empty.delete(Thread.current)
+          end
+        end
+      end
+
+      def stop
+        @stop = true
+        @heartbeat.join
+      end
+
+      private
+
+      # Check threads 10 times per second to avoid deadlocks
+      # since there is a race condition below
+      def heartbeat
+        until @stop
+          @empty.each(&:wakeup)
+          @full.each(&:wakeup)
+          sleep 0.1
+        end
+      end
+    else
+      def initialize
+        @mutex = Mutex.new
+        @full = ConditionVariable.new
+        @empty = ConditionVariable.new
+        @queue = []
+      end
+
+      def <<(x)
+        @mutex.synchronize do
+          @queue << x
+          @full.signal
+        end
+      end
+
+      def pop
+        @mutex.synchronize do
+          @queue.shift
+          @empty.signal if @queue.empty?
+        end
+      end
+
+      def next
+        @mutex.synchronize do
+          @full.wait(@mutex) while @queue.empty?
+          @queue.first
+        end
+      end
+
+      def flush
+        @mutex.synchronize do
+          @empty.wait(@mutex) until @queue.empty?
+        end
+      end
+    end
+  end
+end
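
This queue is driven by a single consumer thread inside Daybreak::DB. A minimal sketch of that producer/consumer pattern; the class is @api private, and the nil stop signal below is my own convention rather than a documented contract:

require 'daybreak'

queue = Daybreak::Queue.new

worker = Thread.new do
  # #next blocks until a job is available and peeks at it;
  # #pop acknowledges the job once it has been handled.
  while (job = queue.next)
    puts "writing #{job}"
    queue.pop
  end
end

3.times { |i| queue << "record-#{i}" }
queue.flush                              # block until the queue has been drained
queue << nil                             # nil makes #next return nil, ending the loop
worker.join
queue.stop if queue.respond_to?(:stop)   # MRI variant only: stop the heartbeat thread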
data/lib/daybreak/serializer.rb ADDED
@@ -0,0 +1,39 @@
+module Daybreak
+  module Serializer
+    # Default serializer which converts
+    # keys to strings and marshalls values
+    # @api public
+    class Default
+      # Return the value of the key to insert into the database
+      def key_for(key)
+        key.to_s
+      end
+
+      # Serialize a value
+      def dump(value)
+        Marshal.dump(value)
+      end
+
+      # Parse a value
+      def load(value)
+        Marshal.load(value)
+      end
+    end
+
+    # Serializer which does nothing
+    # @api public
+    class None
+      def key_for(key)
+        key
+      end
+
+      def dump(value)
+        value
+      end
+
+      def load(value)
+        value
+      end
+    end
+  end
+end
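
A custom serializer only needs to supply the same three methods (key_for, dump, load). A hypothetical JSON-based variant, sketched on that assumption; how a serializer is handed to Daybreak::DB is not shown in this diff, so that wiring is omitted:

require 'json'

class JsonSerializer
  def key_for(key)
    key.to_s
  end

  def dump(value)
    JSON.generate(value)
  end

  def load(value)
    JSON.parse(value)
  end
end

s = JsonSerializer.new
p s.load(s.dump('list' => [1, 2, 3]))   # => {"list"=>[1, 2, 3]}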
data/lib/daybreak/version.rb CHANGED
data/script/bench ADDED
@@ -0,0 +1,95 @@
+#!/usr/bin/env ruby
+
+require File.expand_path(File.dirname(__FILE__)) + '/../test/test_helper.rb'
+require 'benchmark'
+
+RUNS = 20
+
+def measure(instance, &block)
+  samples = []
+  $errors = 0
+  RUNS.times do
+    if block
+      samples << Benchmark.measure(&block).real * 1000
+    else
+      samples << Benchmark.measure do
+        DATA.each do |i|
+          instance[i] = i
+        end
+        DATA.each do |i|
+          $errors += 1 unless instance[i] == i
+        end
+      end.real * 1000
+    end
+  end
+  puts "#{$errors} ERRORS" if $errors > 0
+  instance.clear
+  samples
+end
+
+DICT = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'.freeze
+def uniform
+  min, max = 3, 1024
+  1000.times.map do
+    n = rand(max - min) + max
+    (1..n).map { DICT[rand(DICT.length)] }.join
+  end
+end
+DATA = uniform
+
+def run(instance, message = '', &blk)
+  puts "Running benchmarks for #{instance.class.name} #{message}"
+  measure instance, &blk
+  report measure(instance, &blk)
+  puts '=' * 64
+ensure
+  instance.close if instance.respond_to? :close
+end
+
+def report(samples)
+  samples.sort!
+  total = samples.inject(:+)
+  mean = total / samples.length
+  stddev = Math.sqrt(samples.inject(0) {|m, s| m += (s - mean) ** 2 } / samples.length)
+  puts '%d samples, average time: %.4f ms, std. dev: %.4f ms' % [samples.length, mean, stddev]
+  puts '95%% < %.4f ms' % samples.slice((samples.length * 0.95).to_i)
+end
+
+begin
+  require 'dbm'
+  run DBM.new(DB_PATH + '.dbm')
+rescue Exception => ex
+  puts "DBM not benchmarked: #{ex.message}"
+end
+
+begin
+  require 'gdbm'
+  run GDBM.new(DB_PATH + '.gdbm')
+rescue Exception => ex
+  puts "GDBM not benchmarked: #{ex.message}"
+end
+
+run Hash.new
+run Daybreak::DB.new DB_PATH
+
+db = Daybreak::DB.new DB_PATH
+run db, 'with lock' do
+  DATA.each do |i|
+    db.lock { db[i] = i }
+  end
+  DATA.each do |i|
+    db.lock { $errors += 1 unless db[i] == i }
+  end
+end
+
+db = Daybreak::DB.new DB_PATH
+run db, 'with sync' do
+  DATA.each do |i|
+    db[i] = i
+    db.sync
+  end
+  DATA.each do |i|
+    $errors += 1 unless db[i] == i
+    db.sync
+  end
+end
data/script/converter ADDED
@@ -0,0 +1,390 @@
+#!/usr/bin/env ruby
+
+$: << File.join(File.dirname(__FILE__), '..', 'lib')
+require 'daybreak'
+
+def convert(oldfile, newfile)
+  olddb = Daybreak1::DB.new(oldfile)
+  newdb = Daybreak::DB.new(newfile)
+  olddb.each do |key, value|
+    newdb[key] = value
+  end
+  olddb.close!
+  newdb.close
+end
+
+module Daybreak1
+  # Daybreak::DB contains the public api for Daybreak, you may extend it like
+  # any other Ruby class (i.e. to overwrite serialize and parse). It includes
+  # Enumerable for functional goodies like map, each, reduce and friends.
+  class DB
+    include Enumerable
+
+    # Create a new Daybreak::DB. The second argument is the default value
+    # to store when accessing a previously unset key, this follows the
+    # Hash standard.
+    # @param [String] file the path to the db file
+    # @param default the default value to store and return when a key is
+    #   not yet in the database.
+    # @yield [key] a block that will return the default value to store.
+    # @yieldparam [String] key the key to be stored.
+    def initialize(file, default=nil, &blk)
+      @table = {}
+      @file_name = file
+      @writer = Writer.new(@file_name)
+      @default = block_given? ? blk : default
+      read!
+    end
+
+    # Set a key in the database to be written at some future date. If the data
+    # needs to be persisted immediately, call <tt>db.set(key, value, true)</tt>.
+    # @param [#to_s] key the key of the storage slot in the database
+    # @param value the value to store
+    # @param [Boolean] sync if true, sync this value immediately
+    def []=(key, value, sync = false)
+      key = key.to_s
+      write key, value, sync
+      @table[key] = value
+    end
+    alias_method :set, :"[]="
+
+    # set! flushes data immediately to disk.
+    # @param [#to_s] key the key of the storage slot in the database
+    # @param value the value to store
+    def set!(key, value)
+      set key, value, true
+    end
+
+    # Delete a key from the database
+    # @param [#to_s] key the key of the storage slot in the database
+    # @param [Boolean] sync if true, sync this deletion immediately
+    def delete(key, sync = false)
+      key = key.to_s
+      write key, '', sync, true
+      @table.delete key
+    end
+
+    # delete! immediately deletes the key on disk.
+    # @param [#to_s] key the key of the storage slot in the database
+    def delete!(key)
+      delete key, true
+    end
+
+    # Retrieve a value at key from the database. If the default value was specified
+    # when this database was created, that value will be set and returned. Aliased
+    # as <tt>get</tt>.
+    # @param [#to_s] key the value to retrieve from the database.
+    def [](key)
+      key = key.to_s
+      if @table.has_key? key
+        @table[key]
+      elsif default?
+        set key, Proc === @default ? @default.call(key) : @default
+      end
+    end
+    alias_method :get, :"[]"
+
+    # Iterate over the key, value pairs in the database.
+    # @yield [key, value] blk the iterator for each key value pair.
+    # @yieldparam [String] key the key.
+    # @yieldparam value the value from the database.
+    def each
+      keys.each { |k| yield(k, get(k)) }
+    end
+
+    # Does this db have a default value.
+    def default?
+      !@default.nil?
+    end
+
+    # Does this db have a value for this key?
+    # @param [key#to_s] key the key to check if the DB has a key.
+    def has_key?(key)
+      @table.has_key? key.to_s
+    end
+
+    # Return the keys in the db.
+    # @return [Array<String>]
+    def keys
+      @table.keys
+    end
+
+    # Return the number of stored items.
+    # @return [Integer]
+    def length
+      @table.keys.length
+    end
+    alias_method :size, :length
+
+    # Serialize the data for writing to disk, if you don't want to use <tt>Marshal</tt>
+    # overwrite this method.
+    # @param value the value to be serialized
+    # @return [String]
+    def serialize(value)
+      Marshal.dump(value)
+    end
+
+    # Parse the serialized value from disk, like serialize if you want to use a
+    # different serialization method overwrite this method.
+    # @param value the value to be parsed
+    # @return [String]
+    def parse(value)
+      Marshal.load(value)
+    end
+
+    # Empty the database file.
+    def empty!
+      @writer.truncate!
+      @table.clear
+      read!
+    end
+    alias_method :clear, :empty!
+
+    # Force all queued commits to be written to disk.
+    def flush!
+      @writer.flush!
+    end
+
+    # Close the database for reading and writing.
+    def close!
+      @writer.close!
+    end
+
+    # Compact the database to remove stale commits and reduce the file size.
+    def compact!
+      # Create a new temporary database
+      tmp_file = @file_name + "-#{$$}-#{Thread.current.object_id}"
+      copy_db = self.class.new tmp_file
+
+      # Copy the database key by key into the temporary table
+      each do |key, value|
+        copy_db.set(key, get(key))
+      end
+      copy_db.close!
+
+      close!
+
+      # Move the copy into place
+      File.rename tmp_file, @file_name
+
+      # Reopen this database
+      @writer = Writer.new(@file_name)
+      @table.clear
+      read!
+    end
+
+    # Read all values from the log file. If you want to check for changed data
+    # call this again.
+    def read!
+      buf = nil
+      File.open(@file_name, 'rb') do |fd|
+        fd.flock(File::LOCK_SH)
+        buf = fd.read
+      end
+      until buf.empty?
+        key, data, deleted = Record.deserialize(buf)
+        if deleted
+          @table.delete key
+        else
+          @table[key] = parse(data)
+        end
+      end
+    end
+
+    private
+
+    def write(key, value, sync = false, delete = false)
+      @writer.write([key, serialize(value), delete])
+      flush! if sync
+    end
+  end
+
+  # Records define how data is serialized and read from disk.
+  module Record
+    # Thrown when either key or data is missing
+    class UnnacceptableDataError < Exception; end
+
+    # Thrown when there is a CRC mismatch between the data from the disk
+    # and what was written to disk previously.
+    class CorruptDataError < Exception; end
+
+    extend self
+
+    # The mask a record uses to check for deletion.
+    DELETION_MASK = 1 << 31
+
+    # The serialized representation of the key value pair plus the CRC.
+    # @return [String]
+    def serialize(record)
+      raise UnnacceptableDataError, 'key and data must be defined' unless record[0] && record[1]
+      s = key_data_string(record)
+      s << crc_string(s)
+    end
+
+    # Create a new record to read from IO.
+    # @param [#read] io an IO instance to read from
+    def deserialize(buf)
+      record = []
+      masked = read32(buf)
+      # Read the record's key bytes
+      record << buf.slice!(0, masked & (DELETION_MASK - 1)) <<
+        # Read the record's value bytes
+        buf.slice!(0, read32(buf)) <<
+        # Set the deletion flag
+        ((masked & DELETION_MASK) != 0)
+      raise CorruptDataError, 'CRC mismatch' unless buf.slice!(0, 4) == crc_string(key_data_string(record))
+      record
+    end
+
+    private
+
+    # Return the deletion flag plus two length prefixed cells
+    def key_data_string(record)
+      part(record[0], record[0].bytesize + (record[2] ? DELETION_MASK : 0)) << part(record[1], record[1].bytesize)
+    end
+
+    def crc_string(s)
+      [Zlib.crc32(s, 0)].pack('N')
+    end
+
+    def part(data, length)
+      [length].pack('N') << data
+    end
+
+    def read32(buf)
+      buf.slice!(0, 4).unpack('N')[0]
+    end
+  end
+
+  # Writer's handle the actually fiddly task of committing data to disk.
+  # They have a Worker instance that writes in a select loop.
+  class Writer
+    # Open up the file, ready it for binary and nonblocking writing.
+    def initialize(file)
+      @file = file
+      open!
+      @worker = Worker.new(@fd)
+    end
+
+    # Send a record to the workers queue.
+    def write(record)
+      @worker.enqueue record
+    end
+
+    # Finish writing
+    def finish!
+      @worker.finish!
+    end
+
+    # Flush pending commits, and restart the worker.
+    def flush!
+      @worker.flush!
+    end
+
+    # Finish writing and close the file descriptor.
+    def close!
+      finish!
+      @fd.close
+    end
+
+    # Truncate the file.
+    def truncate!
+      finish!
+      @fd.truncate(0)
+      @fd.pos = 0
+    end
+
+    private
+
+    def open!
+      @fd = File.open @file, 'ab'
+
+      if defined?(Fcntl::O_NONBLOCK)
+        f = @fd.fcntl(Fcntl::F_GETFL, 0)
+        @fd.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK | f)
+      end
+    end
+
+    # Workers handle the actual fiddly bits of asynchronous io and
+    # and handle background writes.
+    class Worker
+      def initialize(fd)
+        @queue = Queue.new
+        @fd = fd
+        @thread = Thread.new { work }
+        at_exit { finish! }
+      end
+
+      # Queue up a write to be committed later.
+      def enqueue(record)
+        @queue << record
+      end
+
+      # Loop and block if we don't have work to do or if
+      # the file isn't ready for another write just yet.
+      def work
+        buf, finished = '', false
+        until finished && buf.empty?
+          record = @queue.pop
+          if record
+            buf << Record.serialize(record)
+          else
+            finished = true
+          end
+          read, write = IO.select [], [@fd]
+          if write and fd = write.first
+            lock(fd) { buf = try_write fd, buf }
+          end
+        end
+        @fd.flush
+      end
+
+      # Try and write the buffer to the file via non blocking file writes.
+      # If the write fails try again.
+      def try_write(fd, buf)
+        if defined?(Fcntl::O_NONBLOCK)
+          s = fd.write_nonblock(buf)
+        else
+          s = fd.write(buf)
+        end
+        if s < buf.length
+          buf = buf[s..-1] # didn't finish
+        else
+          buf = ""
+        end
+        buf
+      rescue Errno::EAGAIN
+        buf
+      end
+
+      # Lock a file with the type <tt>lock</tt>
+      def lock(fd)
+        fd.flock File::LOCK_EX
+        begin
+          yield
+        ensure
+          fd.flock File::LOCK_UN
+        end
+      end
+
+      # finish! and start up another worker thread.
+      def flush!
+        finish!
+        @thread = Thread.new { work }
+        true
+      end
+
+      # Push a nil through the queue and block until the write loop is finished.
+      def finish!
+        @queue.push nil
+        @thread.join
+      end
+    end
+  end
+end
+
+if ARGV.size != 2
+  puts "Usage: #{$0} olddb newdb"
+else
+  convert(ARGV[0], ARGV[1])
+end
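
The script embeds the old 0.1.x reader as Daybreak1 and copies every record into a fresh 0.2.0 database, following the usage string it prints. An invocation sketch, with placeholder file names:

ruby script/converter old.db new.db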