daybreak 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/.travis.yml +5 -0
- data/LICENSE +2 -2
- data/README +1 -3
- data/Rakefile +6 -9
- data/daybreak.gemspec +4 -4
- data/lib/daybreak.rb +3 -5
- data/lib/daybreak/db.rb +308 -114
- data/lib/daybreak/format.rb +52 -0
- data/lib/daybreak/queue.rb +107 -0
- data/lib/daybreak/serializer.rb +39 -0
- data/lib/daybreak/version.rb +3 -2
- data/script/bench +95 -0
- data/script/converter +390 -0
- data/test/test.rb +251 -57
- data/test/test_helper.rb +0 -3
- metadata +12 -11
- data/lib/daybreak/record.rb +0 -62
- data/lib/daybreak/writer.rb +0 -127
- data/test/bench.rb +0 -28
- data/test/compare.rb +0 -47
@@ -0,0 +1,52 @@
|
|
1
|
+
module Daybreak
  # Serializes and deserializes the on-disk database format. Subclass
  # and override these methods to implement a custom file format.
  # @api public
  class Format
    # Verify the database header at the front of the input stream.
    # @param [#read] input the input stream
    # @raise [RuntimeError] if the magic bytes or version don't match
    def read_header(input)
      magic = input.read(MAGIC.bytesize)
      raise 'Not a Daybreak database' unless magic == MAGIC
      ver = input.read(2).unpack('n').first
      return if ver == VERSION
      raise "Expected database version #{VERSION}, got #{ver}"
    end

    # @return [String] the serialized database header
    def header
      MAGIC + [VERSION].pack('n')
    end

    # Serialize a record to its binary representation.
    # @param [Array] record [key, value], or [key] if the record is deleted
    # @return [String] length-prefixed payload followed by a CRC-32 trailer
    def dump(record)
      key = record[0]
      data =
        if record.size == 1
          # A deletion is flagged by the DELETE sentinel in the value-size slot
          [key.bytesize, DELETE].pack('NN') << key
        else
          value = record[1]
          [key.bytesize, value.bytesize].pack('NN') << key << value
        end
      data << crc32(data)
    end

    # Deserialize one record from the front of the buffer, consuming it.
    # @param [String] buf the buffer to read from (mutated via slice!)
    # @return [Array] [key, value], or [key] for a deletion
    # @raise [RuntimeError] on checksum mismatch
    def parse(buf)
      key_size, value_size = buf[0, 8].unpack('NN')
      deleted = value_size == DELETE
      data = buf.slice!(0, 8 + key_size + (deleted ? 0 : value_size))
      raise 'CRC mismatch' unless buf.slice!(0, 4) == crc32(data)
      key = data[8, key_size]
      deleted ? [key] : [key, data[8 + key_size, value_size]]
    end

    protected

    MAGIC = 'DAYBREAK'
    VERSION = 1
    DELETE = (1 << 32) - 1

    # Big-endian packed CRC-32 of the given string.
    def crc32(s)
      [Zlib.crc32(s, 0)].pack('N')
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module Daybreak
  # Thread safe job queue
  # @api private
  class Queue
    # HACK: Dangerous optimization on MRI which has a
    # global interpreter lock and makes the @queue array
    # thread safe.
    if !defined?(RUBY_ENGINE) || RUBY_ENGINE == 'ruby'
      def initialize
        # @full holds threads blocked in #next, @empty threads blocked in #flush
        @queue, @full, @empty = [], [], []
        @stop = false
        # Low-priority watchdog thread; see #heartbeat below.
        @heartbeat = Thread.new(&method(:heartbeat))
        @heartbeat.priority = -9
      end

      # Push a job and wake one consumer blocked in #next.
      def <<(x)
        @queue << x
        thread = @full.first
        thread.wakeup if thread
      end

      # Drop the head job; wakes threads blocked in #flush once the queue
      # drains. NOTE(review): the shifted value is discarded — consumers
      # appear to read the head via #next before calling #pop; confirm
      # against db.rb.
      def pop
        @queue.shift
        if @queue.empty?
          thread = @empty.first
          thread.wakeup if thread
        end
      end

      # Block until a job is available, then return it WITHOUT removing it.
      def next
        while @queue.empty?
          begin
            @full << Thread.current
            # If a push happens before Thread.stop, the thread won't be woken up
            Thread.stop while @queue.empty?
          ensure
            @full.delete(Thread.current)
          end
        end
        @queue.first
      end

      # Block until the queue has fully drained.
      def flush
        until @queue.empty?
          begin
            @empty << Thread.current
            # If a pop happens before Thread.stop, the thread won't be woken up
            Thread.stop until @queue.empty?
          ensure
            @empty.delete(Thread.current)
          end
        end
      end

      # Halt the heartbeat watchdog and wait for it to exit.
      def stop
        @stop = true
        @heartbeat.join
      end

      private

      # Check threads 10 times per second to avoid deadlocks
      # since there is a race condition below
      def heartbeat
        until @stop
          @empty.each(&:wakeup)
          @full.each(&:wakeup)
          sleep 0.1
        end
      end
    else
      # Portable implementation using a mutex and condition variables,
      # used on engines without MRI's global interpreter lock.
      # NOTE(review): this branch defines no #stop — there is no heartbeat
      # thread here, but callers invoking #stop on non-MRI engines would
      # raise NoMethodError; confirm against db.rb.
      def initialize
        @mutex = Mutex.new
        @full = ConditionVariable.new
        @empty = ConditionVariable.new
        @queue = []
      end

      # Push a job and signal one waiting consumer.
      def <<(x)
        @mutex.synchronize do
          @queue << x
          @full.signal
        end
      end

      # Drop the head job; signal flushers when the queue drains.
      def pop
        @mutex.synchronize do
          @queue.shift
          @empty.signal if @queue.empty?
        end
      end

      # Block until a job is available, then return it WITHOUT removing it.
      def next
        @mutex.synchronize do
          @full.wait(@mutex) while @queue.empty?
          @queue.first
        end
      end

      # Block until the queue has fully drained.
      def flush
        @mutex.synchronize do
          @empty.wait(@mutex) until @queue.empty?
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Daybreak
  # Pluggable serializers used by the database.
  module Serializer
    # Standard serializer: stringifies keys and round-trips values
    # through Marshal.
    # @api public
    class Default
      # Normalize a key for storage.
      # @return [String] the key converted to a string
      def key_for(key)
        key.to_s
      end

      # Encode a value for writing to disk.
      # @return [String] the marshalled value
      def dump(value)
        Marshal.dump(value)
      end

      # Decode a value read from disk.
      # @return the unmarshalled value
      def load(value)
        Marshal.load(value)
      end
    end

    # Pass-through serializer: keys and values are stored verbatim.
    # @api public
    class None
      # @return the key, unchanged
      def key_for(key)
        key
      end

      # @return the value, unchanged
      def dump(value)
        value
      end

      # @return the value, unchanged
      def load(value)
        value
      end
    end
  end
end
|
data/lib/daybreak/version.rb
CHANGED
data/script/bench
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__)) + '/../test/test_helper.rb'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
RUNS = 20
|
7
|
+
|
8
|
+
# Time RUNS iterations of either the supplied block or the default
# write-then-verify workload against +instance+.
# Reports $errors (if any), clears the store, and returns the samples
# in milliseconds.
def measure(instance, &block)
  $errors = 0
  workload = block || proc do
    DATA.each { |i| instance[i] = i }
    DATA.each { |i| $errors += 1 unless instance[i] == i }
  end
  samples = RUNS.times.map { Benchmark.measure(&workload).real * 1000 }
  puts "#{$errors} ERRORS" if $errors > 0
  instance.clear
  samples
end
|
29
|
+
|
30
|
+
# Alphabet used to build random benchmark strings.
DICT = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'.freeze

# Generate 1000 random strings whose lengths are uniformly distributed
# in [min, max).
# BUGFIX: the length was previously computed as rand(max - min) + max,
# which yields lengths in [max, 2*max - min) and ignores the intended
# lower bound entirely; the correct form is rand(max - min) + min.
def uniform
  min, max = 3, 1024
  1000.times.map do
    n = rand(max - min) + min
    (1..n).map { DICT[rand(DICT.length)] }.join
  end
end
DATA = uniform
|
39
|
+
|
40
|
+
# Benchmark +instance+: one warm-up pass whose results are discarded,
# then a measured pass that gets reported. Always closes the instance
# afterwards if it supports #close.
def run(instance, message = '', &blk)
  puts "Running benchmarks for #{instance.class.name} #{message}"
  # Warm-up run; results are thrown away
  measure(instance, &blk)
  samples = measure(instance, &blk)
  report(samples)
  puts '=' * 64
ensure
  instance.close if instance.respond_to?(:close)
end
|
48
|
+
|
49
|
+
# Print sample count, mean, standard deviation and the 95th-percentile
# value of the collected timings (in milliseconds).
# Sorts +samples+ in place.
def report(samples)
  samples.sort!
  count = samples.length
  mean = samples.inject(:+) / count
  variance = samples.inject(0) { |acc, s| acc + (s - mean) ** 2 } / count
  stddev = Math.sqrt(variance)
  puts '%d samples, average time: %.4f ms, std. dev: %.4f ms' % [count, mean, stddev]
  puts '95%% < %.4f ms' % samples.slice((count * 0.95).to_i)
end
|
57
|
+
|
58
|
+
# Benchmark DBM when the extension can be loaded. `rescue Exception` is
# deliberate here: a failed require raises LoadError, which is a
# ScriptError and would escape a bare rescue (StandardError).
begin
  require 'dbm'
  run DBM.new(DB_PATH + '.dbm')
rescue Exception => ex
  puts "DBM not benchmarked: #{ex.message}"
end

# Same for GDBM.
begin
  require 'gdbm'
  run GDBM.new(DB_PATH + '.gdbm')
rescue Exception => ex
  puts "GDBM not benchmarked: #{ex.message}"
end

# Baselines: in-memory Hash, then Daybreak with its default write path.
run Hash.new
run Daybreak::DB.new DB_PATH

# Daybreak with an explicit lock around every single access.
db = Daybreak::DB.new DB_PATH
run db, 'with lock' do
  DATA.each do |i|
    db.lock { db[i] = i }
  end
  DATA.each do |i|
    db.lock { $errors += 1 unless db[i] == i }
  end
end

# Daybreak syncing to disk after every single operation.
db = Daybreak::DB.new DB_PATH
run db, 'with sync' do
  DATA.each do |i|
    db[i] = i
    db.sync
  end
  DATA.each do |i|
    $errors += 1 unless db[i] == i
    db.sync
  end
end
|
data/script/converter
ADDED
@@ -0,0 +1,390 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
require 'daybreak'
|
5
|
+
|
6
|
+
# Copy every record from a daybreak 0.1.x database file into a freshly
# created 0.2.x database, then close both handles.
def convert(oldfile, newfile)
  source = Daybreak1::DB.new(oldfile)
  target = Daybreak::DB.new(newfile)
  source.each { |key, value| target[key] = value }
  source.close!
  target.close
end
|
15
|
+
|
16
|
+
module Daybreak1
|
17
|
+
  # Daybreak::DB contains the public api for Daybreak, you may extend it like
  # any other Ruby class (i.e. to overwrite serialize and parse). It includes
  # Enumerable for functional goodies like map, each, reduce and friends.
  # This is the legacy 0.1.x implementation, embedded here only so the
  # converter can read old database files.
  class DB
    include Enumerable

    # Create a new Daybreak::DB. The second argument is the default value
    # to store when accessing a previously unset key, this follows the
    # Hash standard.
    # @param [String] file the path to the db file
    # @param default the default value to store and return when a key is
    #   not yet in the database.
    # @yield [key] a block that will return the default value to store.
    # @yieldparam [String] key the key to be stored.
    def initialize(file, default=nil, &blk)
      @table = {}
      @file_name = file
      @writer = Writer.new(@file_name)
      # A block takes precedence over the positional default
      @default = block_given? ? blk : default
      read!
    end

    # Set a key in the database to be written at some future date. If the data
    # needs to be persisted immediately, call <tt>db.set(key, value, true)</tt>.
    # @param [#to_s] key the key of the storage slot in the database
    # @param value the value to store
    # @param [Boolean] sync if true, sync this value immediately
    def []=(key, value, sync = false)
      key = key.to_s
      write key, value, sync
      @table[key] = value
    end
    alias_method :set, :"[]="

    # set! flushes data immediately to disk.
    # @param [#to_s] key the key of the storage slot in the database
    # @param value the value to store
    def set!(key, value)
      set key, value, true
    end

    # Delete a key from the database
    # @param [#to_s] key the key of the storage slot in the database
    # @param [Boolean] sync if true, sync this deletion immediately
    def delete(key, sync = false)
      key = key.to_s
      # An empty value is written with the delete flag set
      write key, '', sync, true
      @table.delete key
    end

    # delete! immediately deletes the key on disk.
    # @param [#to_s] key the key of the storage slot in the database
    def delete!(key)
      delete key, true
    end

    # Retrieve a value at key from the database. If the default value was specified
    # when this database was created, that value will be set and returned. Aliased
    # as <tt>get</tt>.
    # @param [#to_s] key the value to retrieve from the database.
    def [](key)
      key = key.to_s
      if @table.has_key? key
        @table[key]
      elsif default?
        # Materialize the default into the database on first access
        set key, Proc === @default ? @default.call(key) : @default
      end
    end
    alias_method :get, :"[]"

    # Iterate over the key, value pairs in the database.
    # @yield [key, value] blk the iterator for each key value pair.
    # @yieldparam [String] key the key.
    # @yieldparam value the value from the database.
    def each
      keys.each { |k| yield(k, get(k)) }
    end

    # Does this db have a default value.
    def default?
      !@default.nil?
    end

    # Does this db have a value for this key?
    # @param [key#to_s] key the key to check if the DB has a key.
    def has_key?(key)
      @table.has_key? key.to_s
    end

    # Return the keys in the db.
    # @return [Array<String>]
    def keys
      @table.keys
    end

    # Return the number of stored items.
    # @return [Integer]
    def length
      @table.keys.length
    end
    alias_method :size, :length

    # Serialize the data for writing to disk, if you don't want to use <tt>Marshal</tt>
    # overwrite this method.
    # @param value the value to be serialized
    # @return [String]
    def serialize(value)
      Marshal.dump(value)
    end

    # Parse the serialized value from disk, like serialize if you want to use a
    # different serialization method overwrite this method.
    # @param value the value to be parsed
    # @return [String]
    def parse(value)
      Marshal.load(value)
    end

    # Empty the database file.
    def empty!
      @writer.truncate!
      @table.clear
      read!
    end
    alias_method :clear, :empty!

    # Force all queued commits to be written to disk.
    def flush!
      @writer.flush!
    end

    # Close the database for reading and writing.
    def close!
      @writer.close!
    end

    # Compact the database to remove stale commits and reduce the file size.
    def compact!
      # Create a new temporary database
      tmp_file = @file_name + "-#{$$}-#{Thread.current.object_id}"
      copy_db = self.class.new tmp_file

      # Copy the database key by key into the temporary table
      each do |key, value|
        copy_db.set(key, get(key))
      end
      copy_db.close!

      close!

      # Move the copy into place
      File.rename tmp_file, @file_name

      # Reopen this database
      @writer = Writer.new(@file_name)
      @table.clear
      read!
    end

    # Read all values from the log file. If you want to check for changed data
    # call this again.
    def read!
      buf = nil
      File.open(@file_name, 'rb') do |fd|
        # Shared lock: concurrent readers allowed, writers excluded
        fd.flock(File::LOCK_SH)
        buf = fd.read
      end
      # Replay the log; later records win, deletions remove earlier entries
      until buf.empty?
        key, data, deleted = Record.deserialize(buf)
        if deleted
          @table.delete key
        else
          @table[key] = parse(data)
        end
      end
    end

    private

    # Queue a record for the background writer; optionally force a flush
    # so the record is on disk before returning.
    def write(key, value, sync = false, delete = false)
      @writer.write([key, serialize(value), delete])
      flush! if sync
    end
  end
|
201
|
+
|
202
|
+
  # Records define how data is serialized and read from disk.
  module Record
    # Thrown when either key or data is missing
    # NOTE(review): subclasses Exception rather than StandardError, so a
    # bare rescue will not catch it — presumably intentional; confirm.
    class UnnacceptableDataError < Exception; end

    # Thrown when there is a CRC mismatch between the data from the disk
    # and what was written to disk previously.
    class CorruptDataError < Exception; end

    extend self

    # The mask a record uses to check for deletion.
    DELETION_MASK = 1 << 31

    # The serialized representation of the key value pair plus the CRC.
    # @return [String]
    def serialize(record)
      raise UnnacceptableDataError, 'key and data must be defined' unless record[0] && record[1]
      s = key_data_string(record)
      s << crc_string(s)
    end

    # Read one record from the front of the buffer, consuming it.
    # @param [String] buf the buffer to read from (mutated via slice!)
    # @return [Array] [key, data, deleted]
    # @raise [CorruptDataError] on checksum mismatch
    def deserialize(buf)
      record = []
      # First 32-bit cell: key length with the deletion flag in the top bit
      masked = read32(buf)
      # Read the record's key bytes
      record << buf.slice!(0, masked & (DELETION_MASK - 1)) <<
        # Read the record's value bytes
        buf.slice!(0, read32(buf)) <<
        # Set the deletion flag
        ((masked & DELETION_MASK) != 0)
      raise CorruptDataError, 'CRC mismatch' unless buf.slice!(0, 4) == crc_string(key_data_string(record))
      record
    end

    private

    # Return the deletion flag plus two length prefixed cells
    def key_data_string(record)
      part(record[0], record[0].bytesize + (record[2] ? DELETION_MASK : 0)) << part(record[1], record[1].bytesize)
    end

    # Big-endian packed CRC-32 of the given string.
    def crc_string(s)
      [Zlib.crc32(s, 0)].pack('N')
    end

    # One length-prefixed cell: 32-bit big-endian length, then the data.
    def part(data, length)
      [length].pack('N') << data
    end

    # Consume and return one 32-bit big-endian integer from the buffer.
    def read32(buf)
      buf.slice!(0, 4).unpack('N')[0]
    end
  end
|
258
|
+
|
259
|
+
  # Writers handle the actually fiddly task of committing data to disk.
  # They have a Worker instance that writes in a select loop.
  class Writer
    # Open up the file, ready it for binary and nonblocking writing.
    def initialize(file)
      @file = file
      open!
      @worker = Worker.new(@fd)
    end

    # Send a record to the workers queue.
    def write(record)
      @worker.enqueue record
    end

    # Finish writing
    def finish!
      @worker.finish!
    end

    # Flush pending commits, and restart the worker.
    def flush!
      @worker.flush!
    end

    # Finish writing and close the file descriptor.
    def close!
      finish!
      @fd.close
    end

    # Truncate the file.
    def truncate!
      finish!
      @fd.truncate(0)
      @fd.pos = 0
    end

    private

    # Open an append-only handle and switch it to non-blocking mode
    # where Fcntl supports it.
    def open!
      @fd = File.open @file, 'ab'

      if defined?(Fcntl::O_NONBLOCK)
        f = @fd.fcntl(Fcntl::F_GETFL, 0)
        @fd.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK | f)
      end
    end

    # Workers handle the actual fiddly bits of asynchronous io and
    # and handle background writes.
    class Worker
      def initialize(fd)
        @queue = Queue.new
        @fd = fd
        @thread = Thread.new { work }
        # Make sure pending writes are drained on process exit
        at_exit { finish! }
      end

      # Queue up a write to be committed later.
      def enqueue(record)
        @queue << record
      end

      # Loop and block if we don't have work to do or if
      # the file isn't ready for another write just yet.
      # A nil record on the queue is the shutdown sentinel.
      def work
        buf, finished = '', false
        until finished && buf.empty?
          record = @queue.pop
          if record
            buf << Record.serialize(record)
          else
            finished = true
          end
          # Wait until the descriptor is writable
          read, write = IO.select [], [@fd]
          if write and fd = write.first
            lock(fd) { buf = try_write fd, buf }
          end
        end
        @fd.flush
      end

      # Try and write the buffer to the file via non blocking file writes.
      # If the write fails try again.
      def try_write(fd, buf)
        if defined?(Fcntl::O_NONBLOCK)
          s = fd.write_nonblock(buf)
        else
          s = fd.write(buf)
        end
        if s < buf.length
          buf = buf[s..-1] # didn't finish
        else
          buf = ""
        end
        buf
      rescue Errno::EAGAIN
        # Descriptor not ready; keep the buffer and retry on the next pass
        buf
      end

      # Hold an exclusive flock around the yielded write, always releasing it.
      def lock(fd)
        fd.flock File::LOCK_EX
        begin
          yield
        ensure
          fd.flock File::LOCK_UN
        end
      end

      # finish! and start up another worker thread.
      def flush!
        finish!
        @thread = Thread.new { work }
        true
      end

      # Push a nil through the queue and block until the write loop is finished.
      def finish!
        @queue.push nil
        @thread.join
      end
    end
  end
|
384
|
+
end
|
385
|
+
|
386
|
+
# Entry point: expect exactly two arguments — the old and new db paths.
if ARGV.size == 2
  convert(ARGV[0], ARGV[1])
else
  puts "Usage: #{$0} olddb newdb"
end
|