daybreak 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/.travis.yml +5 -0
- data/LICENSE +2 -2
- data/README +1 -3
- data/Rakefile +6 -9
- data/daybreak.gemspec +4 -4
- data/lib/daybreak.rb +3 -5
- data/lib/daybreak/db.rb +308 -114
- data/lib/daybreak/format.rb +52 -0
- data/lib/daybreak/queue.rb +107 -0
- data/lib/daybreak/serializer.rb +39 -0
- data/lib/daybreak/version.rb +3 -2
- data/script/bench +95 -0
- data/script/converter +390 -0
- data/test/test.rb +251 -57
- data/test/test_helper.rb +0 -3
- metadata +12 -11
- data/lib/daybreak/record.rb +0 -62
- data/lib/daybreak/writer.rb +0 -127
- data/test/bench.rb +0 -28
- data/test/compare.rb +0 -47
data/.travis.yml
CHANGED
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c) 2012
|
1
|
+
Copyright (c) 2012 ProPublica
|
2
2
|
|
3
3
|
MIT License
|
4
4
|
|
@@ -19,4 +19,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
19
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
20
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
21
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
CHANGED
@@ -5,10 +5,8 @@
|
|
5
5
|
|
6
6
|
Daybreak is a simple key value store for ruby. It has user defined persistence,
|
7
7
|
and all data is stored in a table in memory so ruby niceties are available.
|
8
|
-
Daybreak is faster than other ruby options like pstore and dbm.
|
8
|
+
Daybreak is faster than any other ruby option, such as pstore or dbm.
|
9
9
|
|
10
10
|
$ gem install daybreak
|
11
11
|
|
12
12
|
Docs: http://propublica.github.com/daybreak/
|
13
|
-
Issue Tracker: http://propublica.github.com/daybreak/issues
|
14
|
-
Benchmarks: https://gist.github.com/4146590
|
data/Rakefile
CHANGED
@@ -1,23 +1,18 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
2
|
require "bundler/gem_tasks"
|
3
3
|
|
4
|
-
task :
|
5
|
-
|
4
|
+
task :test do
|
5
|
+
ruby 'test/test.rb'
|
6
6
|
end
|
7
7
|
|
8
8
|
desc "Run benchmarks"
|
9
9
|
task :bench do
|
10
|
-
|
11
|
-
end
|
12
|
-
|
13
|
-
desc "Run comparisons with other libraries"
|
14
|
-
task :compare do
|
15
|
-
require "./test/compare.rb"
|
10
|
+
ruby 'script/bench'
|
16
11
|
end
|
17
12
|
|
18
13
|
desc "Profile a simple run"
|
19
14
|
task :prof do
|
20
|
-
|
15
|
+
ruby 'test/prof.rb'
|
21
16
|
end
|
22
17
|
|
23
18
|
require 'erb'
|
@@ -34,3 +29,5 @@ task :publish do |t|
|
|
34
29
|
`git push`
|
35
30
|
`git checkout master`
|
36
31
|
end
|
32
|
+
|
33
|
+
task :default => :test
|
data/daybreak.gemspec
CHANGED
@@ -2,10 +2,10 @@
|
|
2
2
|
require File.expand_path('../lib/daybreak/version', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Jeff Larson"]
|
6
|
-
gem.email = ["thejefflarson@gmail.com"]
|
7
|
-
gem.description = %q{
|
8
|
-
gem.summary = %q{Daybreak provides an in memory key-value store
|
5
|
+
gem.authors = ["Jeff Larson", "Daniel Mendler"]
|
6
|
+
gem.email = ["thejefflarson@gmail.com", "mail@daniel-mendler.de"]
|
7
|
+
gem.description = %q{Incredibly fast pure-ruby key-value store}
|
8
|
+
gem.summary = %q{Daybreak provides an incredibly fast pure-ruby in memory key-value store, which is multi-process safe and uses a journal log to store the data.}
|
9
9
|
gem.homepage = "http://propublica.github.com/daybreak/"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\).reject {|f| f =~ /^(index)/}
|
data/lib/daybreak.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
require 'tempfile'
|
2
1
|
require 'thread'
|
3
|
-
require 'fcntl'
|
4
2
|
require 'zlib'
|
5
|
-
|
6
3
|
require 'daybreak/version'
|
7
|
-
require 'daybreak/
|
8
|
-
require 'daybreak/
|
4
|
+
require 'daybreak/serializer'
|
5
|
+
require 'daybreak/format'
|
6
|
+
require 'daybreak/queue'
|
9
7
|
require 'daybreak/db'
|
data/lib/daybreak/db.rb
CHANGED
@@ -1,186 +1,380 @@
|
|
1
1
|
module Daybreak
|
2
|
-
# Daybreak::DB contains the public api for Daybreak
|
3
|
-
# any other Ruby class (i.e. to overwrite serialize and parse). It includes
|
2
|
+
# Daybreak::DB contains the public api for Daybreak. It includes
|
4
3
|
# Enumerable for functional goodies like map, each, reduce and friends.
|
4
|
+
# @api public
|
5
5
|
class DB
|
6
6
|
include Enumerable
|
7
7
|
|
8
|
+
# Accessors for the database file, and a counter of how many records are in
|
9
|
+
# sync with the file.
|
10
|
+
attr_reader :file, :logsize
|
11
|
+
attr_writer :default
|
12
|
+
|
13
|
+
@databases = []
|
14
|
+
@databases_mutex = Mutex.new
|
15
|
+
|
16
|
+
# A handler that will ensure that databases are closed and synced when the
|
17
|
+
# current process exits.
|
18
|
+
at_exit do
|
19
|
+
loop do
|
20
|
+
db = @databases_mutex.synchronize { @databases.first }
|
21
|
+
break unless db
|
22
|
+
warn "Daybreak database #{db.file} was not closed, state might be inconsistent"
|
23
|
+
begin
|
24
|
+
db.close
|
25
|
+
rescue Exception => ex
|
26
|
+
warn "Failed to close daybreak database: #{ex.message}"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
class << self
|
32
|
+
# @api private
|
33
|
+
def register(db)
|
34
|
+
@databases_mutex.synchronize { @databases << db }
|
35
|
+
end
|
36
|
+
|
37
|
+
# @api private
|
38
|
+
def unregister(db)
|
39
|
+
@databases_mutex.synchronize { @databases.delete(db) }
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
8
43
|
# Create a new Daybreak::DB. The second argument is the default value
|
9
44
|
# to store when accessing a previously unset key, this follows the
|
10
45
|
# Hash standard.
|
11
46
|
# @param [String] file the path to the db file
|
12
|
-
# @param
|
13
|
-
#
|
47
|
+
# @param [Hash] options a hash that contains the options for creating a new
|
48
|
+
# database. You can pass in :serializer, :format or :default.
|
14
49
|
# @yield [key] a block that will return the default value to store.
|
15
50
|
# @yieldparam [String] key the key to be stored.
|
16
|
-
def initialize(file,
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@
|
20
|
-
@default =
|
21
|
-
|
51
|
+
def initialize(file, options = {}, &block)
|
52
|
+
@file = file
|
53
|
+
@serializer = (options[:serializer] || Serializer::Default).new
|
54
|
+
@format = (options[:format] || Format).new
|
55
|
+
@default = block ? block : options[:default]
|
56
|
+
@queue = Queue.new
|
57
|
+
@table = {}
|
58
|
+
open
|
59
|
+
@mutex = Mutex.new # Mutex to make #lock thread safe
|
60
|
+
@worker = Thread.new(&method(:worker))
|
61
|
+
@worker.priority = -1
|
62
|
+
update
|
63
|
+
self.class.register(self)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Return default value belonging to key
|
67
|
+
# @param key the key whose default value should be retrieved.
|
68
|
+
def default(key = nil)
|
69
|
+
@default.respond_to?(:call) ? @default.call(key) : @default
|
70
|
+
end
|
71
|
+
|
72
|
+
# Retrieve a value at key from the database. If the default value was specified
|
73
|
+
# when this database was created, that value will be set and returned. Aliased
|
74
|
+
# as <tt>get</tt>.
|
75
|
+
# @param key the key of the value to retrieve from the database.
|
76
|
+
def [](key)
|
77
|
+
skey = @serializer.key_for(key)
|
78
|
+
value = @table[skey]
|
79
|
+
if value != nil || @table.has_key?(skey)
|
80
|
+
value
|
81
|
+
elsif @default
|
82
|
+
value = default(key)
|
83
|
+
@queue << [skey, value]
|
84
|
+
@table[skey] = value
|
85
|
+
end
|
22
86
|
end
|
87
|
+
alias_method :get, :'[]'
|
23
88
|
|
24
89
|
# Set a key in the database to be written at some future date. If the data
|
25
90
|
# needs to be persisted immediately, call <tt>db.set!(key, value)</tt>.
|
26
91
|
# @param [#to_s] key the key of the storage slot in the database
|
27
92
|
# @param value the value to store
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
write key, value, sync
|
93
|
+
def []=(key, value)
|
94
|
+
key = @serializer.key_for(key)
|
95
|
+
@queue << [key, value]
|
32
96
|
@table[key] = value
|
33
97
|
end
|
34
|
-
alias_method :set, :
|
98
|
+
alias_method :set, :'[]='
|
35
99
|
|
36
100
|
# set! flushes data immediately to disk.
|
37
|
-
# @param
|
101
|
+
# @param key the key of the storage slot in the database
|
38
102
|
# @param value the value to store
|
39
103
|
def set!(key, value)
|
40
|
-
set
|
104
|
+
set(key, value)
|
105
|
+
flush
|
106
|
+
value
|
41
107
|
end
|
42
108
|
|
43
109
|
# Delete a key from the database
|
44
|
-
# @param
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
@table.delete key
|
110
|
+
# @param key the key of the storage slot in the database
|
111
|
+
def delete(key)
|
112
|
+
key = @serializer.key_for(key)
|
113
|
+
@queue << [key]
|
114
|
+
@table.delete(key)
|
50
115
|
end
|
51
116
|
|
52
|
-
# delete
|
53
|
-
# @param
|
117
|
+
# Immediately delete the key on disk.
|
118
|
+
# @param key the key of the storage slot in the database
|
54
119
|
def delete!(key)
|
55
|
-
delete
|
120
|
+
value = delete(key)
|
121
|
+
flush
|
122
|
+
value
|
56
123
|
end
|
57
124
|
|
58
|
-
#
|
59
|
-
#
|
60
|
-
|
61
|
-
|
62
|
-
def [](key)
|
63
|
-
key = key.to_s
|
64
|
-
if @table.has_key? key
|
65
|
-
@table[key]
|
66
|
-
elsif default?
|
67
|
-
set key, Proc === @default ? @default.call(key) : @default
|
68
|
-
end
|
125
|
+
# Does this db have a value for this key?
|
126
|
+
# @param key the key to look up in the DB.
|
127
|
+
def has_key?(key)
|
128
|
+
@table.has_key?(@serializer.key_for(key))
|
69
129
|
end
|
70
|
-
alias_method :
|
130
|
+
alias_method :key?, :has_key?
|
131
|
+
alias_method :include?, :has_key?
|
132
|
+
alias_method :member?, :has_key?
|
71
133
|
|
72
|
-
|
73
|
-
|
74
|
-
# @yieldparam [String] key the key.
|
75
|
-
# @yieldparam value the value from the database.
|
76
|
-
def each
|
77
|
-
keys.each { |k| yield(k, get(k)) }
|
134
|
+
def has_value?(value)
|
135
|
+
@table.has_value?(value)
|
78
136
|
end
|
137
|
+
alias_method :value?, :has_value?
|
79
138
|
|
80
|
-
#
|
81
|
-
|
82
|
-
|
139
|
+
# Return the number of stored items.
|
140
|
+
# @return [Integer]
|
141
|
+
def size
|
142
|
+
@table.size
|
83
143
|
end
|
144
|
+
alias_method :length, :size
|
84
145
|
|
85
|
-
#
|
86
|
-
# @
|
87
|
-
def
|
88
|
-
@table.
|
146
|
+
# Return true if database is empty.
|
147
|
+
# @return [Boolean]
|
148
|
+
def empty?
|
149
|
+
@table.empty?
|
150
|
+
end
|
151
|
+
|
152
|
+
# Iterate over the key, value pairs in the database.
|
153
|
+
# @yield [key, value] blk the iterator for each key value pair.
|
154
|
+
# @yieldparam key the key.
|
155
|
+
# @yieldparam value the value from the database.
|
156
|
+
def each(&block)
|
157
|
+
@table.each(&block)
|
89
158
|
end
|
90
159
|
|
91
160
|
# Return the keys in the db.
|
92
|
-
# @return [Array
|
161
|
+
# @return [Array]
|
93
162
|
def keys
|
94
163
|
@table.keys
|
95
164
|
end
|
96
165
|
|
97
|
-
#
|
98
|
-
|
99
|
-
|
100
|
-
@table.keys.length
|
166
|
+
# Flush all changes to disk.
|
167
|
+
def flush
|
168
|
+
@queue.flush
|
101
169
|
end
|
102
|
-
alias_method :size, :length
|
103
170
|
|
104
|
-
#
|
105
|
-
#
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
Marshal.dump(value)
|
171
|
+
# Sync the database with what is on disk, by first flushing changes, and
|
172
|
+
# then reading the file if necessary.
|
173
|
+
def sync
|
174
|
+
flush
|
175
|
+
update
|
110
176
|
end
|
111
177
|
|
112
|
-
#
|
113
|
-
#
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
178
|
+
# Lock the database for an exclusive commit across processes and threads
|
179
|
+
# @yield a block where every change to the database is synced
|
180
|
+
def lock
|
181
|
+
@mutex.synchronize do
|
182
|
+
# We need a flush before exclusive
|
183
|
+
# so that @exclusive is not modified by the worker
|
184
|
+
flush
|
185
|
+
exclusive do
|
186
|
+
update
|
187
|
+
result = yield
|
188
|
+
flush
|
189
|
+
result
|
190
|
+
end
|
191
|
+
end
|
118
192
|
end
|
119
193
|
|
120
|
-
#
|
121
|
-
def
|
122
|
-
|
194
|
+
# Remove all keys and values from the database.
|
195
|
+
def clear
|
196
|
+
flush
|
197
|
+
with_tmpfile do |path, file|
|
198
|
+
file.write(@format.header)
|
199
|
+
file.close
|
200
|
+
# Clear acts like a compactification
|
201
|
+
File.rename(path, @file)
|
202
|
+
end
|
123
203
|
@table.clear
|
124
|
-
|
204
|
+
open
|
205
|
+
self
|
125
206
|
end
|
126
|
-
alias_method :clear, :empty!
|
127
207
|
|
128
|
-
#
|
129
|
-
def
|
130
|
-
|
208
|
+
# Compact the database to remove stale commits and reduce the file size.
|
209
|
+
def compact
|
210
|
+
sync
|
211
|
+
with_tmpfile do |path, file|
|
212
|
+
compactsize = file.write(dump)
|
213
|
+
exclusive do
|
214
|
+
stat = @fd.stat
|
215
|
+
# Check if database was compactified at the same time
|
216
|
+
if stat.nlink > 0 && stat.ino == @inode
|
217
|
+
# Compactified database has the same size -> return
|
218
|
+
return self if stat.size == compactsize
|
219
|
+
# Append changed journal records if the database changed during compactification
|
220
|
+
file.write(read)
|
221
|
+
file.close
|
222
|
+
File.rename(path, @file)
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
open
|
227
|
+
update
|
228
|
+
self
|
131
229
|
end
|
132
230
|
|
133
231
|
# Close the database for reading and writing.
|
134
|
-
def close
|
135
|
-
@
|
232
|
+
def close
|
233
|
+
@queue << nil
|
234
|
+
@worker.join
|
235
|
+
@fd.close
|
236
|
+
@queue.stop if @queue.respond_to?(:stop)
|
237
|
+
self.class.unregister(self)
|
238
|
+
nil
|
136
239
|
end
|
137
240
|
|
138
|
-
#
|
139
|
-
def
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
241
|
+
# Check to see if we've already closed the database.
|
242
|
+
def closed?
|
243
|
+
@fd.closed?
|
244
|
+
end
|
245
|
+
|
246
|
+
private
|
247
|
+
|
248
|
+
# Update the @table with records read from the file, and increment @logsize
|
249
|
+
def update
|
250
|
+
buf = new_records
|
251
|
+
until buf.empty?
|
252
|
+
record = @format.parse(buf)
|
253
|
+
if record.size == 1
|
254
|
+
@table.delete(record.first)
|
255
|
+
else
|
256
|
+
@table[record.first] = @serializer.load(record.last)
|
257
|
+
end
|
258
|
+
@logsize += 1
|
147
259
|
end
|
148
|
-
|
260
|
+
end
|
149
261
|
|
150
|
-
|
262
|
+
# Read new records from journal log and return buffer
|
263
|
+
def new_records
|
264
|
+
loop do
|
265
|
+
unless @exclusive
|
266
|
+
# HACK: JRuby returns false if the lock is already held by the same process
|
267
|
+
# see https://github.com/jruby/jruby/issues/496
|
268
|
+
Thread.pass until @fd.flock(File::LOCK_SH)
|
269
|
+
end
|
270
|
+
# Check if database was compactified in the meantime
|
271
|
+
# break if not
|
272
|
+
stat = @fd.stat
|
273
|
+
break if stat.nlink > 0 && stat.ino == @inode
|
274
|
+
open
|
275
|
+
end
|
151
276
|
|
152
|
-
#
|
153
|
-
|
277
|
+
# Read new journal records
|
278
|
+
read
|
279
|
+
ensure
|
280
|
+
@fd.flock(File::LOCK_UN) unless @exclusive
|
281
|
+
end
|
154
282
|
|
155
|
-
|
156
|
-
@
|
157
|
-
@
|
158
|
-
|
283
|
+
def open
|
284
|
+
@fd.close if @fd
|
285
|
+
@fd = File.open(@file, 'ab+')
|
286
|
+
@fd.advise(:sequential) if @fd.respond_to? :advise
|
287
|
+
stat = @fd.stat
|
288
|
+
@inode = stat.ino
|
289
|
+
@logsize = 0
|
290
|
+
if stat.size == 0
|
291
|
+
@fd.write(@format.header)
|
292
|
+
@fd.flush
|
293
|
+
end
|
294
|
+
@pos = nil
|
159
295
|
end
|
160
296
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
297
|
+
def read
|
298
|
+
# File was opened
|
299
|
+
unless @pos
|
300
|
+
@fd.pos = 0
|
301
|
+
@format.read_header(@fd)
|
302
|
+
else
|
303
|
+
@fd.pos = @pos
|
168
304
|
end
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
305
|
+
buf = @fd.read
|
306
|
+
@pos = @fd.pos
|
307
|
+
buf
|
308
|
+
end
|
309
|
+
|
310
|
+
# Return database dump as string
|
311
|
+
def dump
|
312
|
+
dump = @format.header
|
313
|
+
# each is faster than inject
|
314
|
+
@table.each do |record|
|
315
|
+
record[1] = @serializer.dump(record.last)
|
316
|
+
dump << @format.dump(record)
|
176
317
|
end
|
318
|
+
dump
|
177
319
|
end
|
178
320
|
|
179
|
-
|
321
|
+
# Worker thread
|
322
|
+
def worker
|
323
|
+
loop do
|
324
|
+
record = @queue.next
|
325
|
+
write_record(record) if record
|
326
|
+
@queue.pop
|
327
|
+
break unless record
|
328
|
+
end
|
329
|
+
rescue Exception => ex
|
330
|
+
warn "Daybreak worker: #{ex.message}"
|
331
|
+
retry
|
332
|
+
end
|
333
|
+
|
334
|
+
# Write record to output stream and
|
335
|
+
# advance input stream
|
336
|
+
def write_record(record)
|
337
|
+
record[1] = @serializer.dump(record.last) if record.size > 1
|
338
|
+
record = @format.dump(record)
|
339
|
+
exclusive do
|
340
|
+
@fd.write(record)
|
341
|
+
# Flush to make sure the file is really updated
|
342
|
+
@fd.flush
|
343
|
+
end
|
344
|
+
@pos = @fd.pos if @pos && @fd.pos == @pos + record.bytesize
|
345
|
+
@logsize += 1
|
346
|
+
end
|
347
|
+
|
348
|
+
# Lock database exclusively
|
349
|
+
def exclusive
|
350
|
+
return yield if @exclusive
|
351
|
+
begin
|
352
|
+
loop do
|
353
|
+
# HACK: JRuby returns false if the lock is already held by the same process
|
354
|
+
# see https://github.com/jruby/jruby/issues/496
|
355
|
+
Thread.pass until @fd.flock(File::LOCK_EX)
|
356
|
+
# Check if database was compactified in the meantime
|
357
|
+
# break if not
|
358
|
+
stat = @fd.stat
|
359
|
+
break if stat.nlink > 0 && stat.ino == @inode
|
360
|
+
open
|
361
|
+
end
|
362
|
+
@exclusive = true
|
363
|
+
yield
|
364
|
+
ensure
|
365
|
+
@fd.flock(File::LOCK_UN)
|
366
|
+
@exclusive = false
|
367
|
+
end
|
368
|
+
end
|
180
369
|
|
181
|
-
|
182
|
-
|
183
|
-
|
370
|
+
# Open temporary file and pass it to the block
|
371
|
+
def with_tmpfile
|
372
|
+
path = [@file, $$.to_s(36), Thread.current.object_id.to_s(36)].join
|
373
|
+
file = File.open(path, 'wb')
|
374
|
+
yield(path, file)
|
375
|
+
ensure
|
376
|
+
file.close unless file.closed?
|
377
|
+
File.unlink(path) if File.exists?(path)
|
184
378
|
end
|
185
379
|
end
|
186
380
|
end
|