daybreak 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/.travis.yml +5 -0
- data/LICENSE +2 -2
- data/README +1 -3
- data/Rakefile +6 -9
- data/daybreak.gemspec +4 -4
- data/lib/daybreak.rb +3 -5
- data/lib/daybreak/db.rb +308 -114
- data/lib/daybreak/format.rb +52 -0
- data/lib/daybreak/queue.rb +107 -0
- data/lib/daybreak/serializer.rb +39 -0
- data/lib/daybreak/version.rb +3 -2
- data/script/bench +95 -0
- data/script/converter +390 -0
- data/test/test.rb +251 -57
- data/test/test_helper.rb +0 -3
- metadata +12 -11
- data/lib/daybreak/record.rb +0 -62
- data/lib/daybreak/writer.rb +0 -127
- data/test/bench.rb +0 -28
- data/test/compare.rb +0 -47
data/.travis.yml
CHANGED
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c) 2012
|
1
|
+
Copyright (c) 2012 ProPublica
|
2
2
|
|
3
3
|
MIT License
|
4
4
|
|
@@ -19,4 +19,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
19
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
20
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
21
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
CHANGED
@@ -5,10 +5,8 @@
|
|
5
5
|
|
6
6
|
Daybreak is a simple key value store for ruby. It has user defined persistence,
|
7
7
|
and all data is stored in a table in memory so ruby niceties are available.
|
8
|
-
Daybreak is faster than other ruby options like pstore and dbm.
|
8
|
+
Daybreak is faster than any other ruby option, like pstore or dbm.
|
9
9
|
|
10
10
|
$ gem install daybreak
|
11
11
|
|
12
12
|
Docs: http://propublica.github.com/daybreak/
|
13
|
-
Issue Tracker: http://propublica.github.com/daybreak/issues
|
14
|
-
Benchmarks: https://gist.github.com/4146590
|
data/Rakefile
CHANGED
@@ -1,23 +1,18 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
2
|
require "bundler/gem_tasks"
|
3
3
|
|
4
|
-
task :
|
5
|
-
|
4
|
+
task :test do
|
5
|
+
ruby 'test/test.rb'
|
6
6
|
end
|
7
7
|
|
8
8
|
desc "Run benchmarks"
|
9
9
|
task :bench do
|
10
|
-
|
11
|
-
end
|
12
|
-
|
13
|
-
desc "Run comparisons with other libraries"
|
14
|
-
task :compare do
|
15
|
-
require "./test/compare.rb"
|
10
|
+
ruby 'script/bench'
|
16
11
|
end
|
17
12
|
|
18
13
|
desc "Profile a simple run"
|
19
14
|
task :prof do
|
20
|
-
|
15
|
+
ruby 'test/prof.rb'
|
21
16
|
end
|
22
17
|
|
23
18
|
require 'erb'
|
@@ -34,3 +29,5 @@ task :publish do |t|
|
|
34
29
|
`git push`
|
35
30
|
`git checkout master`
|
36
31
|
end
|
32
|
+
|
33
|
+
task :default => :test
|
data/daybreak.gemspec
CHANGED
@@ -2,10 +2,10 @@
|
|
2
2
|
require File.expand_path('../lib/daybreak/version', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Jeff Larson"]
|
6
|
-
gem.email = ["thejefflarson@gmail.com"]
|
7
|
-
gem.description = %q{
|
8
|
-
gem.summary = %q{Daybreak provides an in memory key-value store
|
5
|
+
gem.authors = ["Jeff Larson", "Daniel Mendler"]
|
6
|
+
gem.email = ["thejefflarson@gmail.com", "mail@daniel-mendler.de"]
|
7
|
+
gem.description = %q{Incredibly fast pure-ruby key-value store}
|
8
|
+
gem.summary = %q{Daybreak provides an incredibly fast pure-ruby in memory key-value store, which is multi-process safe and uses a journal log to store the data.}
|
9
9
|
gem.homepage = "http://propublica.github.com/daybreak/"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\).reject {|f| f =~ /^(index)/}
|
data/lib/daybreak.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
require 'tempfile'
|
2
1
|
require 'thread'
|
3
|
-
require 'fcntl'
|
4
2
|
require 'zlib'
|
5
|
-
|
6
3
|
require 'daybreak/version'
|
7
|
-
require 'daybreak/
|
8
|
-
require 'daybreak/
|
4
|
+
require 'daybreak/serializer'
|
5
|
+
require 'daybreak/format'
|
6
|
+
require 'daybreak/queue'
|
9
7
|
require 'daybreak/db'
|
data/lib/daybreak/db.rb
CHANGED
@@ -1,186 +1,380 @@
|
|
1
1
|
module Daybreak
|
2
|
-
# Daybreak::DB contains the public api for Daybreak
|
3
|
-
# any other Ruby class (i.e. to overwrite serialize and parse). It includes
|
2
|
+
# Daybreak::DB contains the public api for Daybreak. It includes
|
4
3
|
# Enumerable for functional goodies like map, each, reduce and friends.
|
4
|
+
# @api public
|
5
5
|
class DB
|
6
6
|
include Enumerable
|
7
7
|
|
8
|
+
# Accessors for the database file, and a counter of how many records are in
|
9
|
+
# sync with the file.
|
10
|
+
attr_reader :file, :logsize
|
11
|
+
attr_writer :default
|
12
|
+
|
13
|
+
@databases = []
|
14
|
+
@databases_mutex = Mutex.new
|
15
|
+
|
16
|
+
# A handler that will ensure that databases are closed and synced when the
|
17
|
+
# current process exits.
|
18
|
+
at_exit do
|
19
|
+
loop do
|
20
|
+
db = @databases_mutex.synchronize { @databases.first }
|
21
|
+
break unless db
|
22
|
+
warn "Daybreak database #{db.file} was not closed, state might be inconsistent"
|
23
|
+
begin
|
24
|
+
db.close
|
25
|
+
rescue Exception => ex
|
26
|
+
warn "Failed to close daybreak database: #{ex.message}"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
class << self
|
32
|
+
# @api private
|
33
|
+
def register(db)
|
34
|
+
@databases_mutex.synchronize { @databases << db }
|
35
|
+
end
|
36
|
+
|
37
|
+
# @api private
|
38
|
+
def unregister(db)
|
39
|
+
@databases_mutex.synchronize { @databases.delete(db) }
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
8
43
|
# Create a new Daybreak::DB. The :default option or a block gives the value
|
9
44
|
# to store when accessing a previously unset key, this follows the
|
10
45
|
# Hash standard.
|
11
46
|
# @param [String] file the path to the db file
|
12
|
-
# @param
|
13
|
-
#
|
47
|
+
# @param [Hash] options a hash that contains the options for creating a new
|
48
|
+
# database. You can pass in :serializer, :format or :default.
|
14
49
|
# @yield [key] a block that will return the default value to store.
|
15
50
|
# @yieldparam [String] key the key to be stored.
|
16
|
-
def initialize(file,
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@
|
20
|
-
@default =
|
21
|
-
|
51
|
+
def initialize(file, options = {}, &block)
|
52
|
+
@file = file
|
53
|
+
@serializer = (options[:serializer] || Serializer::Default).new
|
54
|
+
@format = (options[:format] || Format).new
|
55
|
+
@default = block ? block : options[:default]
|
56
|
+
@queue = Queue.new
|
57
|
+
@table = {}
|
58
|
+
open
|
59
|
+
@mutex = Mutex.new # Mutex to make #lock thread safe
|
60
|
+
@worker = Thread.new(&method(:worker))
|
61
|
+
@worker.priority = -1
|
62
|
+
update
|
63
|
+
self.class.register(self)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Return default value belonging to key
|
67
|
+
# @param key the key whose default value is retrieved.
|
68
|
+
def default(key = nil)
|
69
|
+
@default.respond_to?(:call) ? @default.call(key) : @default
|
70
|
+
end
|
71
|
+
|
72
|
+
# Retrieve a value at key from the database. If the key is unset and a default value was specified
|
73
|
+
# when this database was created, that value will be set and returned. Aliased
|
74
|
+
# as <tt>get</tt>.
|
75
|
+
# @param key the value to retrieve from the database.
|
76
|
+
def [](key)
|
77
|
+
skey = @serializer.key_for(key)
|
78
|
+
value = @table[skey]
|
79
|
+
if value != nil || @table.has_key?(skey)
|
80
|
+
value
|
81
|
+
elsif @default
|
82
|
+
value = default(key)
|
83
|
+
@queue << [skey, value]
|
84
|
+
@table[skey] = value
|
85
|
+
end
|
22
86
|
end
|
87
|
+
alias_method :get, :'[]'
|
23
88
|
|
24
89
|
# Set a key in the database to be written at some future date. If the data
|
25
90
|
# needs to be persisted immediately, call <tt>db.set!(key, value)</tt>.
|
26
91
|
# @param [#to_s] key the key of the storage slot in the database
|
27
92
|
# @param value the value to store
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
write key, value, sync
|
93
|
+
def []=(key, value)
|
94
|
+
key = @serializer.key_for(key)
|
95
|
+
@queue << [key, value]
|
32
96
|
@table[key] = value
|
33
97
|
end
|
34
|
-
alias_method :set, :
|
98
|
+
alias_method :set, :'[]='
|
35
99
|
|
36
100
|
# set! flushes data immediately to disk.
|
37
|
-
# @param
|
101
|
+
# @param key the key of the storage slot in the database
|
38
102
|
# @param value the value to store
|
39
103
|
def set!(key, value)
|
40
|
-
set
|
104
|
+
set(key, value)
|
105
|
+
flush
|
106
|
+
value
|
41
107
|
end
|
42
108
|
|
43
109
|
# Delete a key from the database
|
44
|
-
# @param
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
@table.delete key
|
110
|
+
# @param key the key of the storage slot in the database
|
111
|
+
def delete(key)
|
112
|
+
key = @serializer.key_for(key)
|
113
|
+
@queue << [key]
|
114
|
+
@table.delete(key)
|
50
115
|
end
|
51
116
|
|
52
|
-
# delete
|
53
|
-
# @param
|
117
|
+
# Immediately delete the key on disk.
|
118
|
+
# @param key the key of the storage slot in the database
|
54
119
|
def delete!(key)
|
55
|
-
delete
|
120
|
+
value = delete(key)
|
121
|
+
flush
|
122
|
+
value
|
56
123
|
end
|
57
124
|
|
58
|
-
#
|
59
|
-
#
|
60
|
-
|
61
|
-
|
62
|
-
def [](key)
|
63
|
-
key = key.to_s
|
64
|
-
if @table.has_key? key
|
65
|
-
@table[key]
|
66
|
-
elsif default?
|
67
|
-
set key, Proc === @default ? @default.call(key) : @default
|
68
|
-
end
|
125
|
+
# Does this db have a value for this key?
|
126
|
+
# @param key the key to look up in the DB.
|
127
|
+
def has_key?(key)
|
128
|
+
@table.has_key?(@serializer.key_for(key))
|
69
129
|
end
|
70
|
-
alias_method :
|
130
|
+
alias_method :key?, :has_key?
|
131
|
+
alias_method :include?, :has_key?
|
132
|
+
alias_method :member?, :has_key?
|
71
133
|
|
72
|
-
|
73
|
-
|
74
|
-
# @yieldparam [String] key the key.
|
75
|
-
# @yieldparam value the value from the database.
|
76
|
-
def each
|
77
|
-
keys.each { |k| yield(k, get(k)) }
|
134
|
+
def has_value?(value)
|
135
|
+
@table.has_value?(value)
|
78
136
|
end
|
137
|
+
alias_method :value?, :has_value?
|
79
138
|
|
80
|
-
#
|
81
|
-
|
82
|
-
|
139
|
+
# Return the number of stored items.
|
140
|
+
# @return [Integer]
|
141
|
+
def size
|
142
|
+
@table.size
|
83
143
|
end
|
144
|
+
alias_method :length, :size
|
84
145
|
|
85
|
-
#
|
86
|
-
# @
|
87
|
-
def
|
88
|
-
@table.
|
146
|
+
# Return true if database is empty.
|
147
|
+
# @return [Boolean]
|
148
|
+
def empty?
|
149
|
+
@table.empty?
|
150
|
+
end
|
151
|
+
|
152
|
+
# Iterate over the key, value pairs in the database.
|
153
|
+
# @yield [key, value] blk the iterator for each key value pair.
|
154
|
+
# @yieldparam key the key.
|
155
|
+
# @yieldparam value the value from the database.
|
156
|
+
def each(&block)
|
157
|
+
@table.each(&block)
|
89
158
|
end
|
90
159
|
|
91
160
|
# Return the keys in the db.
|
92
|
-
# @return [Array
|
161
|
+
# @return [Array]
|
93
162
|
def keys
|
94
163
|
@table.keys
|
95
164
|
end
|
96
165
|
|
97
|
-
#
|
98
|
-
|
99
|
-
|
100
|
-
@table.keys.length
|
166
|
+
# Flush all changes to disk.
|
167
|
+
def flush
|
168
|
+
@queue.flush
|
101
169
|
end
|
102
|
-
alias_method :size, :length
|
103
170
|
|
104
|
-
#
|
105
|
-
#
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
Marshal.dump(value)
|
171
|
+
# Sync the database with what is on disk, by first flushing changes, and
|
172
|
+
# then reading the file if necessary.
|
173
|
+
def sync
|
174
|
+
flush
|
175
|
+
update
|
110
176
|
end
|
111
177
|
|
112
|
-
#
|
113
|
-
#
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
178
|
+
# Lock the database for an exclusive commit across processes and threads
|
179
|
+
# @yield a block where every change to the database is synced
|
180
|
+
def lock
|
181
|
+
@mutex.synchronize do
|
182
|
+
# We need a flush before exclusive
|
183
|
+
# so that @exclusive is not modified by the worker
|
184
|
+
flush
|
185
|
+
exclusive do
|
186
|
+
update
|
187
|
+
result = yield
|
188
|
+
flush
|
189
|
+
result
|
190
|
+
end
|
191
|
+
end
|
118
192
|
end
|
119
193
|
|
120
|
-
#
|
121
|
-
def
|
122
|
-
|
194
|
+
# Remove all keys and values from the database.
|
195
|
+
def clear
|
196
|
+
flush
|
197
|
+
with_tmpfile do |path, file|
|
198
|
+
file.write(@format.header)
|
199
|
+
file.close
|
200
|
+
# Clear acts like a compactification
|
201
|
+
File.rename(path, @file)
|
202
|
+
end
|
123
203
|
@table.clear
|
124
|
-
|
204
|
+
open
|
205
|
+
self
|
125
206
|
end
|
126
|
-
alias_method :clear, :empty!
|
127
207
|
|
128
|
-
#
|
129
|
-
def
|
130
|
-
|
208
|
+
# Compact the database to remove stale commits and reduce the file size.
|
209
|
+
def compact
|
210
|
+
sync
|
211
|
+
with_tmpfile do |path, file|
|
212
|
+
compactsize = file.write(dump)
|
213
|
+
exclusive do
|
214
|
+
stat = @fd.stat
|
215
|
+
# Check if database was compactified at the same time
|
216
|
+
if stat.nlink > 0 && stat.ino == @inode
|
217
|
+
# Compactified database has the same size -> return
|
218
|
+
return self if stat.size == compactsize
|
219
|
+
# Append changed journal records if the database changed during compactification
|
220
|
+
file.write(read)
|
221
|
+
file.close
|
222
|
+
File.rename(path, @file)
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
open
|
227
|
+
update
|
228
|
+
self
|
131
229
|
end
|
132
230
|
|
133
231
|
# Close the database for reading and writing.
|
134
|
-
def close
|
135
|
-
@
|
232
|
+
def close
|
233
|
+
@queue << nil
|
234
|
+
@worker.join
|
235
|
+
@fd.close
|
236
|
+
@queue.stop if @queue.respond_to?(:stop)
|
237
|
+
self.class.unregister(self)
|
238
|
+
nil
|
136
239
|
end
|
137
240
|
|
138
|
-
#
|
139
|
-
def
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
241
|
+
# Check to see if we've already closed the database.
|
242
|
+
def closed?
|
243
|
+
@fd.closed?
|
244
|
+
end
|
245
|
+
|
246
|
+
private
|
247
|
+
|
248
|
+
# Update the @table with records read from the file, and increment @logsize
|
249
|
+
def update
|
250
|
+
buf = new_records
|
251
|
+
until buf.empty?
|
252
|
+
record = @format.parse(buf)
|
253
|
+
if record.size == 1
|
254
|
+
@table.delete(record.first)
|
255
|
+
else
|
256
|
+
@table[record.first] = @serializer.load(record.last)
|
257
|
+
end
|
258
|
+
@logsize += 1
|
147
259
|
end
|
148
|
-
|
260
|
+
end
|
149
261
|
|
150
|
-
|
262
|
+
# Read new records from journal log and return buffer
|
263
|
+
def new_records
|
264
|
+
loop do
|
265
|
+
unless @exclusive
|
266
|
+
# HACK: JRuby returns false if the lock is already held by the same process
|
267
|
+
# see https://github.com/jruby/jruby/issues/496
|
268
|
+
Thread.pass until @fd.flock(File::LOCK_SH)
|
269
|
+
end
|
270
|
+
# Check if database was compactified in the meantime
|
271
|
+
# break if not
|
272
|
+
stat = @fd.stat
|
273
|
+
break if stat.nlink > 0 && stat.ino == @inode
|
274
|
+
open
|
275
|
+
end
|
151
276
|
|
152
|
-
#
|
153
|
-
|
277
|
+
# Read new journal records
|
278
|
+
read
|
279
|
+
ensure
|
280
|
+
@fd.flock(File::LOCK_UN) unless @exclusive
|
281
|
+
end
|
154
282
|
|
155
|
-
|
156
|
-
@
|
157
|
-
@
|
158
|
-
|
283
|
+
def open
|
284
|
+
@fd.close if @fd
|
285
|
+
@fd = File.open(@file, 'ab+')
|
286
|
+
@fd.advise(:sequential) if @fd.respond_to? :advise
|
287
|
+
stat = @fd.stat
|
288
|
+
@inode = stat.ino
|
289
|
+
@logsize = 0
|
290
|
+
if stat.size == 0
|
291
|
+
@fd.write(@format.header)
|
292
|
+
@fd.flush
|
293
|
+
end
|
294
|
+
@pos = nil
|
159
295
|
end
|
160
296
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
297
|
+
def read
|
298
|
+
# File was opened
|
299
|
+
unless @pos
|
300
|
+
@fd.pos = 0
|
301
|
+
@format.read_header(@fd)
|
302
|
+
else
|
303
|
+
@fd.pos = @pos
|
168
304
|
end
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
305
|
+
buf = @fd.read
|
306
|
+
@pos = @fd.pos
|
307
|
+
buf
|
308
|
+
end
|
309
|
+
|
310
|
+
# Return database dump as string
|
311
|
+
def dump
|
312
|
+
dump = @format.header
|
313
|
+
# each is faster than inject
|
314
|
+
@table.each do |record|
|
315
|
+
record[1] = @serializer.dump(record.last)
|
316
|
+
dump << @format.dump(record)
|
176
317
|
end
|
318
|
+
dump
|
177
319
|
end
|
178
320
|
|
179
|
-
|
321
|
+
# Worker thread
|
322
|
+
def worker
|
323
|
+
loop do
|
324
|
+
record = @queue.next
|
325
|
+
write_record(record) if record
|
326
|
+
@queue.pop
|
327
|
+
break unless record
|
328
|
+
end
|
329
|
+
rescue Exception => ex
|
330
|
+
warn "Daybreak worker: #{ex.message}"
|
331
|
+
retry
|
332
|
+
end
|
333
|
+
|
334
|
+
# Write record to output stream and
|
335
|
+
# advance input stream
|
336
|
+
def write_record(record)
|
337
|
+
record[1] = @serializer.dump(record.last) if record.size > 1
|
338
|
+
record = @format.dump(record)
|
339
|
+
exclusive do
|
340
|
+
@fd.write(record)
|
341
|
+
# Flush to make sure the file is really updated
|
342
|
+
@fd.flush
|
343
|
+
end
|
344
|
+
@pos = @fd.pos if @pos && @fd.pos == @pos + record.bytesize
|
345
|
+
@logsize += 1
|
346
|
+
end
|
347
|
+
|
348
|
+
# Lock database exclusively
|
349
|
+
def exclusive
|
350
|
+
return yield if @exclusive
|
351
|
+
begin
|
352
|
+
loop do
|
353
|
+
# HACK: JRuby returns false if the lock is already held by the same process
|
354
|
+
# see https://github.com/jruby/jruby/issues/496
|
355
|
+
Thread.pass until @fd.flock(File::LOCK_EX)
|
356
|
+
# Check if database was compactified in the meantime
|
357
|
+
# break if not
|
358
|
+
stat = @fd.stat
|
359
|
+
break if stat.nlink > 0 && stat.ino == @inode
|
360
|
+
open
|
361
|
+
end
|
362
|
+
@exclusive = true
|
363
|
+
yield
|
364
|
+
ensure
|
365
|
+
@fd.flock(File::LOCK_UN)
|
366
|
+
@exclusive = false
|
367
|
+
end
|
368
|
+
end
|
180
369
|
|
181
|
-
|
182
|
-
|
183
|
-
|
370
|
+
# Open temporary file and pass it to the block
|
371
|
+
def with_tmpfile
|
372
|
+
path = [@file, $$.to_s(36), Thread.current.object_id.to_s(36)].join
|
373
|
+
file = File.open(path, 'wb')
|
374
|
+
yield(path, file)
|
375
|
+
ensure
|
376
|
+
file.close unless file.closed?
|
377
|
+
File.unlink(path) if File.exists?(path)
|
184
378
|
end
|
185
379
|
end
|
186
380
|
end
|