daybreak 0.1.3 → 0.2.0

data/.gitignore CHANGED
@@ -1,4 +1,5 @@
  *.db
+ *.gdbm
  coverage
  doc
  profile
data/.travis.yml CHANGED
@@ -7,3 +7,8 @@ rvm:
  - jruby-19mode
  - rbx-18mode
  - rbx-19mode
+ env:
+ matrix:
+ - "TASK=test"
+ - "TASK=bench"
+ script: "bundle exec rake $TASK"
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
- Copyright (c) 2012 Jeff Larson
+ Copyright (c) 2012 ProPublica

  MIT License

@@ -19,4 +19,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README CHANGED
@@ -5,10 +5,8 @@

  Daybreak is a simple key value store for ruby. It has user defined persistence,
  and all data is stored in a table in memory so ruby niceties are available.
- Daybreak is faster than other ruby options like pstore and dbm.
+ Daybreak is faster than any other ruby options like pstore and dbm.

  $ gem install daybreak

  Docs: http://propublica.github.com/daybreak/
- Issue Tracker: http://propublica.github.com/daybreak/issues
- Benchmarks: https://gist.github.com/4146590
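
Note: the README drops its issue-tracker and benchmark links and keeps only the install line. For orientation, a minimal usage sketch based on the public API visible in the db.rb diff further down; the file name and keys are illustrative, not taken from the gem's docs:

  require 'daybreak'

  # Open (or create) a database file. Values live in an in-memory table;
  # writes are appended to the file by a background worker.
  db = Daybreak::DB.new 'example.db'

  db['greeting'] = 'hello'   # queue a write (also available as db.set)
  db['greeting']             # => "hello"  (also available as db.get)

  db.flush                   # push queued writes to disk
  db.close                   # close so the journal is fully written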
data/Rakefile CHANGED
@@ -1,23 +1,18 @@
  #!/usr/bin/env rake
  require "bundler/gem_tasks"

- task :default do
- require "./test/test.rb"
+ task :test do
+ ruby 'test/test.rb'
  end

  desc "Run benchmarks"
  task :bench do
- require "./test/bench.rb"
- end
-
- desc "Run comparisons with other libraries"
- task :compare do
- require "./test/compare.rb"
+ ruby 'script/bench'
  end

  desc "Profile a simple run"
  task :prof do
- require "./test/prof.rb"
+ ruby 'test/prof.rb'
  end

  require 'erb'
@@ -34,3 +29,5 @@ task :publish do |t|
  `git push`
  `git checkout master`
  end
+
+ task :default => :test
data/daybreak.gemspec CHANGED
@@ -2,10 +2,10 @@
  require File.expand_path('../lib/daybreak/version', __FILE__)

  Gem::Specification.new do |gem|
- gem.authors = ["Jeff Larson"]
- gem.email = ["thejefflarson@gmail.com"]
- gem.description = %q{A simple dimple key-value store for ruby.}
- gem.summary = %q{Daybreak provides an in memory key-value store that is easily enumerable in ruby.}
+ gem.authors = ["Jeff Larson", "Daniel Mendler"]
+ gem.email = ["thejefflarson@gmail.com", "mail@daniel-mendler.de"]
+ gem.description = %q{Incredibly fast pure-ruby key-value store}
+ gem.summary = %q{Daybreak provides an incredibly fast pure-ruby in memory key-value store, which is multi-process safe and uses a journal log to store the data.}
  gem.homepage = "http://propublica.github.com/daybreak/"

  gem.files = `git ls-files`.split($\).reject {|f| f =~ /^(index)/}
data/lib/daybreak.rb CHANGED
@@ -1,9 +1,7 @@
- require 'tempfile'
  require 'thread'
- require 'fcntl'
  require 'zlib'
-
  require 'daybreak/version'
- require 'daybreak/record'
- require 'daybreak/writer'
+ require 'daybreak/serializer'
+ require 'daybreak/format'
+ require 'daybreak/queue'
  require 'daybreak/db'
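
Note: the record and writer requires are replaced by serializer, format and queue. Judging from how @serializer is called in the db.rb diff below (key_for, dump, load), a custom serializer could be supplied via the new :serializer option roughly like this; JSONSerializer is a hypothetical example, not part of the gem:

  require 'json'
  require 'daybreak'

  # Hypothetical serializer: mirrors the three methods Daybreak::DB
  # calls on @serializer in the db.rb diff below.
  class JSONSerializer
    def key_for(key)  # convert any key into its stored form
      key.to_s
    end

    def dump(value)   # value -> string written to the journal
      JSON.dump(value)
    end

    def load(data)    # string read from the journal -> value
      JSON.parse(data)
    end
  end

  # The class (not an instance) is passed; DB calls .new on it.
  db = Daybreak::DB.new 'example.db', :serializer => JSONSerializer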
data/lib/daybreak/db.rb CHANGED
@@ -1,186 +1,380 @@
  module Daybreak
- # Daybreak::DB contains the public api for Daybreak, you may extend it like
- # any other Ruby class (i.e. to overwrite serialize and parse). It includes
+ # Daybreak::DB contains the public api for Daybreak. It includes
  # Enumerable for functional goodies like map, each, reduce and friends.
+ # @api public
  class DB
  include Enumerable

+ # Accessors for the database file, and a counter of how many records are in
+ # sync with the file.
+ attr_reader :file, :logsize
+ attr_writer :default
+
+ @databases = []
+ @databases_mutex = Mutex.new
+
+ # A handler that will ensure that databases are closed and synced when the
+ # current process exits.
+ at_exit do
+ loop do
+ db = @databases_mutex.synchronize { @databases.first }
+ break unless db
+ warn "Daybreak database #{db.file} was not closed, state might be inconsistent"
+ begin
+ db.close
+ rescue Exception => ex
+ warn "Failed to close daybreak database: #{ex.message}"
+ end
+ end
+ end
+
+ class << self
+ # @api private
+ def register(db)
+ @databases_mutex.synchronize { @databases << db }
+ end
+
+ # @api private
+ def unregister(db)
+ @databases_mutex.synchronize { @databases.delete(db) }
+ end
+ end
+
  # Create a new Daybreak::DB. The second argument is the default value
  # to store when accessing a previously unset key, this follows the
  # Hash standard.
  # @param [String] file the path to the db file
- # @param default the default value to store and return when a key is
- # not yet in the database.
+ # @param [Hash] options a hash that contains the options for creating a new
+ # database. You can pass in :serializer, :format or :default.
  # @yield [key] a block that will return the default value to store.
  # @yieldparam [String] key the key to be stored.
- def initialize(file, default=nil, &blk)
- @table = {}
- @file_name = file
- @writer = Writer.new(@file_name)
- @default = block_given? ? blk : default
- read!
+ def initialize(file, options = {}, &block)
+ @file = file
+ @serializer = (options[:serializer] || Serializer::Default).new
+ @format = (options[:format] || Format).new
+ @default = block ? block : options[:default]
+ @queue = Queue.new
+ @table = {}
+ open
+ @mutex = Mutex.new # Mutex to make #lock thread safe
+ @worker = Thread.new(&method(:worker))
+ @worker.priority = -1
+ update
+ self.class.register(self)
+ end
+
+ # Return default value belonging to key
+ # @param key the default value to retrieve.
+ def default(key = nil)
+ @default.respond_to?(:call) ? @default.call(key) : @default
+ end
+
+ # Retrieve a value at key from the database. If the default value was specified
+ # when this database was created, that value will be set and returned. Aliased
+ # as <tt>get</tt>.
+ # @param key the value to retrieve from the database.
+ def [](key)
+ skey = @serializer.key_for(key)
+ value = @table[skey]
+ if value != nil || @table.has_key?(skey)
+ value
+ elsif @default
+ value = default(key)
+ @queue << [skey, value]
+ @table[skey] = value
+ end
  end
+ alias_method :get, :'[]'

  # Set a key in the database to be written at some future date. If the data
  # needs to be persisted immediately, call <tt>db.set(key, value, true)</tt>.
  # @param [#to_s] key the key of the storage slot in the database
  # @param value the value to store
- # @param [Boolean] sync if true, sync this value immediately
- def []=(key, value, sync = false)
- key = key.to_s
- write key, value, sync
+ def []=(key, value)
+ key = @serializer.key_for(key)
+ @queue << [key, value]
  @table[key] = value
  end
- alias_method :set, :"[]="
+ alias_method :set, :'[]='

  # set! flushes data immediately to disk.
- # @param [#to_s] key the key of the storage slot in the database
+ # @param key the key of the storage slot in the database
  # @param value the value to store
  def set!(key, value)
- set key, value, true
+ set(key, value)
+ flush
+ value
  end

  # Delete a key from the database
- # @param [#to_s] key the key of the storage slot in the database
- # @param [Boolean] sync if true, sync this deletion immediately
- def delete(key, sync = false)
- key = key.to_s
- write key, '', sync, true
- @table.delete key
+ # @param key the key of the storage slot in the database
+ def delete(key)
+ key = @serializer.key_for(key)
+ @queue << [key]
+ @table.delete(key)
  end

- # delete! immediately deletes the key on disk.
- # @param [#to_s] key the key of the storage slot in the database
+ # Immediately delete the key on disk.
+ # @param key the key of the storage slot in the database
  def delete!(key)
- delete key, true
+ value = delete(key)
+ flush
+ value
  end

- # Retrieve a value at key from the database. If the default value was specified
- # when this database was created, that value will be set and returned. Aliased
- # as <tt>get</tt>.
- # @param [#to_s] key the value to retrieve from the database.
- def [](key)
- key = key.to_s
- if @table.has_key? key
- @table[key]
- elsif default?
- set key, Proc === @default ? @default.call(key) : @default
- end
+ # Does this db have a value for this key?
+ # @param key the key to check if the DB has a key.
+ def has_key?(key)
+ @table.has_key?(@serializer.key_for(key))
  end
- alias_method :get, :"[]"
+ alias_method :key?, :has_key?
+ alias_method :include?, :has_key?
+ alias_method :member?, :has_key?

- # Iterate over the key, value pairs in the database.
- # @yield [key, value] blk the iterator for each key value pair.
- # @yieldparam [String] key the key.
- # @yieldparam value the value from the database.
- def each
- keys.each { |k| yield(k, get(k)) }
+ def has_value?(value)
+ @table.has_value?(value)
  end
+ alias_method :value?, :has_value?

- # Does this db have a default value.
- def default?
- !@default.nil?
+ # Return the number of stored items.
+ # @return [Integer]
+ def size
+ @table.size
  end
+ alias_method :length, :size

- # Does this db have a value for this key?
- # @param [key#to_s] key the key to check if the DB has a key.
- def has_key?(key)
- @table.has_key? key.to_s
+ # Return true if database is empty.
+ # @return [Boolean]
+ def empty?
+ @table.empty?
+ end
+
+ # Iterate over the key, value pairs in the database.
+ # @yield [key, value] blk the iterator for each key value pair.
+ # @yieldparam key the key.
+ # @yieldparam value the value from the database.
+ def each(&block)
+ @table.each(&block)
  end

  # Return the keys in the db.
- # @return [Array<String>]
+ # @return [Array]
  def keys
  @table.keys
  end

- # Return the number of stored items.
- # @return [Integer]
- def length
- @table.keys.length
+ # Flush all changes to disk.
+ def flush
+ @queue.flush
  end
- alias_method :size, :length

- # Serialize the data for writing to disk, if you don't want to use <tt>Marshal</tt>
- # overwrite this method.
- # @param value the value to be serialized
- # @return [String]
- def serialize(value)
- Marshal.dump(value)
+ # Sync the database with what is on disk, by first flushing changes, and
+ # then reading the file if necessary.
+ def sync
+ flush
+ update
  end

- # Parse the serialized value from disk, like serialize if you want to use a
- # different serialization method overwrite this method.
- # @param value the value to be parsed
- # @return [String]
- def parse(value)
- Marshal.load(value)
+ # Lock the database for an exclusive commit accross processes and threads
+ # @yield a block where every change to the database is synced
+ def lock
+ @mutex.synchronize do
+ # We need a flush before exclusive
+ # so that @exclusive is not modified by the worker
+ flush
+ exclusive do
+ update
+ result = yield
+ flush
+ result
+ end
+ end
  end

- # Empty the database file.
- def empty!
- @writer.truncate!
+ # Remove all keys and values from the database.
+ def clear
+ flush
+ with_tmpfile do |path, file|
+ file.write(@format.header)
+ file.close
+ # Clear acts like a compactification
+ File.rename(path, @file)
+ end
  @table.clear
- read!
+ open
+ self
  end
- alias_method :clear, :empty!

- # Force all queued commits to be written to disk.
- def flush!
- @writer.flush!
+ # Compact the database to remove stale commits and reduce the file size.
+ def compact
+ sync
+ with_tmpfile do |path, file|
+ compactsize = file.write(dump)
+ exclusive do
+ stat = @fd.stat
+ # Check if database was compactified at the same time
+ if stat.nlink > 0 && stat.ino == @inode
+ # Compactified database has the same size -> return
+ return self if stat.size == compactsize
+ # Append changed journal records if the database changed during compactification
+ file.write(read)
+ file.close
+ File.rename(path, @file)
+ end
+ end
+ end
+ open
+ update
+ self
  end

  # Close the database for reading and writing.
- def close!
- @writer.close!
+ def close
+ @queue << nil
+ @worker.join
+ @fd.close
+ @queue.stop if @queue.respond_to?(:stop)
+ self.class.unregister(self)
+ nil
  end

- # Compact the database to remove stale commits and reduce the file size.
- def compact!
- # Create a new temporary database
- tmp_file = @file_name + "-#{$$}-#{Thread.current.object_id}"
- copy_db = self.class.new tmp_file
-
- # Copy the database key by key into the temporary table
- each do |key, value|
- copy_db.set(key, get(key))
+ # Check to see if we've already closed the database.
+ def closed?
+ @fd.closed?
+ end
+
+ private
+
+ # Update the @table with records read from the file, and increment @logsize
+ def update
+ buf = new_records
+ until buf.empty?
+ record = @format.parse(buf)
+ if record.size == 1
+ @table.delete(record.first)
+ else
+ @table[record.first] = @serializer.load(record.last)
+ end
+ @logsize += 1
  end
- copy_db.close!
+ end

- close!
+ # Read new records from journal log and return buffer
+ def new_records
+ loop do
+ unless @exclusive
+ # HACK: JRuby returns false if the process is already hold by the same process
+ # see https://github.com/jruby/jruby/issues/496
+ Thread.pass until @fd.flock(File::LOCK_SH)
+ end
+ # Check if database was compactified in the meantime
+ # break if not
+ stat = @fd.stat
+ break if stat.nlink > 0 && stat.ino == @inode
+ open
+ end

- # Move the copy into place
- File.rename tmp_file, @file_name
+ # Read new journal records
+ read
+ ensure
+ @fd.flock(File::LOCK_UN) unless @exclusive
+ end

- # Reopen this database
- @writer = Writer.new(@file_name)
- @table.clear
- read!
+ def open
+ @fd.close if @fd
+ @fd = File.open(@file, 'ab+')
+ @fd.advise(:sequential) if @fd.respond_to? :advise
+ stat = @fd.stat
+ @inode = stat.ino
+ @logsize = 0
+ if stat.size == 0
+ @fd.write(@format.header)
+ @fd.flush
+ end
+ @pos = nil
  end

- # Read all values from the log file. If you want to check for changed data
- # call this again.
- def read!
- buf = nil
- File.open(@file_name, 'rb') do |fd|
- fd.flock(File::LOCK_SH)
- buf = fd.read
+ def read
+ # File was opened
+ unless @pos
+ @fd.pos = 0
+ @format.read_header(@fd)
+ else
+ @fd.pos = @pos
  end
- until buf.empty?
- key, data, deleted = Record.deserialize(buf)
- if deleted
- @table.delete key
- else
- @table[key] = parse(data)
- end
+ buf = @fd.read
+ @pos = @fd.pos
+ buf
+ end
+
+ # Return database dump as string
+ def dump
+ dump = @format.header
+ # each is faster than inject
+ @table.each do |record|
+ record[1] = @serializer.dump(record.last)
+ dump << @format.dump(record)
  end
+ dump
  end

- private
+ # Worker thread
+ def worker
+ loop do
+ record = @queue.next
+ write_record(record) if record
+ @queue.pop
+ break unless record
+ end
+ rescue Exception => ex
+ warn "Daybreak worker: #{ex.message}"
+ retry
+ end
+
+ # Write record to output stream and
+ # advance input stream
+ def write_record(record)
+ record[1] = @serializer.dump(record.last) if record.size > 1
+ record = @format.dump(record)
+ exclusive do
+ @fd.write(record)
+ # Flush to make sure the file is really updated
+ @fd.flush
+ end
+ @pos = @fd.pos if @pos && @fd.pos == @pos + record.bytesize
+ @logsize += 1
+ end
+
+ # Lock database exclusively
+ def exclusive
+ return yield if @exclusive
+ begin
+ loop do
+ # HACK: JRuby returns false if the process is already hold by the same process
+ # see https://github.com/jruby/jruby/issues/496
+ Thread.pass until @fd.flock(File::LOCK_EX)
+ # Check if database was compactified in the meantime
+ # break if not
+ stat = @fd.stat
+ break if stat.nlink > 0 && stat.ino == @inode
+ open
+ end
+ @exclusive = true
+ yield
+ ensure
+ @fd.flock(File::LOCK_UN)
+ @exclusive = false
+ end
+ end

- def write(key, value, sync = false, delete = false)
- @writer.write([key, serialize(value), delete])
- flush! if sync
+ # Open temporary file and pass it to the block
+ def with_tmpfile
+ path = [@file, $$.to_s(36), Thread.current.object_id.to_s(36)].join
+ file = File.open(path, 'wb')
+ yield(path, file)
+ ensure
+ file.close unless file.closed?
+ File.unlink(path) if File.exists?(path)
  end
  end
  end
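
Note: taken together, the db.rb rewrite drops the bang names (close!, compact!, flush!, empty! become close, compact, flush, clear), moves writes onto a background queue, and adds lock, sync, set!, delete! and an at_exit safety net. A hedged sketch of the new surface, using only methods visible in the diff above; the file name, keys and values are illustrative:

  require 'daybreak'

  db = Daybreak::DB.new 'counters.db', :default => 0

  # Cross-process safe update: #lock takes an exclusive flock, re-reads
  # the journal, yields, and flushes before releasing the lock.
  db.lock do
    db['visits'] += 1
  end

  db.set! 'pi', 3.14    # set and flush in one call
  db.delete! 'visits'   # delete and flush in one call

  db.sync               # flush, then pick up records written by other processes
  db.compact            # rewrite the journal to drop stale records
  db.close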