em-bucketer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +34 -0
- data/Gemfile +3 -0
- data/LICENSE +675 -0
- data/README.md +63 -0
- data/Rakefile +2 -0
- data/em-bucketer.gemspec +29 -0
- data/lib/em-bucketer.rb +8 -0
- data/lib/em-bucketer/base.rb +153 -0
- data/lib/em-bucketer/database.rb +2 -0
- data/lib/em-bucketer/database/hash.rb +42 -0
- data/lib/em-bucketer/database/redis.rb +88 -0
- data/lib/em-bucketer/in_memory.rb +24 -0
- data/lib/em-bucketer/redis.rb +68 -0
- data/lib/em-bucketer/version.rb +5 -0
- data/spec/em_bucketer_examples.rb +108 -0
- data/spec/in_memory_spec.rb +8 -0
- data/spec/redis_spec.rb +33 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/spec_methods.rb +10 -0
- metadata +183 -0
data/README.md
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# EventMachine::Bucketer
|
2
|
+
|
3
|
+
This is a generic EventMachine library for putting arbitrary objects into
|
4
|
+
buckets and setting callbacks to be called when any bucket exceeds a specific
|
5
|
+
threshold size. Although the `Bucketer::InMemory` is synchronous (it's just
|
6
|
+
using a ruby hash) the interface is still what would be expected for an
|
7
|
+
asynchronous API for consistency with other Bucketers that are actually
|
8
|
+
asynchronous.
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'em-bucketer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install em-bucketer
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
require 'em-bucketer'
|
28
|
+
EM.run do
|
29
|
+
bucketer = EM::Bucketer::InMemory.new(:bucket_threshold_size => 5)
|
30
|
+
|
31
|
+
bucketer.on_bucket_full do |bucket_id|
|
32
|
+
p "yay bucket #{bucket_id} filled up!"
|
33
|
+
|
34
|
+
bucketer.get_and_empty_bucket(bucket_id) do |items|
|
35
|
+
EM.stop
|
36
|
+
items.each do |item|
|
37
|
+
p "got back #{item}"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
bucketer.add_item("1", "1", {:foo => :bar})
|
43
|
+
bucketer.add_item("1", "2", {:foo => :bar})
|
44
|
+
bucketer.add_item("1", "3", {:foo => :bar})
|
45
|
+
bucketer.add_item("1", "4", {:bar => :foo})
|
46
|
+
bucketer.add_item("1", "5", {:bar => :foo})
|
47
|
+
end
|
48
|
+
```
|
49
|
+
|
50
|
+
## Redis Bucketer
|
51
|
+
|
52
|
+
This gem also supports a Redis-backed bucketer, which uses the `em-hiredis` gem.
|
53
|
+
This bucketer uses `Marshal.dump` to store objects in Redis, and thus there are
|
54
|
+
limitations on what can be placed in a bucket. Specifically, you cannot store
|
55
|
+
procs in buckets using the redis bucketer.
|
56
|
+
|
57
|
+
## Contributing
|
58
|
+
|
59
|
+
1. Fork it ( https://github.com/dgvz/em-bucketer/fork )
|
60
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
61
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
62
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
63
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/em-bucketer.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for em-bucketer.

# Make the gem's own lib/ directory loadable so the version constant
# can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.include?(lib_dir) || $LOAD_PATH.unshift(lib_dir)
require 'em-bucketer/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "em-bucketer"
  spec.version  = EventMachine::Bucketer::VERSION
  spec.authors  = ["Richard Heycock", "Dylan Griffith"]
  spec.email    = ["dyl.griffith@gmail.com"]
  spec.summary  = "A generic eventmachine library for storing arbitrary objects in buckets with callbacks on threshold reached"
  spec.homepage = "https://github.com/dgvz/em-bucketer"
  spec.license  = "GPL"

  # Packaged files: everything tracked by git, NUL-separated so that
  # unusual file names survive the split.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_runtime_dependency "eventmachine"
  spec.add_runtime_dependency "em-hiredis"

  # Development dependencies
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "redis"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "pry"
  spec.add_development_dependency "yard"
end
|
data/lib/em-bucketer.rb
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
module EventMachine::Bucketer
  # Bucketing behaviour shared by the concrete bucketers.
  #
  # The including class is expected to also provide the storage
  # primitives called from here: +add_bucket_to_db+,
  # +get_bucket_from_db+, +empty_bucket_in_db+ and
  # +bucket_size_from_db+. Each of them must return an object that
  # responds to +callback+ and +errback+.
  module Base
    # Initialises the state shared by every bucketer.
    #
    # @param bucket_threshold_size [Integer] size at which the
    #   on_bucket_full callbacks start firing
    # @param bucket_max_age [Integer, nil] seconds after the first
    #   item is added before the on_bucket_timeout callbacks fire;
    #   a falsy value disables timers
    def setup(bucket_threshold_size, bucket_max_age)
      @bucket_threshold_size = bucket_threshold_size
      @bucket_max_age = bucket_max_age
      @buckets = {}
      @on_bucket_full_callbacks = []
      @on_bucket_timeout_callbacks = []
      # bucket_id => EventMachine timer signature. Keeping the
      # signature (rather than only the id) lets clear_timer cancel
      # the pending timer when the bucket is emptied.
      @buckets_with_timers = {}
    end

    # Adds an item to the specified bucket and
    # calls the block when it is done
    #
    # @param bucket_id [String] the bucket id of
    #   the bucket to put the item in
    # @param item_id [String] the item_id
    #   of the item (used to ensure uniqueness
    #   within a bucket)
    # @param item [Object] the item to be
    #   placed in the bucket
    # @return [EM::Completion] succeeds once the item is stored,
    #   fails with the storage error otherwise
    def add_item(bucket_id, item_id, item, &blk)
      add_timer_if_first(bucket_id)
      EM::Completion.new.tap do |c|
        c.callback(&blk) if block_given?
        add_bucket_to_db(bucket_id, item_id, item).callback do
          c.succeed
        end.errback do |e|
          c.fail e
        end
        check_bucket_full(bucket_id)
      end
    end

    # Used to set a callback hook for when a bucket
    # reaches the threshold size. It is IMPORTANT
    # to note that the bucket will not automatically
    # be emptied you must call empty_bucket if you
    # want the bucket to be emptied. Also the callback
    # will be called every time an item is added
    # until the bucket is emptied.
    #
    # @yield [String] The bucket id of the full bucket
    def on_bucket_full(&blk)
      @on_bucket_full_callbacks << blk
    end

    # Used to set a callback hook for when a bucket
    # reaches the time limit. It is IMPORTANT
    # to note that the bucket will not automatically
    # be emptied you must call empty_bucket if you
    # want the bucket to be emptied.
    #
    # This timer is started once the bucket gets its
    # first item and is cancelled when the
    # bucket is emptied. The callback will only be
    # called once at this time and then not again
    # unless you empty the bucket and add something
    # again.
    #
    # @yield [String] The bucket id of the timed out bucket
    def on_bucket_timeout(&blk)
      @on_bucket_timeout_callbacks << blk
    end

    # Get the contents of a bucket.
    #
    # @param bucket_id [String] the bucket id
    #   of the bucket you want to get
    # @yield [Array] the items you put
    #   into the bucket
    # @return [EM::Completion] succeeds with the bucket's items
    def get_bucket(bucket_id, &blk)
      EM::Completion.new.tap do |c|
        c.callback(&blk) if block_given?
        get_bucket_from_db(bucket_id).callback do |bucket|
          # The storage layer hands back item_id => item; callers
          # only get the items.
          c.succeed bucket.values
        end.errback do |e|
          c.fail e
        end
      end
    end

    # Get the contents of a bucket then empty it
    #
    # NOTE(review): the read and the delete are two separate storage
    # calls; presumably an item added between them on an asynchronous
    # backend is removed without being returned - confirm.
    #
    # @param bucket_id [String] the bucket id
    #   of the bucket you want to get
    # @yield [Array] the items you put
    #   into the bucket
    # @return [EM::Completion] succeeds with the items once the
    #   bucket has been emptied
    def get_and_empty_bucket(bucket_id, &blk)
      EM::Completion.new.tap do |c|
        c.callback(&blk) if block_given?
        get_bucket(bucket_id).callback do |contents|
          empty_bucket(bucket_id).callback do
            c.succeed contents
          end.errback do |e|
            c.fail e
          end
        end.errback do |e|
          c.fail e
        end
      end
    end

    # Empty a bucket
    #
    # @param bucket_id [String] the bucket id
    #   of the bucket you want to empty
    # @return [EM::Completion] succeeds once the bucket is emptied
    #   and its timeout timer has been cancelled
    def empty_bucket(bucket_id, &blk)
      EM::Completion.new.tap do |c|
        c.callback(&blk) if block_given?
        empty_bucket_in_db(bucket_id).callback do
          clear_timer(bucket_id)
          c.succeed
        end.errback do |e|
          c.fail e
        end
      end
    end

    private

    # Yields true when the bucket's stored size has reached the
    # threshold, false otherwise. Nothing is yielded if the size
    # lookup fails.
    def bucket_full?(bucket_id, &blk)
      bucket_size_from_db(bucket_id).callback do |size|
        blk.call size >= @bucket_threshold_size
      end
    end

    # Fires every on_bucket_full callback with the bucket id when the
    # bucket is at or above the threshold.
    def check_bucket_full(bucket_id)
      bucket_full?(bucket_id) do |is_full|
        if is_full
          @on_bucket_full_callbacks.each do |callback|
            callback.call bucket_id
          end
        end
      end
    end

    # Starts the timeout timer for a bucket unless one has already
    # been registered for it since it was last emptied.
    def add_timer_if_first(bucket_id)
      return unless @bucket_max_age
      return if @buckets_with_timers.key?(bucket_id)

      @buckets_with_timers[bucket_id] = EM.add_timer(@bucket_max_age) do
        @on_bucket_timeout_callbacks.each do |callback|
          callback.call bucket_id
        end
      end
    end

    # Forgets the bucket's timer and cancels it so that a timer
    # started for earlier contents cannot fire after the bucket has
    # been emptied. Cancelling a timer that already fired is harmless.
    def clear_timer(bucket_id)
      signature = @buckets_with_timers.delete(bucket_id)
      EM.cancel_timer(signature) if signature
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'em-bucketer/database'
|
2
|
+
|
3
|
+
module EventMachine::Bucketer
  module Database
    # Storage primitives backed by the plain ruby hash held in
    # @buckets (bucket_id => { item_id => item }). Every operation
    # finishes synchronously but still returns an EM::Completion so
    # the interface matches the asynchronous backends.
    module Hash
      private

      # Succeeds with the number of items in the bucket. A bucket
      # that does not exist yet is created empty.
      def bucket_size_from_db(bucket_id, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        bucket = (@buckets[bucket_id] ||= {})
        completion.succeed(bucket.size)
        completion
      end

      # Stores the item under item_id in the bucket, creating the
      # bucket on first use, then succeeds with no value.
      def add_bucket_to_db(bucket_id, item_id, item, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        bucket = (@buckets[bucket_id] ||= {})
        bucket[item_id] = item
        completion.succeed
        completion
      end

      # Succeeds with the bucket's item_id => item hash. A bucket
      # that does not exist yet is created empty.
      def get_bucket_from_db(bucket_id, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        bucket = (@buckets[bucket_id] ||= {})
        completion.succeed(bucket)
        completion
      end

      # Replaces the bucket with a fresh empty hash, then succeeds
      # with no value.
      def empty_bucket_in_db(bucket_id, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        @buckets[bucket_id] = {}
        completion.succeed
        completion
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'em-bucketer/database'
|
2
|
+
require 'em-hiredis'
|
3
|
+
|
4
|
+
module EventMachine::Bucketer
  module Database
    # Storage primitives backed by redis. Each bucket is a redis hash
    # (item_id => marshalled item) and the ids of non-empty buckets
    # are tracked in a redis set.
    #
    # The including class must provide +redis+ (the connection the
    # commands are sent on) and +redis_prefix+ (used to namespace the
    # keys).
    module Redis
      private

      # Succeeds with the number of items in the bucket (HLEN).
      #
      # @param bucket_id [String] the bucket to measure
      # @return [EM::Completion] fails with the redis error on failure
      def bucket_size_from_db(bucket_id, &blk)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          redis.hlen(redis_key(bucket_id)).callback do |len|
            c.succeed len.to_i
          end.errback do |e|
            c.fail e
          end
        end
      end

      # Stores the marshalled item in the bucket's hash (HSET) and
      # then records the bucket id in the known-buckets set.
      #
      # @param bucket_id [String] the bucket to add to
      # @param item_id [String] field name of the item in the hash
      # @param item [Object] the item; it must be dumpable by Marshal
      # @return [EM::Completion] succeeds once both commands succeed,
      #   fails with the redis error if either fails
      def add_bucket_to_db(bucket_id, item_id, item, &blk)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          redis.hset(redis_key(bucket_id), item_id, Marshal.dump(item)).callback do
            add_to_known_buckets(bucket_id).callback do
              c.succeed
            end.errback do |e|
              c.fail e
            end
          end.errback do |e|
            c.fail e
          end
        end
      end

      # Succeeds with the bucket as an item_id => item hash (HGETALL).
      #
      # NOTE(review): Marshal.load runs on whatever is stored under
      # the bucket key; this is only safe while nothing untrusted can
      # write to that redis instance - confirm for your deployment.
      #
      # @param bucket_id [String] the bucket to read
      # @return [EM::Completion] fails with the redis error on failure
      def get_bucket_from_db(bucket_id, &blk)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          redis.hgetall(redis_key(bucket_id)).callback do |data|
            bucket = {}
            # The reply is consumed as a flat list of alternating
            # field names and marshalled values.
            data.each_slice(2) do |item_id, dumped|
              bucket[item_id] = Marshal.load(dumped)
            end
            c.succeed bucket
          end.errback do |e|
            # Propagate the failure to the caller's completion.
            c.fail e
          end
        end
      end

      # Deletes the bucket's hash (DEL) and then removes the bucket
      # id from the known-buckets set.
      #
      # @param bucket_id [String] the bucket to empty
      # @return [EM::Completion] succeeds once both commands succeed,
      #   fails with the redis error if either fails
      def empty_bucket_in_db(bucket_id, &blk)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          redis.del(redis_key(bucket_id)).callback do
            remove_from_known_buckets(bucket_id).callback do
              c.succeed
            end.errback do |e|
              c.fail e
            end
          end.errback do |e|
            c.fail e
          end
        end
      end

      # Requests the members of the known-buckets set (SMEMBERS).
      def known_buckets(&blk)
        redis.smembers(redis_known_buckets_key, &blk)
      end

      # Adds the bucket id to the known-buckets set (SADD).
      def add_to_known_buckets(bucket_id, &blk)
        redis.sadd(redis_known_buckets_key, bucket_id, &blk)
      end

      # Removes the bucket id from the known-buckets set (SREM).
      def remove_from_known_buckets(bucket_id, &blk)
        redis.srem(redis_known_buckets_key, bucket_id, &blk)
      end

      # Key of the redis hash that holds one bucket's items.
      def redis_key(bucket_id)
        "em_bucketer:#{redis_prefix}:#{bucket_id}"
      end

      # Key of the redis set that lists this bucketer's bucket ids.
      def redis_known_buckets_key
        "em_bucketer_known_buckets:#{redis_prefix}"
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'eventmachine'
|
2
|
+
require 'em-bucketer/database/hash'
|
3
|
+
require 'em-bucketer/base'
|
4
|
+
|
5
|
+
module EventMachine::Bucketer
  # Bucketer that keeps every bucket in a ruby hash inside the
  # process.
  class InMemory
    include Database::Hash
    include Base

    BUCKET_THRESHOLD_SIZE_DEFAULT = 1_000
    BUCKET_MAX_AGE_DEFAULT = 3_600

    # Builds an in memory Bucketer with the given configuration.
    #
    # @param bucket_threshold_size [Integer] bucket size from which
    #   the on_bucket_full callbacks are called
    # @param bucket_max_age [Integer] number of seconds a bucket may
    #   exist before the on_bucket_timeout callbacks are called
    def initialize(
      bucket_threshold_size: BUCKET_THRESHOLD_SIZE_DEFAULT,
      bucket_max_age: BUCKET_MAX_AGE_DEFAULT
    )
      setup(bucket_threshold_size, bucket_max_age)
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'eventmachine'
|
2
|
+
require 'em-bucketer/database/redis'
|
3
|
+
require 'em-bucketer/base'
|
4
|
+
|
5
|
+
module EventMachine::Bucketer
  # Bucketer that stores its buckets in redis through em-hiredis.
  class Redis
    include Database::Redis
    include Base

    BUCKET_THRESHOLD_SIZE_DEFAULT = 1_000
    BUCKET_MAX_AGE_DEFAULT = 3_600

    # Builds a redis Bucketer with the given configuration.
    #
    # *NOTE* Items are serialised with Marshal before being written
    # to redis, which limits what can be stored. For example an
    # object holding a proc in an instance variable cannot be put
    # in a bucket.
    #
    # On construction a timer is started for every bucket already
    # recorded in redis under this prefix, so buckets left over from
    # a previous run of the app still get their timeout callback.
    #
    # @param redis_prefix [String] namespace for this bucketer's
    #   redis keys. It keeps several bucketers on one redis instance
    #   from colliding, and it must be stable across restarts (not
    #   random) so a restarted app finds its existing buckets again.
    # @param bucket_threshold_size [Integer] bucket size from which
    #   the on_bucket_full callbacks are called
    # @param bucket_max_age [Integer] number of seconds a bucket may
    #   exist before the on_bucket_timeout callbacks are called
    def initialize(
      redis_prefix,
      bucket_threshold_size: BUCKET_THRESHOLD_SIZE_DEFAULT,
      bucket_max_age: BUCKET_MAX_AGE_DEFAULT
    )
      @redis = EM::Hiredis.connect
      @redis_prefix = redis_prefix
      setup(bucket_threshold_size, bucket_max_age)
      set_timers
    end

    # Starts a timeout timer for each bucket id listed in redis.
    def set_timers
      lookup = known_buckets
      lookup.callback do |bucket_ids|
        bucket_ids.each { |bucket_id| add_timer_if_first(bucket_id) }
      end
      lookup.errback do |e|
        # There is nobody to hand this error back to, so it is
        # raised. It only happens while the bucketer is being
        # initialised, which should surface the problem at startup,
        # and it is a serious one: the buckets in redis are not
        # being reloaded properly.
        raise e
      end
    end

    private

    # Used by Database::Redis
    attr_reader :redis_prefix, :redis
  end
end
|