em-bucketer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,63 @@
1
+ # EventMachine::Bucketer
2
+
3
+ This is a generic EventMachine library for putting arbitrary objects into
4
+ buckets and setting callbacks to be called when any bucket exceeds a specific
5
+ threshold size. Although the `Bucketer::InMemory` is synchronous (it's just
6
+ using a Ruby hash), the interface is still what would be expected of an
7
+ asynchronous API for consistency with other Bucketers that are actually
8
+ asynchronous.
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'em-bucketer'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install em-bucketer
23
+
24
+ ## Usage
25
+
26
+ ```ruby
27
+ require 'em-bucketer'
28
+ EM.run do
29
+ bucketer = EM::Bucketer::InMemory.new(:bucket_threshold_size => 5)
30
+
31
+ bucketer.on_bucket_full do |bucket_id|
32
+ p "yay bucket #{bucket_id} filled up!"
33
+
34
+ bucketer.get_and_empty_bucket(bucket_id) do |items|
35
+ EM.stop
36
+ items.each do |item|
37
+ p "got back #{item}"
38
+ end
39
+ end
40
+ end
41
+
42
+ bucketer.add_item("1", "1", {:foo => :bar})
43
+ bucketer.add_item("1", "2", {:foo => :bar})
44
+ bucketer.add_item("1", "3", {:foo => :bar})
45
+ bucketer.add_item("1", "4", {:bar => :foo})
46
+ bucketer.add_item("1", "5", {:bar => :foo})
47
+ end
48
+ ```
49
+
50
+ ## Redis Bucketer
51
+
52
+ This gem also supports a Redis-backed bucketer which uses the `em-hiredis` gem.
53
+ This bucketer uses `Marshal.dump` to store objects in redis and thus there are
54
+ limitations on what can be placed in a bucket. Specifically you cannot store
55
+ procs in buckets using the redis bucketer.
56
+
57
+ ## Contributing
58
+
59
+ 1. Fork it ( https://github.com/dgvz/em-bucketer/fork )
60
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
61
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
62
+ 4. Push to the branch (`git push origin my-new-feature`)
63
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for em-bucketer. lib/ is put on the load path first so
# the version constant can be read straight from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'em-bucketer/version'

Gem::Specification.new do |gem|
  # Identity and metadata
  gem.name     = 'em-bucketer'
  gem.version  = EventMachine::Bucketer::VERSION
  gem.authors  = ['Richard Heycock', 'Dylan Griffith']
  gem.email    = ['dyl.griffith@gmail.com']
  gem.summary  = 'A generic eventmachine library for storing arbitrary objects in buckets with callbacks on threshold reached'
  gem.homepage = 'https://github.com/dgvz/em-bucketer'
  gem.license  = 'GPL'

  # Package whatever git tracks; executables and test files are derived
  # from that same file list.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependencies
  %w[eventmachine em-hiredis].each do |dependency|
    gem.add_runtime_dependency dependency
  end

  # Development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.6'
  %w[redis rake rspec pry yard].each do |dependency|
    gem.add_development_dependency dependency
  end
end
@@ -0,0 +1,8 @@
1
+ require "em-bucketer/version"
2
+
3
+ module EventMachine::Bucketer
4
+ end
5
+
6
+ require 'eventmachine'
7
+ require 'em-bucketer/in_memory'
8
+ require 'em-bucketer/redis'
@@ -0,0 +1,153 @@
1
module EventMachine
  module Bucketer
    # Storage-agnostic bucketing behaviour shared by the bucketers.
    #
    # The including class must call #setup once and must provide the
    # private storage hooks used below (add_bucket_to_db,
    # get_bucket_from_db, empty_bucket_in_db and bucket_size_from_db), each
    # returning an object that accepts callback/errback blocks.
    module Base
      # Initialises the state shared by all bucketers.
      #
      # @param bucket_threshold_size [Integer] number of items at which
      #   the on_bucket_full callbacks start firing
      # @param bucket_max_age [Numeric, nil] delay handed to EM.add_timer
      #   for the on_bucket_timeout callbacks; when nil or false no timers
      #   are created
      def setup(bucket_threshold_size, bucket_max_age)
        @bucket_threshold_size = bucket_threshold_size
        @bucket_max_age = bucket_max_age
        @buckets = {}
        @on_bucket_full_callbacks = []
        @on_bucket_timeout_callbacks = []
        # bucket_id => timer signature returned by EM.add_timer. Keeping
        # the signature (rather than just the id) lets clear_timer cancel
        # the pending timer.
        @buckets_with_timers = {}
      end

      # Adds an item to the specified bucket and calls the block when it
      # is done. Once the item has been stored the bucket size is checked
      # and the on_bucket_full callbacks are fired if the threshold has
      # been reached; a failed write never triggers them.
      #
      # @param bucket_id [String] the bucket id of the bucket to put the
      #   item in
      # @param item_id [String] the item_id of the item (used to ensure
      #   uniqueness within a bucket)
      # @param item [Object] the item to be placed in the bucket
      # @return [EM::Completion] succeeds once the item is stored, fails
      #   with the storage error otherwise
      def add_item(bucket_id, item_id, item, &blk)
        add_timer_if_first(bucket_id)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          add_bucket_to_db(bucket_id, item_id, item).callback do
            c.succeed
            check_bucket_full(bucket_id)
          end.errback do |e|
            c.fail e
          end
        end
      end

      # Used to set a callback hook for when a bucket reaches the
      # threshold size. It is IMPORTANT to note that the bucket will not
      # automatically be emptied; you must call empty_bucket if you want
      # the bucket to be emptied. Also the callback will be called every
      # time an item is added until the bucket is emptied.
      #
      # @yield [String] The bucket id of the full bucket
      def on_bucket_full(&blk)
        @on_bucket_full_callbacks << blk
      end

      # Used to set a callback hook for when a bucket reaches the time
      # limit. It is IMPORTANT to note that the bucket will not
      # automatically be emptied; you must call empty_bucket if you want
      # the bucket to be emptied.
      #
      # The timer is started when the bucket gets its first item and is
      # cancelled when the bucket is emptied. The callback is called once
      # when the timer expires and then not again unless you empty the
      # bucket and add something again.
      #
      # @yield [String] The bucket id of the timed out bucket
      def on_bucket_timeout(&blk)
        @on_bucket_timeout_callbacks << blk
      end

      # Get the contents of a bucket.
      #
      # @param bucket_id [String] the bucket id of the bucket you want to
      #   get
      # @yield [Array] the items you put into the bucket
      # @return [EM::Completion] succeeds with the array of items
      def get_bucket(bucket_id, &blk)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          get_bucket_from_db(bucket_id).callback do |bucket|
            c.succeed bucket.values
          end.errback do |e|
            c.fail e
          end
        end
      end

      # Get the contents of a bucket then empty it.
      #
      # @param bucket_id [String] the bucket id of the bucket you want to
      #   get
      # @yield [Array] the items you put into the bucket
      # @return [EM::Completion] succeeds with the array of items once the
      #   bucket has been emptied
      def get_and_empty_bucket(bucket_id, &blk)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          get_bucket(bucket_id).callback do |contents|
            empty_bucket(bucket_id).callback do
              c.succeed contents
            end.errback do |e|
              c.fail e
            end
          end.errback do |e|
            c.fail e
          end
        end
      end

      # Empty a bucket and cancel its pending timeout timer.
      #
      # @param bucket_id [String] the bucket id of the bucket you want to
      #   empty
      # @return [EM::Completion] succeeds once the bucket is empty
      def empty_bucket(bucket_id, &blk)
        EM::Completion.new.tap do |c|
          c.callback(&blk) if block_given?
          empty_bucket_in_db(bucket_id).callback do
            clear_timer(bucket_id)
            c.succeed
          end.errback do |e|
            c.fail e
          end
        end
      end

      private

      # Yields true when the stored size of the bucket has reached the
      # configured threshold, false otherwise.
      def bucket_full?(bucket_id, &blk)
        bucket_size_from_db(bucket_id).callback do |size|
          blk.call size >= @bucket_threshold_size
        end
      end

      # Fires every on_bucket_full callback if the bucket is full.
      def check_bucket_full(bucket_id)
        bucket_full?(bucket_id) do |is_full|
          if is_full
            @on_bucket_full_callbacks.each do |callback|
              callback.call bucket_id
            end
          end
        end
      end

      # Starts the timeout timer for a bucket unless one is already
      # registered for it. Does nothing when no max age is configured.
      def add_timer_if_first(bucket_id)
        return unless @bucket_max_age
        return if @buckets_with_timers.key?(bucket_id)

        @buckets_with_timers[bucket_id] = EM.add_timer(@bucket_max_age) do
          @on_bucket_timeout_callbacks.each do |callback|
            callback.call bucket_id
          end
        end
      end

      # Forgets the bucket's timer and cancels it, so an emptied bucket
      # does not receive a stale timeout and the next item added starts a
      # fresh timer.
      def clear_timer(bucket_id)
        timer = @buckets_with_timers.delete(bucket_id)
        EM.cancel_timer(timer) if timer
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
module EventMachine
  module Bucketer
    # Namespace for the storage back ends that are mixed into bucketers.
    # Declared inside EventMachine::Bucketer so that no top-level
    # ::Database constant is created.
    module Database
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'em-bucketer/database'
2
+
3
module EventMachine::Bucketer
  module Database
    # In-process storage back end. Buckets are kept in the @buckets
    # instance variable of the including object as
    # bucket_id => { item_id => item }. Every operation finishes
    # immediately but still hands back an EM::Completion.
    module Hash
      private

      # Succeeds with the number of items currently in the bucket.
      def bucket_size_from_db(bucket_id, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        bucket = (@buckets[bucket_id] ||= {})
        completion.succeed(bucket.size)
        completion
      end

      # Stores item under item_id, replacing any item with the same id.
      def add_bucket_to_db(bucket_id, item_id, item, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        bucket = (@buckets[bucket_id] ||= {})
        bucket[item_id] = item
        completion.succeed
        completion
      end

      # Succeeds with the bucket's item_id => item hash.
      def get_bucket_from_db(bucket_id, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        bucket = (@buckets[bucket_id] ||= {})
        completion.succeed(bucket)
        completion
      end

      # Replaces the bucket with a fresh, empty hash.
      def empty_bucket_in_db(bucket_id, &blk)
        completion = EM::Completion.new
        completion.callback(&blk) if blk
        @buckets[bucket_id] = {}
        completion.succeed
        completion
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ require 'em-bucketer/database'
2
+ require 'em-hiredis'
3
+
4
module EventMachine
  module Bucketer
    module Database
      # Redis storage back end. Each bucket is a Redis hash of
      # item_id => Marshal-dumped item, and the ids of non-empty buckets
      # are tracked in a Redis set. The including class must provide
      # private `redis` (client whose commands return objects accepting
      # callback/errback) and `redis_prefix` readers.
      module Redis
        private

        # Succeeds with the number of items in the bucket (HLEN).
        def bucket_size_from_db(bucket_id, &blk)
          EM::Completion.new.tap do |c|
            c.callback(&blk) if block_given?
            redis.hlen(redis_key(bucket_id)).callback do |len|
              c.succeed len.to_i
            end.errback do |e|
              c.fail e
            end
          end
        end

        # Stores the marshalled item, then records the bucket id in the
        # known-buckets set. Succeeds only after both writes succeed.
        def add_bucket_to_db(bucket_id, item_id, item, &blk)
          EM::Completion.new.tap do |c|
            c.callback(&blk) if block_given?
            redis.hset(redis_key(bucket_id), item_id, Marshal.dump(item)).callback do
              add_to_known_buckets(bucket_id).callback do
                c.succeed
              end.errback do |e|
                c.fail e
              end
            end.errback do |e|
              c.fail e
            end
          end
        end

        # Succeeds with the bucket as an item_id => item hash. Fails with
        # the Redis error if HGETALL fails, or with the Marshal error if a
        # stored value cannot be decoded.
        def get_bucket_from_db(bucket_id, &blk)
          EM::Completion.new.tap do |c|
            c.callback(&blk) if block_given?
            redis.hgetall(redis_key(bucket_id)).callback do |data|
              begin
                bucket = unmarshal_bucket(data)
              rescue TypeError, ArgumentError => e
                # Undecodable payload: report it through the completion
                # instead of raising inside the reactor callback.
                c.fail e
              else
                c.succeed bucket
              end
            end.errback do |e|
              c.fail e
            end
          end
        end

        # Deletes the bucket's hash and removes its id from the
        # known-buckets set.
        def empty_bucket_in_db(bucket_id, &blk)
          EM::Completion.new.tap do |c|
            c.callback(&blk) if block_given?
            redis.del(redis_key(bucket_id)).callback do
              remove_from_known_buckets(bucket_id).callback do
                c.succeed
              end.errback do |e|
                c.fail e
              end
            end.errback do |e|
              c.fail e
            end
          end
        end

        # Turns the flat [item_id, dump, item_id, dump, ...] reply into a
        # hash of decoded items.
        #
        # NOTE(review): Marshal.load can instantiate arbitrary objects, so
        # this is only safe while everything able to write to these keys
        # is trusted.
        def unmarshal_bucket(data)
          data.each_slice(2).each_with_object({}) do |(item_id, dumped), bucket|
            bucket[item_id] = Marshal.load(dumped)
          end
        end

        # Ids of all buckets recorded in the known-buckets set.
        def known_buckets(&blk)
          redis.smembers(redis_known_buckets_key, &blk)
        end

        def add_to_known_buckets(bucket_id, &blk)
          redis.sadd(redis_known_buckets_key, bucket_id, &blk)
        end

        def remove_from_known_buckets(bucket_id, &blk)
          redis.srem(redis_known_buckets_key, bucket_id, &blk)
        end

        # Redis key of the hash holding a bucket's items.
        def redis_key(bucket_id)
          "em_bucketer:#{redis_prefix}:#{bucket_id}"
        end

        # Redis key of the set holding the ids of known buckets.
        def redis_known_buckets_key
          "em_bucketer_known_buckets:#{redis_prefix}"
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'eventmachine'
2
+ require 'em-bucketer/database/hash'
3
+ require 'em-bucketer/base'
4
+
5
module EventMachine::Bucketer
  # Bucketer whose buckets are held in an in-process Hash (Database::Hash).
  class InMemory
    include Database::Hash
    include Base

    BUCKET_THRESHOLD_SIZE_DEFAULT = 1000
    BUCKET_MAX_AGE_DEFAULT = 3600

    # Builds an in-memory Bucketer with the requested configuration.
    #
    # @param bucket_threshold_size [Integer] bucket size at which the
    #   on_bucket_full callbacks are called
    # @param bucket_max_age [Integer] number of seconds a bucket may
    #   exist before the on_bucket_timeout callbacks are called
    def initialize(
      bucket_threshold_size: BUCKET_THRESHOLD_SIZE_DEFAULT,
      bucket_max_age: BUCKET_MAX_AGE_DEFAULT
    )
      setup(bucket_threshold_size, bucket_max_age)
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'eventmachine'
2
+ require 'em-bucketer/database/redis'
3
+ require 'em-bucketer/base'
4
+
5
module EventMachine::Bucketer
  # Bucketer whose buckets are stored in Redis (Database::Redis), so the
  # contents outlive the process.
  class Redis
    include Database::Redis
    include Base

    BUCKET_THRESHOLD_SIZE_DEFAULT = 1000
    BUCKET_MAX_AGE_DEFAULT = 3600

    # Creates a new redis Bucketer with the requested configuration.
    #
    # *NOTE* The redis bucketer uses Marshal to store the objects in
    # redis. This puts limitations on the data that can be stored in
    # these buckets. For example you cannot store an object that
    # references a proc as an instance variable.
    #
    # On startup a timer is set for every bucket already recorded in
    # redis, so buckets left over from a previous run still get their
    # timeout callback after a restart.
    #
    # @param redis_prefix [String] The prefix for the bucket keys in
    #   redis. This is necessary because you may want to have multiple
    #   bucketers using one redis instance and you don't want them
    #   conflicting. It must be stable across restarts, otherwise the
    #   restarted app would not find its previous buckets.
    # @param bucket_threshold_size [Integer] bucket size at which the
    #   on_bucket_full callbacks are called
    # @param bucket_max_age [Integer] number of seconds a bucket may
    #   exist before the on_bucket_timeout callbacks are called
    # @param redis [Object] em-hiredis compatible client to use; defaults
    #   to a new EM::Hiredis.connect connection. Pass your own client to
    #   target a non-default server or to share a connection.
    def initialize(redis_prefix,
                   bucket_threshold_size: BUCKET_THRESHOLD_SIZE_DEFAULT,
                   bucket_max_age: BUCKET_MAX_AGE_DEFAULT,
                   redis: EM::Hiredis.connect)
      @redis = redis
      @redis_prefix = redis_prefix
      setup(bucket_threshold_size, bucket_max_age)
      set_timers
    end

    # Starts a timeout timer for every bucket id found in the
    # known-buckets set in redis.
    #
    # @raise [Object] re-raises the error reported by redis when the set
    #   cannot be read
    def set_timers
      known_buckets.callback do |bucket_ids|
        bucket_ids.each do |bucket_id|
          add_timer_if_first(bucket_id)
        end
      end.errback do |e|
        # This only happens while the bucketer is being initialised, and
        # there is no caller to hand the error to. Raising makes the
        # failure visible at startup, since it means the existing buckets
        # were not reloaded from redis.
        raise e
      end
    end

    private

    # Used by Database::Redis
    attr_reader :redis_prefix, :redis
  end
end