space-saver-redis 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependency: the Redis client used for storage.
gem 'redis', '~> 3.0.1'

# Tooling needed only to build, package and test the gem.
group :development, :test do
  gem 'jeweler', '~> 1.8.4'
  gem 'rake', '~> 0.9.2.2'
  gem 'rspec', '~> 2.11.0'
  gem 'timecop', '~> 0.5.3'
end
data/Gemfile.lock ADDED
@@ -0,0 +1,34 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.8.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rdoc
11
+ json (1.7.5)
12
+ rake (0.9.2.2)
13
+ rdoc (3.12)
14
+ json (~> 1.4)
15
+ redis (3.0.2)
16
+ rspec (2.11.0)
17
+ rspec-core (~> 2.11.0)
18
+ rspec-expectations (~> 2.11.0)
19
+ rspec-mocks (~> 2.11.0)
20
+ rspec-core (2.11.1)
21
+ rspec-expectations (2.11.3)
22
+ diff-lcs (~> 1.1.3)
23
+ rspec-mocks (2.11.3)
24
+ timecop (0.5.4)
25
+
26
+ PLATFORMS
27
+ ruby
28
+
29
+ DEPENDENCIES
30
+ jeweler (~> 1.8.4)
31
+ rake (~> 0.9.2.2)
32
+ redis (~> 3.0.1)
33
+ rspec (~> 2.11.0)
34
+ timecop (~> 0.5.3)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Art.sy
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ space-saver-redis
2
+ =================
3
+
4
+ This gem is a pure Ruby implementation of Metwally, Agrawal, and El Abbadi's
5
+ [SpaceSaver algorithm](http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf)
6
+ for estimating the top K elements in a data stream. A [Redis](http://redis.io)
7
+ instance is used for storage.
8
+
9
+ Here's an example:
10
+
11
+ require 'redis'
12
+ require 'space-saver-redis'
13
+
14
+ # Estimate the top 10 most frequent items seen in a data stream
15
+ space_saver = SpaceSaver.new(Redis.new, 10)
16
+
17
+ urls_visited.each { |url| space_saver.increment("urls", url) }
18
+
19
+ After the above code executes, you can query `space_saver.leaders("urls")` to get
20
+ an estimate of the top 10 most frequent URLs visited along with their estimated
21
+ counts. The `SpaceSaver` instance uses only a Redis sorted set with at most K
22
+ elements (10, in this case) at any time to make this estimation.
23
+
24
+ Obviously, since the data structure uses only a small, fixed amount of space,
25
+ there are some data distributions that can cause the top K elements returned to
26
+ be completely incorrect, but for a lot of data distributions the results are
27
+ worth the savings in space. In particular, for a `SpaceSaver` instance initialized
28
+ with parameter K that observes a data stream of N items, any item that occurs more
29
+ than N/K times is guaranteed to be in the list of estimated leaders.
30
+
31
+ One way to cope with the error involved in this kind of estimation is to use a K
32
+ bigger than you actually need and then truncate the number of leaders returned
33
+ at query time. You can pass an additional parameter to the call to `leaders` to
34
+ do this, for example `space_saver.leaders("urls", 3)` will return only the top
35
+ 3 of the 10 estimated most frequent items.
36
+
37
+ Installation
38
+ ============
39
+
40
+ gem install space-saver-redis
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
# encoding: utf-8

# Build script: wires up Bundler, Jeweler (gem packaging), RSpec and RDoc tasks.

require 'rubygems'
require 'bundler'
begin
  # Put only the default and development gem groups on the load path.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "space-saver-redis"
  gem.homepage = "http://github.com/aaw/space-saver-redis"
  gem.license = "MIT"
  gem.summary = %Q{A pure Ruby implementation of the SpaceSaver algorithm for approximating the top K elements in a data stream, backed by Redis}
  gem.description = %Q{A pure Ruby implementation of the SpaceSaver algorithm for approximating the top K elements in a data stream, backed by Redis}
  gem.email = "aaron.windsor@gmail.com"
  gem.authors = ["Aaron Windsor"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
# `rake spec` runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with the rcov option switched on.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# rake/rdoctask (Rake::RDocTask) is deprecated; RDoc ships its own task class.
require 'rdoc/task'
RDoc::Task.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "space-saver-redis #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,22 @@
1
# Keeps an approximate "top K" leaderboard per key in a Redis sorted set.
#
# Each leaderboard holds at most K members. Once it is full, a previously
# unseen value replaces the member at rank 0 and takes over that member's
# score plus one.
class SpaceSaver
  # redis - client object that responds to zscore, zcard, zincrby, zrange,
  #         zadd, zrem and zrevrange.
  # k     - maximum number of members kept in each leaderboard.
  def initialize(redis, k)
    @redis = redis
    @k = k
  end

  # Records one occurrence of +value+ in +leaderboard+.
  #
  # Returns whatever zincrby returns when the value is already tracked or the
  # leaderboard still has room; otherwise returns the Integer score that was
  # assigned to the newly admitted value.
  #
  # NOTE: the lookups and updates below are separate Redis commands, not a
  # single transaction.
  def increment(leaderboard, value)
    score = @redis.zscore(leaderboard, value)
    if score || @redis.zcard(leaderboard) < @k
      @redis.zincrby(leaderboard, 1, value)
    else
      # Leaderboard is full and the value is new: take the member at rank 0
      # and hand its score (plus one) to the incoming value.
      item, score = @redis.zrange(leaderboard, 0, 0, withscores: true).first
      new_score = score.to_i + 1
      # Only insert if the old member was really removed, so the set never
      # grows past K here.
      @redis.zadd(leaderboard, new_score, value) if @redis.zrem(leaderboard, item)
      new_score
    end
  end

  # Returns up to +k+ [member, score] pairs in the order produced by
  # zrevrange. +k+ defaults to the K given to the constructor.
  #
  # A non-positive +k+ yields an empty list. Without this guard the stop
  # index k-1 would be negative, which a Redis range interprets as counting
  # from the end of the set (k == 0 would return every member).
  def leaders(leaderboard, k=@k)
    return [] unless k > 0
    @redis.zrevrange(leaderboard, 0, k - 1, withscores: true)
  end
end
@@ -0,0 +1,46 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Behavioural specs for SpaceSaver; they talk to the Redis instance that
# Redis.new connects to.
describe SpaceSaver do
  it "returns an empty list when you ask about an empty leaderboard" do
    s = SpaceSaver.new(Redis.new, 10)
    s.leaders('unknown-leaderboard').should be_empty
  end

  it "can store different leaderboards in the same Redis database" do
    s = SpaceSaver.new(Redis.new, 10)
    s.increment('a', 'a_item')
    s.increment('b', 'b_item')
    s.leaders('a').should == [["a_item", 1.0]]
    s.leaders('b').should == [["b_item", 1.0]]
  end

  it "uses a Redis sorted set of cardinality K if you pass K to the SpaceSaver constructor" do
    r = Redis.new
    [3, 4, 5].each do |k|
      s = SpaceSaver.new(r, k)
      30.times do |i|
        s.increment("leaderboard#{k}", "item#{i}")
        # The set must never exceed K members, even while it is filling up.
        r.zcard("leaderboard#{k}").should <= k
      end
      r.zcard("leaderboard#{k}").should == k
    end
  end

  it "can return less than k leaders" do
    s = SpaceSaver.new(Redis.new, 20)
    100.times { |i| s.increment("leaderboard", "item#{i}") }
    [5, 6, 7, 8].each do |i|
      s.leaders("leaderboard", i).length.should == i
      # A longer result must contain every entry of the shorter one.
      s.leaders("leaderboard", i + 1).should include(*s.leaders("leaderboard", i))
    end
  end

  it "stores any element that occurs at least n/k times" do
    s = SpaceSaver.new(Redis.new, 5)
    # 800 increments in total: 'foo', 'bar' and 'baz' each occur 200 times,
    # and the remaining 200 are one-off items.
    200.times { s.increment("leaderboard", "foo") }
    400.times do |i|
      s.increment("leaderboard", "bar") if i % 2 == 0
      s.increment("leaderboard", "item#{i}")
    end
    200.times { s.increment("leaderboard", "baz") }
    expected_leaders = ['foo', 'bar', 'baz']
    s.leaders("leaderboard").map(&:first).should include(*expected_leaders)
  end
end
@@ -0,0 +1,29 @@
1
# Shared RSpec setup: loads the library, points Redis at a dedicated test
# database and wipes that database around every example.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'rspec'
require 'redis'
require 'space-saver-redis'

# The database number can be overridden with REDIS_TEST_DATABASE.
db_number = ENV['REDIS_TEST_DATABASE'] || '15'
ENV['REDIS_URL'] = "redis://localhost:6379/#{db_number}"
redis = Redis.new

# The specs call flushdb, so refuse to run against a database that already
# holds keys. Exit with a non-zero status so the aborted run is not mistaken
# for a successful one.
if redis.keys('*').length > 0
  $stderr.puts "Warning! These specs use database #{db_number} on your local redis instance"
  $stderr.puts "running on port 6379. Your database #{db_number} seems to have keys in it."
  $stderr.puts "Please clear them before running the specs or set the environment"
  $stderr.puts "variable REDIS_TEST_DATABASE to use a different database number."
  exit 1
end

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }

RSpec.configure do |config|
  # Start and finish every example with an empty database.
  config.before(:each) do
    Redis.new.flushdb
  end
  config.after(:each) do
    Redis.new.flushdb
  end
end
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: space-saver-redis
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Aaron Windsor
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-18 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: redis
16
+ requirement: &2169308260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 3.0.1
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2169308260
25
+ - !ruby/object:Gem::Dependency
26
+ name: jeweler
27
+ requirement: &2169307780 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 1.8.4
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2169307780
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &2169307300 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 0.9.2.2
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2169307300
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &2169306820 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 2.11.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2169306820
58
+ - !ruby/object:Gem::Dependency
59
+ name: timecop
60
+ requirement: &2169306340 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: 0.5.3
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *2169306340
69
+ description: A pure Ruby implementation of the SpaceSaver algorithm for approximating
70
+ the top K elements in a data stream, backed by Redis
71
+ email: aaron.windsor@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files:
75
+ - LICENSE.txt
76
+ - README.md
77
+ files:
78
+ - .document
79
+ - .rspec
80
+ - Gemfile
81
+ - Gemfile.lock
82
+ - LICENSE.txt
83
+ - README.md
84
+ - Rakefile
85
+ - VERSION
86
+ - lib/space-saver-redis.rb
87
+ - spec/space-saver-redis_spec.rb
88
+ - spec/spec_helper.rb
89
+ homepage: http://github.com/aaw/space-saver-redis
90
+ licenses:
91
+ - MIT
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ segments:
103
+ - 0
104
+ hash: 300893669371900743
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:
113
+ rubygems_version: 1.8.10
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: A pure Ruby implementation of the SpaceSaver algorithm for approximating
117
+ the top K elements in a data stream, backed by Redis
118
+ test_files: []