space-saver-redis 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
# Dependency manifest for space-saver-redis.
# Gems are fetched over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependency: the Redis client.
gem 'redis', '~> 3.0.1'

# Needed only to build, test and release the gem.
group :development, :test do
  gem 'jeweler', '~> 1.8.4'
  gem 'rake', '~> 0.9.2.2'
  gem 'rspec', '~> 2.11.0'
  gem 'timecop', '~> 0.5.3'
end
data/Gemfile.lock ADDED
@@ -0,0 +1,34 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.8.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rdoc
11
+ json (1.7.5)
12
+ rake (0.9.2.2)
13
+ rdoc (3.12)
14
+ json (~> 1.4)
15
+ redis (3.0.2)
16
+ rspec (2.11.0)
17
+ rspec-core (~> 2.11.0)
18
+ rspec-expectations (~> 2.11.0)
19
+ rspec-mocks (~> 2.11.0)
20
+ rspec-core (2.11.1)
21
+ rspec-expectations (2.11.3)
22
+ diff-lcs (~> 1.1.3)
23
+ rspec-mocks (2.11.3)
24
+ timecop (0.5.4)
25
+
26
+ PLATFORMS
27
+ ruby
28
+
29
+ DEPENDENCIES
30
+ jeweler (~> 1.8.4)
31
+ rake (~> 0.9.2.2)
32
+ redis (~> 3.0.1)
33
+ rspec (~> 2.11.0)
34
+ timecop (~> 0.5.3)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Art.sy
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ space-saver-redis
2
+ =================
3
+
4
+ This gem is a pure Ruby implementation of Metwally, Agrawal, and El Abbadi's
5
+ [SpaceSaver algorithm](http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf)
6
+ for estimating the top K elements in a data stream. A [Redis](http://redis.io)
7
+ instance is used for storage.
8
+
9
+ Here's an example:
10
+
11
+ require 'redis'
12
+ require 'space-saver-redis'
13
+
14
+ # Estimate the top 10 most frequent items seen in a data stream
15
+ space_saver = SpaceSaver.new(Redis.new, 10)
16
+
17
+ urls_visited.each { |url| space_saver.increment("urls", url) }
18
+
19
+ After the above code executes, you can query `space_saver.leaders("urls")` to get
20
+ an estimate of the top 10 most frequent URLs visited along with their estimated
21
+ counts. The `SpaceSaver` instance uses only a Redis sorted set with at most K
22
+ elements (10, in this case) at any time to make this estimation.
23
+
24
+ Obviously, since the data structure uses only a small, fixed amount of space,
25
+ there are some data distributions that can cause the top K elements returned to
26
+ be completely incorrect, but for a lot of data distributions the results are
27
+ worth the savings in space. In particular, for a `SpaceSaver` instance initialized
28
+ with parameter K that observes a data stream of N items, any item that occurs more
29
+ than N/K times is guaranteed to be in the list of estimated leaders.
30
+
31
+ One way to cope with the error involved in this kind of estimation is to use a K
32
+ bigger than you actually need and then truncate the number of leaders returned
33
+ at query time. You can pass an additional parameter to the call to `leaders` to
34
+ do this, for example `space_saver.leaders("urls", 3)` will return only the top
35
+ 3 of the 10 estimated most frequent items.
36
+
37
+ Installation
38
+ ============
39
+
40
+ gem install space-saver-redis
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
# encoding: utf-8

# Build, test and documentation tasks for the space-saver-redis gem.

require 'rubygems'
require 'bundler'
begin
  # Put the default and development gem groups on the load path; bail out with
  # Bundler's own status code if any of them are missing.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  blurb = %Q{A pure Ruby implementation of the SpaceSaver algorithm for approximating the top K elements in a data stream, backed by Redis}

  gem.name = "space-saver-redis"
  gem.homepage = "http://github.com/aaw/space-saver-redis"
  gem.license = "MIT"
  gem.summary = blurb
  gem.description = blurb
  gem.email = "aaron.windsor@gmail.com"
  gem.authors = ["Aaron Windsor"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# rake/rdoctask is deprecated (and removed in rake 10); RDoc ships the
# replacement task class itself.
require 'rdoc/task'
RDoc::Task.new do |rdoc|
  # Strip the trailing newline so it does not end up in the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "space-saver-redis #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,22 @@
1
# Approximate top-K frequency counter for a data stream.
#
# Each leaderboard is stored as one Redis sorted set that is kept at no more
# than +k+ members: members are the observed values, scores are their
# estimated counts.
class SpaceSaver
  # redis - a Redis client responding to zscore, zcard, zincrby, zrange,
  #         zrevrange, zadd and zrem.
  # k     - maximum number of members to keep per leaderboard.
  def initialize(redis, k)
    @redis = redis
    @k = k
  end

  # Records one occurrence of +value+ in +leaderboard+.
  #
  # When +value+ is already tracked, or the set still has fewer than k
  # members, its score is simply incremented. Otherwise the lowest-scored
  # member is removed and +value+ is inserted with that member's score plus
  # one.
  #
  # NOTE: the lookups and writes are separate Redis calls, so this sequence is
  # not atomic when several clients update the same leaderboard.
  #
  # Returns the new score for +value+: whatever zincrby returns on the first
  # path, an Integer on the replacement path.
  def increment(leaderboard, value)
    tracked = @redis.zscore(leaderboard, value)
    return @redis.zincrby(leaderboard, 1, value) if tracked || @redis.zcard(leaderboard) < @k

    # Rank 0 in ascending order is the member with the smallest score.
    evicted, lowest = @redis.zrange(leaderboard, 0, 0, withscores: true).first
    new_score = lowest.to_i + 1
    # Only insert if the eviction actually removed something, so the set
    # cannot grow past k members.
    @redis.zadd(leaderboard, new_score, value) if @redis.zrem(leaderboard, evicted)
    new_score
  end

  # Returns up to +k+ [member, score] pairs from +leaderboard+, highest score
  # first. +k+ defaults to the k given to the constructor.
  #
  # A non-positive +k+ yields an empty list. Without this guard, k = 0 would
  # ask Redis for the range 0..-1, which means "every member".
  def leaders(leaderboard, k=@k)
    return [] unless k > 0

    @redis.zrevrange(leaderboard, 0, k - 1, withscores: true)
  end
end
@@ -0,0 +1,46 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Behavioural specs for SpaceSaver. They talk to a real Redis instance through
# Redis.new.
describe SpaceSaver do
  it "returns an empty list when you ask about an empty leaderboard" do
    s = SpaceSaver.new(Redis.new, 10)
    s.leaders('unknown-leaderboard').should be_empty
  end

  it "can store different leaderboards in the same Redis database" do
    s = SpaceSaver.new(Redis.new, 10)
    s.increment('a','a_item')
    s.increment('b','b_item')
    s.leaders('a').should == [["a_item", 1.0]]
    s.leaders('b').should == [["b_item", 1.0]]
  end

  it "uses a Redis sorted set of cardinality K if you pass K to the SpaceSaver constructor" do
    r = Redis.new
    [3,4,5].each do |k|
      s = SpaceSaver.new(r, k)
      30.times do |i|
        s.increment("leaderboard#{k}", "item#{i}")
        # The set must never exceed k members, even mid-stream.
        r.zcard("leaderboard#{k}").should <= k
      end
      r.zcard("leaderboard#{k}").should == k
    end
  end

  it "can return less than k leaders" do
    s = SpaceSaver.new(Redis.new, 20)
    100.times { |i| s.increment("leaderboard", "item#{i}") }
    [5,6,7,8].each do |i|
      s.leaders("leaderboard", i).length.should == i
      # A longer prefix of the ranking must contain every shorter prefix.
      s.leaders("leaderboard", i+1).should include(*s.leaders("leaderboard", i))
    end
  end

  it "stores any element that occurs at least n/k times" do
    s = SpaceSaver.new(Redis.new, 5)
    # Stream of n = 1000 items with k = 5: foo, bar and baz each occur
    # 200 = n/k times, interleaved with 400 one-off items.
    200.times { s.increment("leaderboard", "foo") }
    400.times do |i|
      s.increment("leaderboard", "bar") if i % 2 == 0
      s.increment("leaderboard", "item#{i}")
    end
    200.times { s.increment("leaderboard", "baz") }
    expected_leaders = ['foo','bar','baz']
    s.leaders("leaderboard").map{ |x| x.first }.should include(*expected_leaders)
  end
end
@@ -0,0 +1,29 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'redis'
5
+ require 'space-saver-redis'
6
+
7
# Point every Redis.new in the specs at a dedicated database on the local
# instance: number 15 unless REDIS_TEST_DATABASE says otherwise.
db_number = ENV['REDIS_TEST_DATABASE'] || '15'
ENV['REDIS_URL'] = "redis://localhost:6379/#{db_number}"
redis = Redis.new

# Refuse to run against a database that already holds keys.
unless redis.keys('*').empty?
  puts "Warning! These specs use database #{db_number} on your local redis instance"
  puts "running on port 6379. Your database #{db_number} seems to have keys in it."
  puts "Please clear them before running the specs or set the environment"
  puts "variable REDIS_TEST_DATABASE to use a different database number."
  # A bare `raise SystemExit` exits with status 0, which would make an aborted
  # run look like a success; exit non-zero instead.
  exit 1
end
17
+
18
+ # Requires supporting files with custom matchers and macros, etc,
19
+ # in ./support/ and its subdirectories.
20
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
21
+
22
# Flush the Redis database that Redis.new connects to both before and after
# every example.
RSpec.configure do |config|
  config.before(:each) { Redis.new.flushdb }
  config.after(:each) { Redis.new.flushdb }
end
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: space-saver-redis
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Aaron Windsor
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-18 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: redis
16
+ requirement: &2169308260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 3.0.1
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2169308260
25
+ - !ruby/object:Gem::Dependency
26
+ name: jeweler
27
+ requirement: &2169307780 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 1.8.4
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2169307780
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &2169307300 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 0.9.2.2
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2169307300
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &2169306820 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 2.11.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2169306820
58
+ - !ruby/object:Gem::Dependency
59
+ name: timecop
60
+ requirement: &2169306340 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: 0.5.3
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *2169306340
69
+ description: A pure Ruby implementation of the SpaceSaver algorithm for approximating
70
+ the top K elements in a data stream, backed by Redis
71
+ email: aaron.windsor@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files:
75
+ - LICENSE.txt
76
+ - README.md
77
+ files:
78
+ - .document
79
+ - .rspec
80
+ - Gemfile
81
+ - Gemfile.lock
82
+ - LICENSE.txt
83
+ - README.md
84
+ - Rakefile
85
+ - VERSION
86
+ - lib/space-saver-redis.rb
87
+ - spec/space-saver-redis_spec.rb
88
+ - spec/spec_helper.rb
89
+ homepage: http://github.com/aaw/space-saver-redis
90
+ licenses:
91
+ - MIT
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ segments:
103
+ - 0
104
+ hash: 300893669371900743
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:
113
+ rubygems_version: 1.8.10
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: A pure Ruby implementation of the SpaceSaver algorithm for approximating
117
+ the top K elements in a data stream, backed by Redis
118
+ test_files: []