similus 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/init.rb ADDED
@@ -0,0 +1,2 @@
1
+ $:.unshift "#{File.dirname(__FILE__)}/lib"
2
+ require 'similus'
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+ require 'redis'
3
+
4
+ require File.join(File.dirname(__FILE__), 'similus', 'core.rb')
5
+ require File.join(File.dirname(__FILE__), 'similus', 'config.rb')
6
+ require File.join(File.dirname(__FILE__), 'similus', 'redis.rb')
@@ -0,0 +1,24 @@
1
# Config#logger builds a ::Logger, which lives in the standard library but is
# not loaded by default.
require 'logger'

module Similus
  # Returns the process-wide configuration, creating it on first use.
  #
  # When a block is given the configuration is yielded to it and the block's
  # value is returned; otherwise the Config instance itself is returned.
  #
  #   Similus.config { |c| c.redis_db = 3 }
  #   Similus.config.redis_db # => 3
  def self.config
    @config ||= Config.new
    block_given? ? yield(@config) : @config
  end

  # Plain settings holder for the library.
  class Config
    # Storage backend identifier (defaults to :redis).
    attr_accessor :backend
    # Redis server address as "host:port".
    attr_accessor :redis_server
    # Redis database number.
    attr_accessor :redis_db
    # Destination handed to Logger.new (an IO or a file name).
    attr_accessor :logfile

    def initialize #:nodoc:
      self.backend = :redis
      self.redis_server = "localhost:6379"
      self.redis_db = 9
      self.logfile = STDOUT
    end

    # Lazily built logger writing to +logfile+. The logger is memoized, so
    # changing +logfile+ after the first call has no effect on it.
    def logger
      @logger ||= Logger.new(logfile)
    end
  end
end
@@ -0,0 +1,220 @@
1
+ require 'digest/sha1'
2
+
3
module Similus
  class << self
    # Records that +a+ performed +action+ on +b+.
    #
    # Both objects and the action are registered on first use, and the
    # activity is stored in both directions so that +b+ also lists +a+.
    #
    # a, b   - [class, id] Array, {:class, :id} Hash, or an object
    #          responding to #id.
    # action - action name (e.g. :view).
    def add_activity(a, action, b)
      # Find or create objects
      a = add_object(a)
      b = add_object(b)

      # Find or create action
      action = add_action({:name => action})

      # Create activities (both sides)
      create_activities(a, action, b)
      create_activities(b, action, a)
    end

    # Finds objects sharing activity with +src+.
    #
    # The score of a candidate is the number of +src+'s activity entries it
    # shares; candidates sharing a single entry are dropped.
    #
    # options - :load_objects (default true): return an Array of
    #           {:score, :id, :class} hashes ordered by descending score
    #           instead of the raw {object id => score} Hash. The remaining
    #           options are forwarded to load_activity.
    # objects - activity ids of +src+ when the caller already loaded them.
    def similar_to(src, options={}, objects=nil)
      sopt = {:source => :all, :load_objects => true}.update(options)
      src = add_object(src)

      objects ||= load_activity(src, options)

      similar_objects = objects.inject(empty_hash) do |result, id|
        redis.smembers("activity:#{id}").each { |similar| result[similar] += 1 }
        result
      end

      similar_objects.delete(src[:obj_id])      # delete object itself from list
      similar_objects.reject! { |_k, v| v == 1 } # remove similars just by one

      sopt[:load_objects] ? load_objects(by_score_desc(similar_objects)) : similar_objects
    end

    # Recommends objects to +src+ based on the activity of similar objects.
    #
    # options - :load_objects (default true), :max_similar (10, number of
    #           most similar objects considered), :limit (10), :offset (0),
    #           :method (:similarity, :jaccard or :jaccard_similarity).
    # block   - optional custom scorer; it receives as many of
    #           (result, dst_activity, src_activity, similarity, similar_id)
    #           as it declares and is expected to update +result+ in place.
    #
    # Returns an Array of {:score, :id, :class} hashes, or of
    # [object id, score] pairs when :load_objects is false. An offset past
    # the end yields an empty Array.
    def recommended_for(src, options={}, &block)
      default_options = { :load_objects => true, :max_similar => 10,
                          :limit => 10, :offset => 0, :method => :similarity,
                          :source => :all, :target => :all }

      ropt = default_options.update(options)
      sopt = options.merge(:load_objects => false)
      maxs = ropt.delete(:max_similar)

      # Find objects for user
      src_act = load_activity(src, sopt)

      # Get similar objects, sort and keep exactly the first N elements
      src_sim = by_score_desc(similar_to(src, sopt, src_act))
      src_sim = src_sim.first(maxs) if maxs

      # Get recommended score of similar objects' activity
      result = src_sim.inject(empty_hash) do |res, sim|
        dst_act = redis.smembers("activity:#{sim[0]}")
        rscore(ropt[:method], res, dst_act, src_act, sim[1], sim[0], &block)
      end

      # Remove already seen, sort and apply limit/offset. Array#[] returns
      # nil when the offset lies past the end, hence the fallback.
      result.reject! { |key, _score| src_act.include?(key) }
      result = by_score_desc(result)[ropt[:offset], ropt[:limit]] || []

      # Load original objects
      ropt[:load_objects] ? load_objects(result) : result
    end

    private

    # Orders [key, score] pairs (or a Hash of them) by descending score.
    def by_score_desc(scored)
      scored.sort { |x, y| y[1] <=> x[1] }
    end

    # Normalizes +obj+ and fills in :class_id, creating the class if needed.
    def add_class(obj)
      obj = object_to_hash(obj)
      obj[:class_id] = cached_value("class:#{obj[:class]}") do
        safe_create("class", obj[:class])
      end
      obj
    end

    # Normalizes +obj+ and fills in :class_id and :obj_id, creating the
    # class and object records on first use.
    def add_object(obj)
      obj = object_to_hash(obj)
      # Find or create class
      add_class(obj) unless obj[:class_id]

      # Find or create object
      obj[:obj_id] = cached_value("object:#{obj[:class]}:#{obj[:id]}") do
        safe_create('object', "#{obj[:class]}:#{obj[:id]}") do |id|
          redis.hmset("object:#{id}", :id, obj[:id], :class_id, obj[:class_id])
          redis.sadd("class:#{obj[:class_id]}:objects", id)
        end
      end
      obj
    end

    # Fills in :action_id on the given {:name => ...} hash (find or create).
    def add_action(action)
      action[:action_id] = cached_value("action:#{action[:name]}") do
        safe_create("action", action[:name])
      end
      action
    end

    # Adds +dst+ to every activity set of +src+: overall, per action,
    # per action and class, and per class. Each set has a companion sorted
    # set ("<key>:s") scored by the current time.
    def create_activities(src, action, dst)
      keys = [ activity_key(src[:obj_id]),
               activity_key(src[:obj_id], action[:action_id]),
               activity_key(src[:obj_id], action[:action_id], dst[:class_id]),
               activity_key(src[:obj_id], nil, dst[:class_id]) ]

      now = Time.now.to_i
      keys.each do |key|
        redis.sadd key, dst[:obj_id]            # Set
        redis.zadd "#{key}:s", now, dst[:obj_id] # Sorted List
      end
    end

    # Builds "activity:<obj>[:a:<action>][:c:<class>]".
    def activity_key(obj_id, action_id=nil, class_id=nil)
      str = "activity:#{obj_id}"
      str << ":a:#{action_id}" if action_id
      str << ":c:#{class_id}" if class_id
      str
    end

    # Returns the ids of the objects +src+ has activity with.
    #
    # options - :max_activity_objects (default 20): only the most recent N
    #           entries are returned; pass nil to get the whole set.
    def load_activity(src, options)
      aopt = { :source => :all,
               :max_activity_objects => 20,
             }.update(options)

      last = aopt.delete(:max_activity_objects)

      # Assign object and class ids
      src = add_object(src)

      # Retrieve last activity for obj
      act_key = activity_key(src[:obj_id])
      last ? redis.zrevrange("#{act_key}:s", 0, last-1) : redis.smembers(act_key)
    end

    # Maps internal ids back to the original objects.
    #
    # data_with_score is hash {key => score} or array [[key,score]]
    def load_objects(data_with_score)
      data_with_score = data_with_score.to_a if data_with_score.is_a?(Hash)
      data_with_score.map do |item|
        obj = redis.hgetall "object:#{item[0]}"
        { :score => item[1],
          :id => obj["id"],
          :class => redis.get("class:#{obj["class_id"]}")
        }
      end
    end

    # Normalizes the accepted object notations into a Hash with :class and
    # :id. Hashes are returned as is (and later mutated in place).
    #
    # Raises ArgumentError for anything else that does not respond to #id,
    # instead of returning nil and failing later with an obscure error.
    def object_to_hash(obj)
      case obj
      when Array
        {:class => obj[0], :id => obj[1]}
      when Hash
        obj
      else
        unless obj.respond_to?(:id)
          raise ArgumentError, "cannot identify object: #{obj.inspect}"
        end
        {:class => obj.class.to_s, :id => obj.id}
      end
    end

    # Hash whose missing keys read as +default+ (used as score accumulator).
    def empty_hash(default=0.0)
      Hash.new(default)
    end

    # Adds the contribution of one similar object to +res+ and returns +res+.
    #
    # With a block, the block is called with as many leading arguments of
    # (res, dst_act, src_act, src_sim_score, src_oid) as it declares, so
    # strict lambdas work too; blocks with optional or splat arguments
    # receive all five. Without a block, +method+ selects the weight added
    # for every object in +dst_act+; unknown methods add nothing.
    def rscore(method, res, dst_act, src_act, src_sim_score, src_oid, &block)
      if block_given?
        params = [res, dst_act, src_act, src_sim_score, src_oid]
        arity = block.arity
        params = params.first(arity) if arity >= 0
        block.call(*params)
      else
        weight = case method
                 when :similarity
                   src_sim_score
                 when :jaccard
                   1000.0 * jaccard_factor(src_act, dst_act)
                 when :jaccard_similarity
                   src_sim_score * jaccard_factor(src_act, dst_act)
                 end
        dst_act.each { |dst_oid| res[dst_oid] += weight } if weight
      end
      res
    end

    # Jaccard index |src & dst| / |src | dst|; 0.0 when both are empty
    # (avoids the NaN produced by 0.0 / 0.0).
    def jaccard_factor(src,dst)
      union_size = (src | dst).size
      return 0.0 if union_size.zero?
      (src & dst).size.to_f / union_size
    end

    # Class level cache for objects
    def cache
      @cache ||= {}
    end

    # Returns the cached value for +key+, computing it with the block once.
    def cached_value(key)
      cache[key] ||= yield
    end

    # Finds or creates the short id registered for +value+ under +base+.
    #
    # The lookup key is "<base>:<sha1 of value>:id". When a new id is
    # created, the block (if any) is called with it; otherwise the reverse
    # mapping "<base>:<id>" => value is stored.
    def safe_create(base, value)
      hash = Digest::SHA1.hexdigest(value.to_s)
      hkey = "#{base}:#{hash}:id"
      id = redis.get(hkey)
      return id if id

      id = redis.incr("next.#{base}.id").to_s(36) # use base 36 for ids to save space
      if redis.setnx(hkey, id)
        block_given? ? yield(id) : redis.setnx("#{base}:#{id}", value)
        id
      else
        redis.get(hkey) # hash key created in between - revert to original value
      end
    end
  end # class << self
end
@@ -0,0 +1,16 @@
1
module Similus
  # Returns the shared Redis client, connecting on first use with the host,
  # port and database taken from Similus.config.
  #
  # Connection errors are logged through config.logger and nil is returned;
  # since nil is not memoized, the next call tries to connect again. Only
  # StandardError is rescued so that Interrupt, SystemExit and other
  # non-standard exceptions still propagate.
  def self.redis
    @redis ||= begin
      host, port = config.redis_server.split(':')
      ::Redis.new(:host => host, :port => port, :db => config.redis_db)
    rescue StandardError => e
      config.logger.error "Error connecting redis server: #{e.message}"
      nil
    end
  end

  # Empties the configured Redis database and resets the in-process id cache.
  def self.clear_database!
    @cache = {}
    redis.flushdb
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for similus 0.1.1.
Gem::Specification.new do |s|
  s.name = %q{similus}
  s.version = "0.1.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Horaci Cuevas"]
  # NOTE(review): cert_chain and signing_key below are absolute paths on the
  # author's machine; building a signed gem elsewhere needs them adjusted.
  s.cert_chain = ["/Users/horaci.cuevas/gem-public_cert.pem"]
  s.date = %q{2010-10-07}
  s.description = %q{A ruby library to find similar objects and make recommendations based on activity of objects}
  s.email = %q{horaci @@ gmail.com}
  s.extra_rdoc_files = ["LICENSES", "README.rdoc", "lib/similus.rb", "lib/similus/config.rb", "lib/similus/core.rb", "lib/similus/redis.rb"]
  s.files = ["LICENSES", "README.rdoc", "Rakefile", "benchmarks/benchmark1.rb", "benchmarks/benchmark2.rb", "benchmarks/custom_benchmark.rb", "benchmarks/redis.conf", "init.rb", "lib/similus.rb", "lib/similus/config.rb", "lib/similus/core.rb", "lib/similus/redis.rb", "test/add_activity_spec.rb", "test/recommended_spec.rb", "test/similar_spec.rb", "Manifest", "similus.gemspec"]
  s.homepage = %q{http://github.com/horaci/similus}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Similus", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # The rubyforge_project setter is deprecated in newer RubyGems; guard it
  # like the other optional setters so the spec loads where it is missing.
  s.rubyforge_project = %q{similus} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.7}
  s.signing_key = %q{/Users/horaci.cuevas/gem-private_key.pem}
  s.summary = %q{A ruby library to find similar objects and make recommendations based on activity of objects}

  # The generated version-dependent branches were empty (the gem declares no
  # dependencies), so only the specification version assignment remains.
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,37 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib")
2
+
3
+ require 'similus'
4
+
5
describe "Similus" do
  describe "Add activity" do
    before(:all) do
      # Start from an empty database, then register a single activity
      Similus.clear_database!
      Similus.add_activity(["User", 1], :view, ["Movie", "Star Wars 1"])
    end

    # Looks up the id stored under "<type>:<sha1 of object>:id"; nil when the
    # record was never created.
    def redis_object(type, object)
      digest = Digest::SHA1.hexdigest(object)
      key = "#{type}:#{digest}:id"
      Similus.redis.get(key)
    end

    it "should create classes in redis" do
      ["User", "Movie"].each do |class_name|
        redis_object("class", class_name).should_not be_nil
      end
      redis_object("class", "Other").should be_nil
    end

    it "should create objects in redis" do
      ["User:1", "Movie:Star Wars 1"].each do |object_key|
        redis_object("object", object_key).should_not be_nil
      end
      redis_object("object", "User:2").should be_nil
    end

    it "should create actions in redis" do
      redis_object("action", "view").should_not be_nil
      redis_object("action", "like").should be_nil
    end
  end
end
@@ -0,0 +1,104 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib")
2
+ require 'similus'
3
+
4
describe "Similus" do
  before(:all) do
    # Start from an empty database
    Similus.clear_database!

    # [user id, movies viewed], registered in exactly this order
    viewings = [
      [1, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Star Wars 4"]],
      [2, ["Star Wars 3", "Star Wars 4", "Star Wars 5"]],
      [3, ["Star Wars 1", "Star Wars 3", "Star Wars 5"]],
      [4, ["Star Wars 2", "Star Wars 3"]],
      [5, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Blade Runner"]],
      [6, ["Star Wars 1", "Star Wars 5", "Blade Runner"]],
      [7, ["Casablanca"]]
    ]

    viewings.each do |user_id, titles|
      titles.each do |title|
        Similus.add_activity(["User", user_id], :view, ["Movie", title])
      end
    end
  end

  # Recommendation entry for +title+, or nil when it was not recommended.
  def entry_for(recommendations, title)
    recommendations.detect { |x| x[:id] == title }
  end

  # Checks the id and then the score of the entry at +position+.
  def should_rank(recommendations, position, title, score)
    recommendations[position][:id].should == title
    recommendations[position][:score].should == score
  end

  describe "#recommended_for" do
    describe "User 1" do
      before(:all) do
        @recommended_for_user_1 = Similus.recommended_for(["User", 1])
      end

      it "should recommend SW5 and then Blade Runner" do
        should_rank(@recommended_for_user_1, 0, "Star Wars 5", 4.0)
        should_rank(@recommended_for_user_1, 1, "Blade Runner", 3.0)
      end

      it "should not recommend Casablanca" do
        entry_for(@recommended_for_user_1, "Casablanca").should be_nil
      end
    end

    describe "other users" do
      before(:all) do
        # Defines @recommended_for_user_2 .. @recommended_for_user_7
        (2..7).each do |user_id|
          instance_variable_set("@recommended_for_user_#{user_id}",
                                Similus.recommended_for(["User", user_id]))
        end
      end

      it "should recommend only SW1 and then SW2 to user2 but not blade runner" do
        should_rank(@recommended_for_user_2, 0, "Star Wars 1", 4.0)
        should_rank(@recommended_for_user_2, 1, "Star Wars 2", 2.0)
        entry_for(@recommended_for_user_2, "Blade Runner").should be_nil
      end

      it "should recommend only BR and SW4 and SW2 to user3" do
        ["Blade Runner", "Star Wars 4", "Star Wars 2"].each do |title|
          entry_for(@recommended_for_user_3, title).should_not be_nil
        end
      end

      it "should recommend first SW1 and then SW4 and BR to user4" do
        @recommended_for_user_4.first[:id].should == "Star Wars 1"
        ["Blade Runner", "Star Wars 4"].each do |title|
          entry_for(@recommended_for_user_4, title).should_not be_nil
        end
      end

      it "should recommend first SW5 and then SW4 to user5" do
        should_rank(@recommended_for_user_5, 0, "Star Wars 5", 4.0)
        should_rank(@recommended_for_user_5, 1, "Star Wars 4", 3.0)
      end

      it "should recommend first SW3 and then SW2 to user6" do
        should_rank(@recommended_for_user_6, 0, "Star Wars 3", 4.0)
        should_rank(@recommended_for_user_6, 1, "Star Wars 2", 2.0)
      end

      it "should recommend nothing to user7" do
        @recommended_for_user_7.should be_empty
      end
    end
  end
end