similus 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/init.rb ADDED
@@ -0,0 +1,2 @@
1
+ $:.unshift "#{File.dirname(__FILE__)}/lib"
2
+ require 'similus'
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+ require 'redis'
3
+
4
+ require File.join(File.dirname(__FILE__), 'similus', 'core.rb')
5
+ require File.join(File.dirname(__FILE__), 'similus', 'config.rb')
6
+ require File.join(File.dirname(__FILE__), 'similus', 'redis.rb')
@@ -0,0 +1,24 @@
1
require 'logger' # Config#logger builds a Logger; nothing else loads the library

module Similus
  # Returns the shared Config instance, creating it on first use.
  #
  # With a block, the config is yielded and the block's value is returned;
  # without one, the Config instance itself is returned.
  def self.config
    @config ||= Config.new
    block_given? ? yield(@config) : @config
  end

  # Settings used to reach the backend and to log.
  class Config
    attr_accessor :backend       # backend identifier, :redis by default
    attr_accessor :redis_server  # "host:port" string
    attr_accessor :redis_db      # redis database number
    attr_accessor :logfile       # destination handed to Logger.new

    def initialize #:nodoc:
      self.backend = :redis
      self.redis_server = "localhost:6379"
      self.redis_db = 9
      self.logfile = STDOUT
    end

    # Logger writing to +logfile+. The instance is memoized, so assigning
    # a new +logfile+ after the first call does not change the logger.
    def logger
      @logger ||= Logger.new(logfile)
    end
  end
end
@@ -0,0 +1,220 @@
1
+ require 'digest/sha1'
2
+
3
+ module Similus
4
+ class << self
5
# Records that +a+ performed +action+ on +b+.
#
# Both objects and the action are found or created first. The activity
# is then written in both directions (a -> b and b -> a).
def add_activity(a, action, b)
  a, b = add_object(a), add_object(b)
  action = add_action(:name => action)

  create_activities(a, action, b) # a -> b
  create_activities(b, action, a) # b -> a
end
17
+
18
# Finds the objects that share activity with +src+.
#
# options:: merged over <tt>{:source => :all, :load_objects => true}</tt>
#           for this method; load_activity receives the options as given.
# objects:: optional pre-loaded activity ids for +src+; loaded when nil.
#
# Returns loaded objects ordered by descending score when
# <tt>:load_objects</tt> is set, otherwise a hash of id => score.
def similar_to(src, options={}, objects=nil)
  sopt = {:source => :all, :load_objects => true}.update(options)
  src = add_object(src)
  objects ||= load_activity(src, options)

  # Count, for every other object, how many of the source's items it shares
  scores = empty_hash
  objects.each do |id|
    redis.smembers("activity:#{id}").each { |other| scores[other] += 1 }
  end

  scores.delete(src[:obj_id])                  # delete object itself from list
  scores.reject! { |_id, count| count == 1 }   # remove similars just by one

  return scores unless sopt[:load_objects]
  load_objects(scores.sort { |x,y| y[1] <=> x[1] })
end
38
+
39
# Recommends objects to +src+ based on the activity of similar objects.
#
# Options (defaults in parentheses):
# :load_objects:: return loaded objects instead of [id, score] pairs (true)
# :max_similar::  number of most-similar objects to consider (10);
#                 nil considers all of them
# :limit::        maximum number of recommendations returned (10)
# :offset::       number of top recommendations to skip (0)
# :method::       scoring method handed to rscore (:similarity)
# :source, :target:: accepted, default :all; not read by this method
#
# An optional block is forwarded to rscore in place of the built-in scoring.
#
# Returns an array ordered by descending score. It is empty when
# :offset lies beyond the available results.
def recommended_for(src, options={}, &block)
  default_options = { :load_objects => true, :max_similar => 10,
                      :limit => 10, :offset => 0, :method => :similarity,
                      :source => :all, :target => :all }

  ropt = default_options.update(options)
  sopt = options.merge(:load_objects => false)
  maxs = ropt.delete(:max_similar)

  # Find objects for user
  src_act = load_activity(src, sopt)

  # Get similar objects, most similar first, and keep exactly +maxs+ of
  # them (an inclusive 0..maxs range would keep one object too many).
  src_sim = similar_to(src, sopt, src_act).sort { |x,y| y[1] <=> x[1] }
  src_sim = src_sim.first(maxs) if maxs

  # Accumulate recommendation scores from the activity of each similar object
  result = src_sim.inject(empty_hash) do |res, sim|
    dst_act = redis.smembers("activity:#{sim[0]}")
    rscore(ropt[:method], res, dst_act, src_act, sim[1], sim[0], &block)
  end

  # Remove already seen, sort and apply limit/offset.
  # Array#[start, length] is nil when start is out of range, hence the || [].
  result.reject! { |key, _value| src_act.include?(key) }
  result = result.sort { |x,y| y[1] <=> x[1] }[ropt[:offset], ropt[:limit]] || []

  # Load original objects
  ropt[:load_objects] ? load_objects(result) : result
end
67
+
68
+ private
69
+
70
# Resolves the class id for +obj+ and stores it under :class_id.
# The id comes from the local cache when present, otherwise it is
# found or created through safe_create. Returns the object hash.
def add_class(obj)
  obj = object_to_hash(obj)
  class_name = obj[:class]
  obj[:class_id] = cached_value("class:#{class_name}") { safe_create("class", class_name) }
  obj
end
77
+
78
# Normalizes +obj+ to a hash and makes sure it carries :class_id and
# :obj_id, creating the class and object records when they are missing.
# Returns the object hash.
def add_object(obj)
  obj = object_to_hash(obj)

  # Find or create class
  add_class(obj) unless obj[:class_id]

  # Find or create object
  ident = "#{obj[:class]}:#{obj[:id]}"
  obj[:obj_id] = cached_value("object:#{ident}") do
    safe_create('object', ident) do |id|
      # Only runs for a newly created id: store its attributes and
      # register it in the set of objects of its class
      redis.hmset("object:#{id}", :id, obj[:id], :class_id, obj[:class_id])
      redis.sadd("class:#{obj[:class_id]}:objects", id)
    end
  end
  obj
end
92
+
93
# Finds or creates the id for <tt>action[:name]</tt> and stores it under
# :action_id in the given hash, which is also the return value.
def add_action(action)
  name = action[:name]
  action[:action_id] = cached_value("action:#{name}") { safe_create("action", name) }
  action
end
99
+
100
# Writes +dst+ into every activity index of +src+: the overall one, the
# per-action one, the per-action-and-class one and the per-class one.
# Each index is kept both as a set and as a sorted set (suffix ":s")
# scored with the current time in seconds. Returns the list of keys.
def create_activities(src, action, dst)
  src_id    = src[:obj_id]
  act_id    = action[:action_id]
  dst_class = dst[:class_id]

  keys = [ activity_key(src_id),
           activity_key(src_id, act_id),
           activity_key(src_id, act_id, dst_class),
           activity_key(src_id, nil, dst_class) ]

  keys.each do |key|
    redis.sadd key.to_s, dst[:obj_id]                        # Set
    redis.zadd "#{key}:s", Time.now.to_i, dst[:obj_id]       # Sorted List
  end
end
111
+
112
# Builds the redis key of an activity index:
# "activity:<obj_id>", followed by ":a:<action_id>" and/or
# ":c:<class_id>" for each of those arguments that is given.
def activity_key(obj_id, action_id=nil, class_id=nil)
  parts = ["activity:#{obj_id}"]
  parts << "a:#{action_id}" if action_id
  parts << "c:#{class_id}" if class_id
  parts.join(":")
end
118
+
119
# Loads the ids +src+ has activity with.
#
# options[:max_activity_objects] (default 20) limits the result to that
# many entries taken from the top of the sorted index via zrevrange;
# passing nil returns every member of the plain set instead.
def load_activity(src, options)
  aopt = { :source => :all, :max_activity_objects => 20 }.update(options)
  last = aopt.delete(:max_activity_objects)

  # Assign object and class ids
  src = add_object(src)
  act_key = activity_key(src[:obj_id])

  return redis.smembers(act_key) unless last
  redis.zrevrange("#{act_key}:s", 0, last - 1)
end
133
+
134
# Turns scored ids back into descriptions of the original objects.
#
# data_with_score:: hash {key => score} or array [[key, score], ...]
#
# Returns, in input order, one hash per entry with :score, the stored
# :id and the :class name read from redis.
def load_objects(data_with_score)
  pairs = data_with_score.is_a?(Hash) ? data_with_score.to_a : data_with_score
  pairs.map do |item|
    stored = redis.hgetall("object:#{item[0]}")
    class_name = redis.get("class:#{stored["class_id"]}")
    { :score => item[1], :id => stored["id"], :class => class_name }
  end
end
145
+
146
# Normalizes the accepted object notations to a hash.
#
# obj:: an Array <tt>[class, id]</tt>, a Hash (returned as is), or any
#       object responding to +id+ (its class name and id are used).
#
# Arrays and Hashes are matched by type, so subclasses are accepted too.
#
# Raises ArgumentError for anything else, instead of returning nil and
# failing later with an unrelated NoMethodError.
def object_to_hash(obj)
  case obj
  when Array
    {:class => obj[0], :id => obj[1]}
  when Hash
    obj
  else
    unless obj.respond_to?(:id)
      raise ArgumentError,
            "cannot identify #{obj.inspect}: expected [class, id], a Hash or an object responding to #id"
    end
    {:class => obj.class.to_s, :id => obj.id}
  end
end
158
+
159
# Returns a new empty hash whose missing keys read as +default+
# (0.0 unless given), so scores can be accumulated with +=.
def empty_hash(default=0.0)
  Hash.new(default)
end
164
+
165
# Adds the recommendation scores contributed by one similar object to +res+.
#
# method::        :similarity, :jaccard or :jaccard_similarity; any other
#                 value leaves +res+ untouched
# res::           hash of id => accumulated score, updated in place
# dst_act::       activity ids of the similar object
# src_act::       activity ids of the source object
# src_sim_score:: similarity score between source and similar object
# src_oid::       id of the similar object
#
# When a block is given it replaces the built-in scoring and receives the
# leading arguments of (res, dst_act, src_act, src_sim_score, src_oid)
# according to its arity. Its return value is ignored, so the block is
# expected to update +res+ itself.
#
# Returns +res+.
def rscore(method, res, dst_act, src_act, src_sim_score, src_oid, &block)
  if block_given?
    params = [res, dst_act, src_act, src_sim_score, src_oid]
    arity = block.arity
    # Pass exactly +arity+ arguments: procs silently drop extras, but
    # lambdas raise ArgumentError when given one too many.
    args = arity >= 0 ? params.first(arity) : params[0..arity]
    block.call(*args)
  else
    case method
    when :similarity
      dst_act.each do |dst_oid|
        res[dst_oid] += src_sim_score
      end
    when :jaccard
      jf = jaccard_factor(src_act, dst_act)
      dst_act.each do |dst_oid|
        res[dst_oid] += 1000.0 * jf
      end
    when :jaccard_similarity
      jf = jaccard_factor(src_act, dst_act)
      dst_act.each do |dst_oid|
        res[dst_oid] += src_sim_score * jf
      end
    end
  end
  res
end
190
+
191
# Jaccard index of two id lists: size of the intersection divided by the
# size of the union, as a Float between 0.0 and 1.0.
#
# Returns 0.0 when both lists are empty; the plain division would give
# NaN there, which cannot be compared when scores are sorted.
def jaccard_factor(src, dst)
  union_size = (src | dst).size
  return 0.0 if union_size.zero?
  (src & dst).size.to_f / union_size
end
194
+
195
# Class level cache for objects: a hash created on first access and
# reused afterwards.
def cache
  @cache || (@cache = {})
end
199
+
200
# Returns the cached value for +key+. When the cache holds nothing (or a
# false/nil value) for it, the block is run and its result is stored
# and returned.
def cached_value(key)
  store = cache
  hit = store[key]
  return hit if hit
  store[key] = yield
end
203
+
204
# Finds or creates the id registered for +value+ under +base+.
#
# The lookup key is "<base>:<sha1 of value>:id". When it does not exist a
# new id is taken from the "next.<base>.id" counter (rendered in base 36
# to save space) and claimed with setnx. If the claim succeeds, the block
# is run with the id or, without a block, "<base>:<id>" is set to +value+.
# If the claim fails, the key was created in between and the id stored
# there is used instead.
#
# Returns the id.
def safe_create(base, value)
  digest = Digest::SHA1.hexdigest(value.to_s)
  hkey = "#{base}:#{digest}:id"

  existing = redis.get(hkey)
  return existing if existing

  id = redis.incr("next.#{base}.id").to_s(36)
  if redis.setnx(hkey, id)
    block_given? ? yield(id) : redis.setnx("#{base}:#{id}", value)
    id
  else
    redis.get(hkey) # hash key created in between - revert to original value
  end
end
219
+ end # class << self
220
+ end
@@ -0,0 +1,16 @@
1
module Similus
  # Returns the shared Redis client, building it on first use from
  # config.redis_server ("host:port") and config.redis_db.
  #
  # If building the client raises a StandardError, the error is logged
  # through config.logger and nil is returned; nil is not memoized, so the
  # next call tries again. Other exceptions (Interrupt, SystemExit, ...)
  # are not rescued and propagate to the caller.
  def self.redis
    @redis ||= begin
      host, port = config.redis_server.split(':')
      ::Redis.new(:host => host, :port => port, :db => config.redis_db)
    rescue StandardError => e
      config.logger.error "Error connecting redis server: #{e.message}"
      nil
    end
  end

  # Resets the local id cache and calls flushdb on the redis client.
  def self.clear_database!
    @cache = {}
    redis.flushdb
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for similus 0.1.1.
Gem::Specification.new do |s|
  # Identity
  s.name        = %q{similus}
  s.version     = "0.1.1"
  s.date        = %q{2010-10-07}
  s.authors     = ["Horaci Cuevas"]
  s.email       = %q{horaci @@ gmail.com}
  s.homepage    = %q{http://github.com/horaci/similus}
  s.summary     = %q{A ruby library to find similar objects and make recommendations based on activity of objects}
  s.description = %q{A ruby library to find similar objects and make recommendations based on activity of objects}

  # Signing.
  # NOTE(review): both paths are absolute paths under /Users/horaci.cuevas;
  # confirm they are available before building a signed gem elsewhere.
  s.cert_chain  = ["/Users/horaci.cuevas/gem-public_cert.pem"]
  s.signing_key = %q{/Users/horaci.cuevas/gem-private_key.pem}

  # Contents
  s.require_paths = ["lib"]
  s.files = %w[
    LICENSES README.rdoc Rakefile
    benchmarks/benchmark1.rb benchmarks/benchmark2.rb
    benchmarks/custom_benchmark.rb benchmarks/redis.conf
    init.rb lib/similus.rb
    lib/similus/config.rb lib/similus/core.rb lib/similus/redis.rb
    test/add_activity_spec.rb test/recommended_spec.rb test/similar_spec.rb
    Manifest similus.gemspec
  ]

  # Documentation
  s.extra_rdoc_files = %w[
    LICENSES README.rdoc lib/similus.rb
    lib/similus/config.rb lib/similus/core.rb lib/similus/redis.rb
  ]
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Similus", "--main", "README.rdoc"]

  # Tooling metadata
  s.rubyforge_project = %q{similus}
  s.rubygems_version  = %q{1.3.7}
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,37 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib")
2
+
3
+ require 'similus'
4
+
5
describe "Similus" do
  describe "Add activity" do
    # Start from an empty database holding a single activity
    before(:all) do
      Similus.clear_database!
      Similus.add_activity(["User", 1], :view, ["Movie", "Star Wars 1"])
    end

    # Reads the id stored under "<type>:<sha1 of object>:id", or nil when
    # no such key exists.
    def redis_object(type, object)
      Similus.redis.get("#{type}:#{Digest::SHA1.hexdigest(object)}:id")
    end

    it "should create classes in redis" do
      %w[User Movie].each { |name| redis_object("class", name).should_not be_nil }
      redis_object("class", "Other").should be_nil
    end

    it "should create objects in redis" do
      ["User:1", "Movie:Star Wars 1"].each { |key| redis_object("object", key).should_not be_nil }
      redis_object("object", "User:2").should be_nil
    end

    it "should create actions in redis" do
      redis_object("action", "view").should_not be_nil
      redis_object("action", "like").should be_nil
    end
  end
end
@@ -0,0 +1,104 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib")
2
+ require 'similus'
3
+
4
describe "Similus" do
  # Seed an empty database with the movies each user has viewed.
  # Entries are written user by user, in the order listed.
  before(:all) do
    Similus.clear_database!

    viewed = [
      [1, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Star Wars 4"]],
      [2, ["Star Wars 3", "Star Wars 4", "Star Wars 5"]],
      [3, ["Star Wars 1", "Star Wars 3", "Star Wars 5"]],
      [4, ["Star Wars 2", "Star Wars 3"]],
      [5, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Blade Runner"]],
      [6, ["Star Wars 1", "Star Wars 5", "Blade Runner"]],
      [7, ["Casablanca"]]
    ]

    viewed.each do |user_id, titles|
      titles.each do |title|
        Similus.add_activity(["User", user_id], :view, ["Movie", title])
      end
    end
  end

  describe "#recommended_for" do
    describe "User 1" do
      before(:all) do
        @recommended_for_user_1 = Similus.recommended_for(["User", 1])
      end

      it "should recommend SW5 and then Blade Runner" do
        first, second = @recommended_for_user_1[0], @recommended_for_user_1[1]
        first[:id].should == "Star Wars 5"
        first[:score].should == 4.0
        second[:id].should == "Blade Runner"
        second[:score].should == 3.0
      end

      it "should not recommend Casablanca" do
        @recommended_for_user_1.detect { |x| x[:id] == "Casablanca" }.should be_nil
      end
    end

    describe "other users" do
      before(:all) do
        @recommended_for_user_2 = Similus.recommended_for(["User", 2])
        @recommended_for_user_3 = Similus.recommended_for(["User", 3])
        @recommended_for_user_4 = Similus.recommended_for(["User", 4])
        @recommended_for_user_5 = Similus.recommended_for(["User", 5])
        @recommended_for_user_6 = Similus.recommended_for(["User", 6])
        @recommended_for_user_7 = Similus.recommended_for(["User", 7])
      end

      it "should recommend only SW1 and then SW2 to user2 but not blade runner" do
        first, second = @recommended_for_user_2[0], @recommended_for_user_2[1]
        first[:id].should == "Star Wars 1"
        first[:score].should == 4.0
        second[:id].should == "Star Wars 2"
        second[:score].should == 2.0
        @recommended_for_user_2.detect { |x| x[:id] == "Blade Runner" }.should be_nil
      end

      it "should recommend only BR and SW4 and SW2 to user3" do
        ["Blade Runner", "Star Wars 4", "Star Wars 2"].each do |title|
          @recommended_for_user_3.detect { |x| x[:id] == title }.should_not be_nil
        end
      end

      it "should recommend first SW1 and then SW4 and BR to user4" do
        @recommended_for_user_4.first[:id].should == "Star Wars 1"
        ["Blade Runner", "Star Wars 4"].each do |title|
          @recommended_for_user_4.detect { |x| x[:id] == title }.should_not be_nil
        end
      end

      it "should recommend first SW5 and then SW4 to user5" do
        first, second = @recommended_for_user_5[0], @recommended_for_user_5[1]
        first[:id].should == "Star Wars 5"
        first[:score].should == 4.0
        second[:id].should == "Star Wars 4"
        second[:score].should == 3.0
      end

      it "should recommend first SW3 and then SW2 to user6" do
        first, second = @recommended_for_user_6[0], @recommended_for_user_6[1]
        first[:id].should == "Star Wars 3"
        first[:score].should == 4.0
        second[:id].should == "Star Wars 2"
        second[:score].should == 2.0
      end

      it "should recommend nothing to user7" do
        @recommended_for_user_7.should be_empty
      end
    end
  end
end