similus 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/LICENSES +23 -0
- data/Manifest +16 -0
- data/README.rdoc +80 -0
- data/Rakefile +22 -0
- data/benchmarks/benchmark1.rb +90 -0
- data/benchmarks/benchmark2.rb +92 -0
- data/benchmarks/custom_benchmark.rb +41 -0
- data/benchmarks/redis.conf +312 -0
- data/init.rb +2 -0
- data/lib/similus.rb +6 -0
- data/lib/similus/config.rb +24 -0
- data/lib/similus/core.rb +220 -0
- data/lib/similus/redis.rb +16 -0
- data/similus.gemspec +32 -0
- data/test/add_activity_spec.rb +37 -0
- data/test/recommended_spec.rb +104 -0
- data/test/similar_spec.rb +103 -0
- metadata +112 -0
- metadata.gz.sig +0 -0
data/init.rb
ADDED
data/lib/similus.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'logger'

# Library namespace. This part holds the process-wide configuration.
module Similus
  # Returns the shared Config instance, creating it on first use.
  #
  # With a block, yields the Config and returns the block's value, so
  # callers can write <tt>Similus.config { |c| c.redis_db = 3 }</tt>.
  # Without a block, returns the Config itself.
  def self.config
    @config ||= Config.new
    block_given? ? yield(@config) : @config
  end

  # Plain settings container with the defaults assigned in #initialize.
  class Config
    attr_accessor :backend
    attr_accessor :redis_server
    attr_accessor :redis_db
    attr_accessor :logfile

    def initialize #:nodoc:
      self.backend      = :redis
      self.redis_server = "localhost:6379"
      self.redis_db     = 9
      self.logfile      = STDOUT
    end

    # Logger writing to +logfile+. It is built once and memoized, so a
    # +logfile+ assigned after the first call is not picked up.
    def logger
      @logger ||= Logger.new(logfile)
    end
  end
end
|
data/lib/similus/core.rb
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
module Similus
|
4
|
+
class << self
|
5
|
+
# Records that +a+ and +b+ are linked by +action+.
#
# +a+ and +b+ are passed through add_object and +action+ through
# add_action (find-or-create), then the activity is written in both
# directions so that each side can be looked up from the other.
#
# Returns the value of the second create_activities call.
def add_activity(a, action, b)
  a = add_object(a)
  b = add_object(b)
  action = add_action({:name => action})

  create_activities(a, action, b)
  create_activities(b, action, a)
end
|
17
|
+
|
18
|
+
# Finds objects that share activity with +src+.
#
# src::     anything add_object accepts.
# options:: <tt>:load_objects</tt> (default true) selects the return shape;
#           the whole hash is also forwarded to load_activity.
# objects:: optional pre-loaded activity ids for +src+; loaded through
#           load_activity when nil.
#
# Every member of each <tt>activity:<id></tt> set is counted once per set it
# appears in. +src+ itself and members seen only once are dropped.
#
# Returns load_objects(pairs sorted by descending count) when
# <tt>:load_objects</tt> is truthy, otherwise the raw {id => count} Hash.
def similar_to(src, options={}, objects=nil)
  sopt = {:source => :all, :load_objects => true}.update(options)
  src = add_object(src)

  objects ||= load_activity(src, options)

  similar_objects = objects.inject(empty_hash) do |counts, id|
    redis.smembers("activity:#{id}").each { |similar| counts[similar] += 1 }
    counts
  end

  similar_objects.delete(src[:obj_id])                  # drop the object itself from the list
  similar_objects.reject! { |_id, count| count == 1 }   # a single shared item is not a signal

  if sopt[:load_objects]
    load_objects(similar_objects.sort { |x, y| y[1] <=> x[1] })
  else
    similar_objects
  end
end
|
38
|
+
|
39
|
+
# Recommends objects for +src+ based on what its most similar objects did.
#
# Options (defaults in parentheses):
# :load_objects:: (true) return load_objects(result) instead of raw pairs
# :max_similar::  (10) number of most-similar objects consulted; nil = all
# :limit::        (10) page size
# :offset::       (0) page start
# :method::       (:similarity) scoring method handed to rscore
# :source, :target:: (:all) stored in the option hash; not read here
#
# An optional block is forwarded to rscore as a custom scoring function.
#
# Returns an array of [id, score] pairs sorted by descending score (or the
# load_objects result for them). A page that starts beyond the last
# result is an empty array.
def recommended_for(src, options={}, &block)
  default_options = { :load_objects => true, :max_similar => 10,
                      :limit => 10, :offset => 0, :method => :similarity,
                      :source => :all, :target => :all }

  ropt = default_options.update(options)
  sopt = options.merge(:load_objects => false)
  maxs = ropt.delete(:max_similar)

  # Activity already recorded for src
  src_act = load_activity(src, sopt)

  # Most similar objects first. Keep exactly +maxs+ of them: the previous
  # inclusive range [0..maxs] kept one extra.
  ranked  = similar_to(src, sopt, src_act).sort { |x, y| y[1] <=> x[1] }
  src_sim = maxs ? ranked.first(maxs) : ranked

  # Accumulate a score for everything the similar objects touched
  result = src_sim.inject(empty_hash) do |res, sim|
    dst_act = redis.smembers("activity:#{sim[0]}")
    rscore(ropt[:method], res, dst_act, src_act, sim[1], sim[0], &block)
  end

  # Remove what src has already seen, sort, then page. Array#[] with
  # (start, length) returns nil when start is past the end, hence the || [].
  result.reject! { |key, _score| src_act.include?(key) }
  result = result.sort { |x, y| y[1] <=> x[1] }[ropt[:offset], ropt[:limit]] || []

  ropt[:load_objects] ? load_objects(result) : result
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
# Ensures +obj+ carries a :class_id.
#
# +obj+ is normalised with object_to_hash; the id for its :class is taken
# from the cache or found/created with safe_create under the "class"
# namespace. Returns the normalised hash with :class_id set.
def add_class(obj)
  obj = object_to_hash(obj)
  class_name = obj[:class]
  obj[:class_id] = cached_value("class:#{class_name}") { safe_create("class", class_name) }
  obj
end
|
77
|
+
|
78
|
+
# Finds or creates the stored record for +obj+ and returns its hash form
# with :class_id and :obj_id filled in.
#
# On first creation (the block handed to safe_create) two writes happen:
# the hash "object:<id>" receives the original id and class id, and the
# new id is added to the set "class:<class_id>:objects".
def add_object(obj)
  obj = object_to_hash(obj)

  # Find or create class. Use the return value rather than relying on
  # add_class mutating its argument in place.
  obj = add_class(obj) unless obj[:class_id]

  # Find or create object
  name = "#{obj[:class]}:#{obj[:id]}"
  obj[:obj_id] = cached_value("object:#{name}") do
    safe_create('object', name) do |id|
      redis.hmset("object:#{id}", :id, obj[:id], :class_id, obj[:class_id])
      redis.sadd("class:#{obj[:class_id]}:objects", id)
    end
  end
  obj
end
|
92
|
+
|
93
|
+
# Finds or creates the id for an action.
#
# action:: Hash with a :name key. It is modified in place: :action_id is
#          set from the cache or from safe_create("action", name).
#
# Returns the same hash.
def add_action(action)
  name = action[:name]
  action[:action_id] = cached_value("action:#{name}") { safe_create("action", name) }
  action
end
|
99
|
+
|
100
|
+
# Writes one direction of an activity: "+src+ did +action+ on +dst+".
#
# dst's object id is stored under four keys built with activity_key:
# by source object; by source + action; by source + action + destination
# class; and by source + destination class. For each key the id is added
# to a plain set at <key> and to a sorted set at "<key>:s" scored with the
# current Unix time.
def create_activities(src, action, dst)
  src_id    = src[:obj_id]
  action_id = action[:action_id]
  dst_class = dst[:class_id]

  keys = [ activity_key(src_id),
           activity_key(src_id, action_id),
           activity_key(src_id, action_id, dst_class),
           activity_key(src_id, nil, dst_class) ]

  # Read the clock once so all four entries share the same score.
  now = Time.now.to_i

  keys.each do |key|
    redis.sadd key, dst[:obj_id]           # Set
    redis.zadd "#{key}:s", now, dst[:obj_id] # Sorted List
  end
end
|
111
|
+
|
112
|
+
# Builds the storage key for an object's activity.
#
# The result is "activity:<obj_id>", followed by ":a:<action_id>" when
# +action_id+ is given and ":c:<class_id>" when +class_id+ is given
# (each part independently, in that order).
def activity_key(obj_id, action_id=nil, class_id=nil)
  parts = ["activity:#{obj_id}"]
  parts << "a:#{action_id}" if action_id
  parts << "c:#{class_id}" if class_id
  parts.join(":")
end
|
118
|
+
|
119
|
+
# Loads the ids of the objects +src+ has activity with.
#
# options:: <tt>:max_activity_objects</tt> (default 20). When truthy, only
#           that many entries are read from the sorted set
#           "<activity key>:s" via zrevrange (highest score first). When
#           nil or false, the whole plain set is returned via smembers.
#
# The caller's +options+ hash is not modified.
def load_activity(src, options)
  aopt = { :source => :all, :max_activity_objects => 20 }.update(options)
  last = aopt.delete(:max_activity_objects)

  # Assign object and class ids
  src = add_object(src)

  act_key = activity_key(src[:obj_id])
  if last
    redis.zrevrange("#{act_key}:s", 0, last - 1)
  else
    redis.smembers(act_key)
  end
end
|
133
|
+
|
134
|
+
# data_with_score is hash {key => score} or array [[key,score]]
|
135
|
+
# Turns scored internal ids back into the original object descriptions.
#
# data_with_score:: Hash {id => score}, Array of [id, score] pairs, or nil
#                   (treated as no results).
#
# For each pair, the hash "object:<id>" is read and the class name is
# fetched from "class:<class_id>". Returns an array of
# {:score, :id, :class} hashes in input order.
def load_objects(data_with_score)
  # Array() converts a Hash to pairs, leaves an Array alone and maps nil to [].
  Array(data_with_score).map do |key, score|
    obj = redis.hgetall "object:#{key}"
    { :score => score,
      :id    => obj["id"],
      :class => redis.get("class:#{obj["class_id"]}") }
  end
end
|
145
|
+
|
146
|
+
# Normalises the accepted ways of naming an object into a Hash.
#
# obj:: one of
#       * Array [class, id]    -> {:class => class, :id => id}
#       * Hash                 -> returned as is (same instance)
#       * any object with #id  -> {:class => obj.class.to_s, :id => obj.id}
#
# Array and Hash subclasses are accepted too. Raises ArgumentError for
# anything else, instead of returning nil and failing later in the caller.
def object_to_hash(obj)
  case obj
  when Array
    {:class => obj[0], :id => obj[1]}
  when Hash
    obj
  else
    unless obj.respond_to?(:id)
      raise ArgumentError,
            "cannot identify #{obj.inspect}: expected [class, id], a Hash or an object responding to #id"
    end
    {:class => obj.class.to_s, :id => obj.id}
  end
end
|
158
|
+
|
159
|
+
# Returns a new empty Hash whose missing keys read as +default+
# (0.0 unless given), so scores can be accumulated with <tt>+=</tt>.
def empty_hash(default=0.0)
  Hash.new(default)
end
|
164
|
+
|
165
|
+
# Adds the contribution of one similar object to the running scores.
#
# method::        :similarity, :jaccard or :jaccard_similarity; any other
#                 value leaves +res+ untouched. Ignored when a block is given.
# res::           score accumulator ({id => score}), updated in place
# dst_act::       activity ids of the similar object
# src_act::       activity ids of the source object
# src_sim_score:: similarity between source and similar object
# src_oid::       id of the similar object
# block::         optional custom scorer. It receives as many of
#                 (res, dst_act, src_act, src_sim_score, src_oid) as it
#                 declares, or all five if it takes a variable number of
#                 arguments. Its return value is ignored.
#
# Returns +res+.
def rscore(method, res, dst_act, src_act, src_sim_score, src_oid, &block)
  if block_given?
    params = [res, dst_act, src_act, src_sim_score, src_oid]
    wanted = block.arity
    # Pass exactly the declared number of arguments so lambdas work too.
    block.call(*(wanted < 0 ? params : params.first(wanted)))
  else
    case method
    when :similarity
      dst_act.each { |dst_oid| res[dst_oid] += src_sim_score }
    when :jaccard
      jf = jaccard_factor(src_act, dst_act)
      dst_act.each { |dst_oid| res[dst_oid] += 1000.0 * jf }
    when :jaccard_similarity
      jf = jaccard_factor(src_act, dst_act)
      dst_act.each { |dst_oid| res[dst_oid] += src_sim_score * jf }
    end
  end
  res
end
|
190
|
+
|
191
|
+
# Jaccard index of two id lists: |src ∩ dst| / |src ∪ dst| as a Float.
# Returns 0.0 when both lists are empty (the plain division would be NaN).
def jaccard_factor(src,dst)
  union_size = (src | dst).size
  return 0.0 if union_size.zero?

  (src & dst).size.to_f / union_size
end
|
194
|
+
|
195
|
+
# Class level cache for objects
|
196
|
+
# In-memory Hash used by cached_value; created empty on first access and
# kept in the receiver's @cache instance variable.
def cache
  @cache ||= {}
end
|
199
|
+
|
200
|
+
# Returns the cached value for +key+, computing and storing it with the
# given block on a miss. A nil or false result is not retained, so the
# block runs again on the next call for that key.
def cached_value(key)
  cache[key] ||= yield
end
|
203
|
+
|
204
|
+
# Finds or allocates the short id for +value+ inside namespace +base+.
#
# The lookup key is "<base>:<sha1 of value.to_s>:id". If it already holds
# an id, that id is returned. Otherwise a new id is taken from the counter
# "next.<base>.id" (rendered in base 36 to keep ids short) and claimed
# with SETNX:
# * claim succeeded: the block, if given, is called with the new id;
#   without a block the value is stored under "<base>:<id>".
# * claim failed (the key appeared in between): the id already stored
#   there is used instead and the block is not called.
#
# Returns the id as a String.
def safe_create(base, value)
  digest = Digest::SHA1.hexdigest(value.to_s)
  hkey   = "#{base}:#{digest}:id"

  id = redis.get(hkey)
  return id if id

  id = redis.incr("next.#{base}.id").to_s(36)
  if redis.setnx(hkey, id)
    block_given? ? yield(id) : redis.setnx("#{base}:#{id}", value)
  else
    id = redis.get(hkey) # key was created in between - use the stored id
  end
  id
end
|
219
|
+
end # class << self
|
220
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Redis connection handling for Similus.
module Similus
  # Returns the shared Redis client, built on first use from
  # config.redis_server ("host:port") and config.redis_db.
  #
  # If building the client raises a StandardError, the error is logged
  # through config.logger and nil is returned; because nil is not memoized
  # the next call tries again. Signals and exit requests (Interrupt,
  # SystemExit, ...) are deliberately not caught.
  def self.redis
    @redis ||= begin
      host, port = config.redis_server.split(':')
      ::Redis.new(:host => host, :port => port, :db => config.redis_db)
    rescue StandardError => e
      config.logger.error "Error connecting redis server: #{e.message}"
      nil
    end
  end

  # Empties the in-memory id cache and flushes the configured Redis
  # database (FLUSHDB). Destructive: all keys in that database are removed.
  def self.clear_database!
    @cache = {}
    redis.flushdb
  end
end
|
data/similus.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for similus 0.1.1.
Gem::Specification.new do |s|
  s.name = %q{similus}
  s.version = "0.1.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Horaci Cuevas"]
  # NOTE(review): cert_chain and signing_key are absolute paths on the
  # author's machine; signing will only work where those files exist.
  s.cert_chain = ["/Users/horaci.cuevas/gem-public_cert.pem"]
  s.date = %q{2010-10-07}
  s.description = %q{A ruby library to find similar objects and make recommendations based on activity of objects}
  s.email = %q{horaci @@ gmail.com}
  s.extra_rdoc_files = ["LICENSES", "README.rdoc", "lib/similus.rb", "lib/similus/config.rb", "lib/similus/core.rb", "lib/similus/redis.rb"]
  s.files = ["LICENSES", "README.rdoc", "Rakefile", "benchmarks/benchmark1.rb", "benchmarks/benchmark2.rb", "benchmarks/custom_benchmark.rb", "benchmarks/redis.conf", "init.rb", "lib/similus.rb", "lib/similus/config.rb", "lib/similus/core.rb", "lib/similus/redis.rb", "test/add_activity_spec.rb", "test/recommended_spec.rb", "test/similar_spec.rb", "Manifest", "similus.gemspec"]
  s.homepage = %q{http://github.com/horaci/similus}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Similus", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # rubyforge_project= is deprecated in current RubyGems; guard it the same
  # way as the other version-dependent attributes in this file.
  s.rubyforge_project = %q{similus} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.7}
  s.signing_key = %q{/Users/horaci.cuevas/gem-private_key.pem}
  s.summary = %q{A ruby library to find similar objects and make recommendations based on activity of objects}

  # The original version-dependent branches were empty (no dependencies
  # declared), so only the specification version assignment remains.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
$: << File.join(File.dirname(__FILE__), "/../lib")
|
2
|
+
|
3
|
+
require 'similus'
|
4
|
+
|
5
|
+
# Checks the keys written by a single Similus.add_activity call.
# Needs a reachable Redis server; the configured database is flushed.
describe "Similus" do
  describe "Add activity" do
    before(:all) do
      # Clear redis
      Similus.clear_database!

      # Activity
      Similus.add_activity(["User", 1], :view, ["Movie", "Star Wars 1"])
    end

    # Reads the id stored under "<type>:<sha1 of object>:id";
    # nil when nothing was stored for +object+.
    def redis_object(type, object)
      object_hash = Digest::SHA1.hexdigest(object)
      Similus.redis.get("#{type}:#{object_hash}:id")
    end

    it "should create classes in redis" do
      redis_object("class", "User").should_not be_nil
      redis_object("class", "Movie").should_not be_nil
      redis_object("class", "Other").should be_nil
    end

    it "should create objects in redis" do
      redis_object("object", "User:1").should_not be_nil
      redis_object("object", "Movie:Star Wars 1").should_not be_nil
      redis_object("object", "User:2").should be_nil
    end

    it "should create actions in redis" do
      redis_object("action", "view").should_not be_nil
      redis_object("action", "like").should be_nil
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
$: << File.join(File.dirname(__FILE__), "/../lib")
|
2
|
+
require 'similus'
|
3
|
+
|
4
|
+
# Exercises Similus.recommended_for against a small fixed viewing history.
# Needs a reachable Redis server; the configured database is flushed.
describe "Similus" do
  before(:all) do
    # Clear redis
    Similus.clear_database!

    # Viewing history as [user id, movies] pairs. An array (not a hash)
    # keeps the insertion order identical on every Ruby version.
    views = [
      [1, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Star Wars 4"]],
      [2, ["Star Wars 3", "Star Wars 4", "Star Wars 5"]],
      [3, ["Star Wars 1", "Star Wars 3", "Star Wars 5"]],
      [4, ["Star Wars 2", "Star Wars 3"]],
      [5, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Blade Runner"]],
      [6, ["Star Wars 1", "Star Wars 5", "Blade Runner"]],
      [7, ["Casablanca"]]
    ]

    views.each do |user, movies|
      movies.each do |movie|
        Similus.add_activity(["User", user], :view, ["Movie", movie])
      end
    end
  end

  describe "#recommended_for" do
    describe "User 1" do
      before(:all) do
        @recommended_for_user_1 = Similus.recommended_for(["User", 1])
      end

      it "should recommend SW5 and then Blade Runner" do
        @recommended_for_user_1[0][:id].should == "Star Wars 5"
        @recommended_for_user_1[0][:score].should == 4.0
        @recommended_for_user_1[1][:id].should == "Blade Runner"
        @recommended_for_user_1[1][:score].should == 3.0
      end

      it "should not recommend Casablanca" do
        @recommended_for_user_1.detect { |x| x[:id] == "Casablanca" }.should be_nil
      end
    end

    describe "other users" do
      before(:all) do
        @recommended_for_user_2 = Similus.recommended_for(["User", 2])
        @recommended_for_user_3 = Similus.recommended_for(["User", 3])
        @recommended_for_user_4 = Similus.recommended_for(["User", 4])
        @recommended_for_user_5 = Similus.recommended_for(["User", 5])
        @recommended_for_user_6 = Similus.recommended_for(["User", 6])
        @recommended_for_user_7 = Similus.recommended_for(["User", 7])
      end

      it "should recommend only SW1 and then SW2 to user2 but not blade runner" do
        @recommended_for_user_2[0][:id].should == "Star Wars 1"
        @recommended_for_user_2[0][:score].should == 4.0
        @recommended_for_user_2[1][:id].should == "Star Wars 2"
        @recommended_for_user_2[1][:score].should == 2.0
        @recommended_for_user_2.detect { |x| x[:id] == "Blade Runner" }.should be_nil
      end

      it "should recommend only BR and SW4 and SW2 to user3" do
        @recommended_for_user_3.detect { |x| x[:id] == "Blade Runner" }.should_not be_nil
        @recommended_for_user_3.detect { |x| x[:id] == "Star Wars 4" }.should_not be_nil
        @recommended_for_user_3.detect { |x| x[:id] == "Star Wars 2" }.should_not be_nil
      end

      it "should recommend first SW1 and then SW4 and BR to user4" do
        @recommended_for_user_4.first[:id].should == "Star Wars 1"
        @recommended_for_user_4.detect { |x| x[:id] == "Blade Runner" }.should_not be_nil
        @recommended_for_user_4.detect { |x| x[:id] == "Star Wars 4" }.should_not be_nil
      end

      it "should recommend first SW5 and then SW4 to user5" do
        @recommended_for_user_5[0][:id].should == "Star Wars 5"
        @recommended_for_user_5[0][:score].should == 4.0
        @recommended_for_user_5[1][:id].should == "Star Wars 4"
        @recommended_for_user_5[1][:score].should == 3.0
      end

      it "should recommend first SW3 and then SW2 to user6" do
        @recommended_for_user_6[0][:id].should == "Star Wars 3"
        @recommended_for_user_6[0][:score].should == 4.0
        @recommended_for_user_6[1][:id].should == "Star Wars 2"
        @recommended_for_user_6[1][:score].should == 2.0
      end

      it "should recommend nothing to user7" do
        @recommended_for_user_7.should be_empty
      end
    end
  end
end
|