similus 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/LICENSES +23 -0
- data/Manifest +16 -0
- data/README.rdoc +80 -0
- data/Rakefile +22 -0
- data/benchmarks/benchmark1.rb +90 -0
- data/benchmarks/benchmark2.rb +92 -0
- data/benchmarks/custom_benchmark.rb +41 -0
- data/benchmarks/redis.conf +312 -0
- data/init.rb +2 -0
- data/lib/similus.rb +6 -0
- data/lib/similus/config.rb +24 -0
- data/lib/similus/core.rb +220 -0
- data/lib/similus/redis.rb +16 -0
- data/similus.gemspec +32 -0
- data/test/add_activity_spec.rb +37 -0
- data/test/recommended_spec.rb +104 -0
- data/test/similar_spec.rb +103 -0
- metadata +112 -0
- metadata.gz.sig +0 -0
data/init.rb
ADDED
data/lib/similus.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'logger' # Config#logger builds a ::Logger; make sure the constant is loaded

# Configuration entry point and settings container for Similus.
module Similus
  # Returns the module-wide configuration, creating it on first use.
  #
  # With a block, yields the Config instance and returns the block's value;
  # without a block, returns the Config instance itself.
  def self.config
    @config ||= Config.new
    block_given? ? yield(@config) : @config
  end

  # Settings container. Defaults are assigned in #initialize.
  class Config
    attr_accessor :backend
    attr_accessor :redis_server
    attr_accessor :redis_db
    attr_reader   :logfile

    def initialize #:nodoc:
      self.backend = :redis
      self.redis_server = "localhost:6379"
      self.redis_db = 9
      self.logfile = STDOUT
    end

    # Sets the log destination handed to Logger.new.
    #
    # The memoized logger is dropped so that the next call to #logger writes
    # to the new destination instead of the one captured earlier.
    def logfile=(destination)
      @logger = nil
      @logfile = destination
    end

    # Returns a Logger writing to #logfile, built lazily and memoized until
    # #logfile= is called again.
    def logger
      @logger ||= Logger.new(logfile)
    end
  end
end
|
data/lib/similus/core.rb
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
module Similus
|
4
|
+
class << self
|
5
|
+
# Records that +a+ performed +action+ on +b+.
#
# Both endpoints are passed through add_object and the action name through
# add_action, then the relation is stored in both directions
# (a -> b and b -> a) via create_activities.
#
# Returns whatever the second create_activities call returns.
def add_activity(a, action, b)
  # Resolve both endpoints first, in the same order as they were given
  subject, target = add_object(a), add_object(b)

  # Resolve the action record
  verb = add_action({:name => action})

  # Store the relation from each side
  create_activities(subject, verb, target)
  create_activities(target, verb, subject)
end
|
17
|
+
|
18
|
+
# Finds objects that share activity targets with +src+.
#
# src     - anything accepted by add_object.
# options - :load_objects (default true) selects the return shape; the
#           options hash is also forwarded untouched to load_activity.
# objects - optional pre-loaded activity ids for +src+; loaded through
#           load_activity when nil.
#
# Returns the load_objects result for the candidates sorted by descending
# count when :load_objects is truthy, otherwise the raw {id => count} hash.
def similar_to(src, options={}, objects=nil)
  sopt = {:source => :all, :load_objects => true}.merge(options)
  src = add_object(src)
  objects ||= load_activity(src, options)

  # For every activity target of src, count each member of that target's set
  tally = empty_hash
  objects.each do |id|
    redis.smembers("activity:#{id}").each { |similar| tally[similar] += 1 }
  end

  tally.delete(src[:obj_id])                 # drop the source object itself from the list
  tally.reject! { |_id, hits| hits == 1 }    # drop candidates that share only one target

  return tally unless sopt[:load_objects]

  load_objects(tally.sort { |x, y| y[1] <=> x[1] })
end
|
38
|
+
|
39
|
+
# Builds a scored list of recommendations for +src+.
#
# src     - anything accepted by load_activity / similar_to.
# options - overrides for the defaults below:
#           :max_similar  - how many of the most similar objects to consider
#           :limit        - maximum number of results returned
#           :offset       - number of top results to skip
#           :method       - scoring method handed to rscore
#           :load_objects - when truthy the result goes through load_objects
#           (:source and :target are accepted but not read in this method)
# block   - optional custom scorer forwarded to rscore.
#
# Returns the load_objects result when :load_objects is truthy, otherwise an
# array of [id, score] pairs sorted by descending score. An offset past the
# end of the results yields an empty list.
def recommended_for(src, options={}, &block)
  default_options = { :load_objects => true, :max_similar => 10,
                      :limit => 10, :offset => 0, :method => :similarity,
                      :source => :all, :target => :all }

  ropt = default_options.update(options)
  sopt = options.merge(:load_objects => false)
  maxs = ropt.delete(:max_similar)

  # Activity already recorded for the source object
  src_act = load_activity(src, sopt)

  # Most similar objects first; the exclusive range keeps exactly +maxs+
  # entries (an inclusive range would keep one more than requested)
  src_sim = similar_to(src, sopt, src_act).sort { |x, y| y[1] <=> x[1] }[0...maxs]

  # Accumulate a score for everything the similar objects have activity with
  result = src_sim.inject(empty_hash) do |res, sim|
    dst_act = redis.smembers("activity:#{sim[0]}")
    rscore(ropt[:method], res, dst_act, src_act, sim[1], sim[0], &block)
  end

  # Remove what the source already has activity with, then sort and page.
  # Array#[] returns nil when the offset is past the end, so fall back to [].
  result.reject! { |key, _score| src_act.include?(key) }
  result = result.sort { |x, y| y[1] <=> x[1] }[ropt[:offset], ropt[:limit]] || []

  ropt[:load_objects] ? load_objects(result) : result
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
# Ensures the record for +obj+ carries a :class_id.
#
# The input is normalised with object_to_hash; the id is taken from the
# local cache under "class:<class>" or created through safe_create.
#
# Returns the normalised hash with :class_id set.
def add_class(obj)
  record = object_to_hash(obj)
  class_name = record[:class]
  record[:class_id] = cached_value("class:#{class_name}") { safe_create("class", class_name) }
  record
end
|
77
|
+
|
78
|
+
# Ensures the record for +obj+ carries :class_id and :obj_id.
#
# The input is normalised with object_to_hash. When a new object id is
# created, its "object:<id>" hash is written and the id is added to the
# "class:<class_id>:objects" set.
#
# Returns the normalised hash with :obj_id set.
def add_object(obj)
  record = object_to_hash(obj)

  # Find or create the class (add_class fills :class_id on the same hash)
  add_class(record) unless record[:class_id]

  # Find or create the object itself
  qualified_name = "#{record[:class]}:#{record[:id]}"
  record[:obj_id] = cached_value("object:#{qualified_name}") do
    safe_create('object', qualified_name) do |new_id|
      redis.hmset("object:#{new_id}", :id, record[:id], :class_id, record[:class_id])
      redis.sadd("class:#{record[:class_id]}:objects", new_id)
    end
  end

  record
end
|
92
|
+
|
93
|
+
# Ensures the +action+ hash carries an :action_id.
#
# action - hash with a :name entry; it is modified in place.
#
# The id comes from the local cache under "action:<name>" or is created
# through safe_create. Returns the same hash.
def add_action(action)
  name = action[:name]
  action[:action_id] = cached_value("action:#{name}") { safe_create("action", name) }
  action
end
|
99
|
+
|
100
|
+
# Stores one direction of an activity: +dst+ is added under every activity
# key derived from +src+.
#
# Four keys are written: the plain source key, source + action,
# source + action + destination class, and source + destination class.
# For each key the destination id goes into a set and, stamped with the
# current time, into the companion sorted set "<key>:s".
#
# Returns the array of keys.
def create_activities(src, action, dst)
  owner_id     = src[:obj_id]
  verb_id      = action[:action_id]
  target_class = dst[:class_id]

  keys = [
    activity_key(owner_id),
    activity_key(owner_id, verb_id),
    activity_key(owner_id, verb_id, target_class),
    activity_key(owner_id, nil, target_class)
  ]

  keys.each do |key|
    redis.sadd key.to_s, dst[:obj_id]                      # membership set
    redis.zadd "#{key}:s", Time.now.to_i, dst[:obj_id]     # time-ordered sorted set
  end
end
|
111
|
+
|
112
|
+
# Builds the key name for an object's activity.
#
# obj_id    - id placed right after the "activity:" prefix.
# action_id - when truthy, appends ":a:<action_id>".
# class_id  - when truthy, appends ":c:<class_id>".
#
# Returns the key as a String.
def activity_key(obj_id, action_id=nil, class_id=nil)
  segments = ["activity:#{obj_id}"]
  segments << "a:#{action_id}" if action_id
  segments << "c:#{class_id}" if class_id
  segments.join(":")
end
|
118
|
+
|
119
|
+
# Loads the ids +src+ has activity with.
#
# src     - anything accepted by add_object.
# options - :max_activity_objects (default 20). When truthy, that many
#           entries are read from the "<key>:s" sorted set in reverse
#           order; when nil or false, all members of the plain set are
#           returned instead.
def load_activity(src, options)
  settings = { :source => :all, :max_activity_objects => 20 }.merge(options)
  newest_count = settings[:max_activity_objects]

  # Make sure object and class ids are assigned
  src = add_object(src)
  act_key = activity_key(src[:obj_id])

  if newest_count
    redis.zrevrange("#{act_key}:s", 0, newest_count - 1)
  else
    redis.smembers(act_key)
  end
end
|
133
|
+
|
134
|
+
# data_with_score is hash {key => score} or array [[key,score]]
|
135
|
+
# Expands scored ids into descriptive hashes.
#
# data_with_score - hash {key => score} or array of [key, score] pairs.
#
# For each entry the "object:<key>" hash is read, then the "class:<class_id>"
# value. Returns an array of {:score, :id, :class} hashes in input order.
def load_objects(data_with_score)
  pairs = data_with_score.is_a?(Hash) ? data_with_score.to_a : data_with_score

  pairs.map do |entry|
    stored = redis.hgetall("object:#{entry[0]}")
    class_name = redis.get("class:#{stored["class_id"]}")
    { :score => entry[1], :id => stored["id"], :class => class_name }
  end
end
|
145
|
+
|
146
|
+
# Normalises the supported object notations into a hash.
#
# obj - one of:
#       * an Array (or subclass) [class, id]
#       * a Hash (or subclass), returned as-is
#       * any object responding to #id, described by its class name and id
#
# Returns a hash with :class and :id entries (or the given hash itself).
# Raises ArgumentError for anything else, instead of returning nil and
# letting the caller fail later on an unrelated NoMethodError.
def object_to_hash(obj)
  # Match on the classes themselves so subclasses are accepted too
  case obj
  when Array
    {:class => obj[0], :id => obj[1]}
  when Hash
    obj
  else
    unless obj.respond_to?(:id)
      raise ArgumentError, "cannot describe #{obj.inspect}: expected [class, id], a Hash or an object responding to #id"
    end
    {:class => obj.class.to_s, :id => obj.id}
  end
end
|
158
|
+
|
159
|
+
# Returns a new, empty Hash whose missing keys read as +default+
# (0.0 unless given), so scores can be accumulated with +=.
def empty_hash(default=0.0)
  Hash.new(default)
end
|
164
|
+
|
165
|
+
# Adds the contribution of one similar object to the running scores.
#
# method        - :similarity, :jaccard or :jaccard_similarity; any other
#                 value leaves +res+ untouched. Ignored when a block is given.
# res           - score accumulator, updated in place.
# dst_act       - ids the similar object has activity with.
# src_act       - ids the source object has activity with.
# src_sim_score - similarity score of the similar object.
# src_oid       - id of the similar object.
# block         - optional custom scorer. It receives as many of
#                 (res, dst_act, src_act, src_sim_score, src_oid) as it
#                 declares, or all five when it takes a variable number of
#                 arguments. Its return value is ignored; it is expected to
#                 update +res+ itself.
#
# Returns +res+.
def rscore(method, res, dst_act, src_act, src_sim_score, src_oid, &block)
  if block_given?
    params = [res, dst_act, src_act, src_sim_score, src_oid]
    declared = block.arity
    # Pass exactly the declared number of arguments so arity-strict lambdas
    # work; a negative arity means optional/splat parameters, so pass all.
    block.call(*(declared < 0 ? params : params.first(declared)))
  else
    case method
    when :similarity
      dst_act.each do |dst_oid|
        res[dst_oid] += src_sim_score
      end
    when :jaccard
      jf = jaccard_factor(src_act, dst_act)
      dst_act.each do |dst_oid|
        res[dst_oid] += 1000.0 * jf
      end
    when :jaccard_similarity
      jf = jaccard_factor(src_act, dst_act)
      dst_act.each do |dst_oid|
        res[dst_oid] += src_sim_score * jf
      end
    end
  end
  res
end
|
190
|
+
|
191
|
+
# Jaccard index of two id lists: size of the intersection divided by the
# size of the union.
#
# Returns a Float between 0.0 and 1.0; two empty lists give 0.0 rather
# than the NaN produced by dividing 0.0 by 0.0.
def jaccard_factor(src,dst)
  union_size = (src | dst).size
  return 0.0 if union_size.zero?

  (src & dst).size.to_f / union_size
end
|
194
|
+
|
195
|
+
# Class level cache for objects
|
196
|
+
# Lazily created hash kept in @cache, used to remember looked-up ids.
def cache
  @cache = {} unless @cache
  @cache
end
|
199
|
+
|
200
|
+
# Returns the cached entry for +key+; when the entry is missing (or falsy)
# the block is run, its value stored under +key+ and returned.
def cached_value(key)
  store = cache
  known = store[key]
  return known if known

  store[key] = yield
end
|
203
|
+
|
204
|
+
# Finds or creates the id registered for +value+ under the +base+ namespace.
#
# The lookup key is "<base>:<sha1 of value.to_s>:id". When it has no value,
# a new id is taken from the "next.<base>.id" counter (rendered in base 36)
# and claimed with SETNX. If the claim fails, the value now stored under the
# lookup key is returned instead. After a successful claim the block is
# called with the new id, or, without a block, "<base>:<id>" is set to
# +value+ if not already present.
#
# Returns the id.
def safe_create(base, value)
  digest = Digest::SHA1.hexdigest(value.to_s)
  lookup_key = "#{base}:#{digest}:id"

  existing = redis.get(lookup_key)
  return existing if existing

  fresh_id = redis.incr("next.#{base}.id").to_s(36) # base 36 keeps ids short

  # Claim failed: the lookup key was created in between, use its value
  return redis.get(lookup_key) unless redis.setnx(lookup_key, fresh_id)

  if block_given?
    yield(fresh_id)
  else
    redis.setnx("#{base}:#{fresh_id}", value)
  end
  fresh_id
end
|
219
|
+
end # class << self
|
220
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Redis connection handling for Similus.
module Similus
  # Returns the memoized Redis client built from config.redis_server
  # ("host:port") and config.redis_db.
  #
  # When building the client raises a StandardError, the error is logged
  # through config.logger and nil is returned; nothing is memoized in that
  # case, so the next call tries again. Exceptions outside StandardError
  # (interrupts, exits, out-of-memory) are deliberately not rescued.
  def self.redis
    @redis ||= begin
      host, port = config.redis_server.split(':')
      ::Redis.new(:host => host, :port => port, :db => config.redis_db)
    rescue StandardError => e
      config.logger.error "Error connecting redis server: #{e.message}"
      nil
    end
  end

  # Resets the in-memory @cache and calls flushdb on the redis client.
  # NOTE(review): raises NoMethodError when redis returned nil - confirm
  # whether callers rely on that.
  def self.clear_database!
    @cache = {}
    redis.flushdb
  end
end
|
data/similus.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for similus 0.1.1.
# NOTE(review): cert_chain and signing_key point at absolute paths under
# /Users/horaci.cuevas - presumably the original author's machine; confirm
# before building the gem elsewhere.
Gem::Specification.new do |spec|
  blurb = "A ruby library to find similar objects and make recommendations based on activity of objects"

  # File groups, kept in the order they are listed in the specification
  docs      = ["LICENSES", "README.rdoc"]
  lib_files = ["lib/similus.rb", "lib/similus/config.rb", "lib/similus/core.rb", "lib/similus/redis.rb"]
  benches   = ["benchmarks/benchmark1.rb", "benchmarks/benchmark2.rb", "benchmarks/custom_benchmark.rb", "benchmarks/redis.conf"]
  specs     = ["test/add_activity_spec.rb", "test/recommended_spec.rb", "test/similar_spec.rb"]

  spec.name = "similus"
  spec.version = "0.1.1"

  if spec.respond_to?(:required_rubygems_version=)
    spec.required_rubygems_version = Gem::Requirement.new(">= 1.2")
  end

  spec.authors = ["Horaci Cuevas"]
  spec.cert_chain = ["/Users/horaci.cuevas/gem-public_cert.pem"]
  spec.date = "2010-10-07"
  spec.description = blurb
  spec.email = "horaci @@ gmail.com"
  spec.extra_rdoc_files = docs + lib_files
  spec.files = docs + ["Rakefile"] + benches + ["init.rb"] + lib_files + specs + ["Manifest", "similus.gemspec"]
  spec.homepage = "http://github.com/horaci/similus"
  spec.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Similus", "--main", "README.rdoc"]
  spec.require_paths = ["lib"]
  spec.rubyforge_project = "similus"
  spec.rubygems_version = "1.3.7"
  spec.signing_key = "/Users/horaci.cuevas/gem-private_key.pem"
  spec.summary = blurb

  # No dependencies are declared; only the specification version is set
  spec.specification_version = 3 if spec.respond_to?(:specification_version)
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
$: << File.join(File.dirname(__FILE__), "/../lib")
|
2
|
+
|
3
|
+
require 'similus'
|
4
|
+
|
5
|
+
# Checks which lookup keys exist in redis after recording a single activity.
describe "Similus" do
  describe "Add activity" do
    before(:all) do
      # Start from an empty redis database, then record one activity
      Similus.clear_database!
      Similus.add_activity(["User", 1], :view, ["Movie", "Star Wars 1"])
    end

    # Reads the "<type>:<sha1 of object>:id" key straight from redis;
    # the result is nil when nothing is stored under it.
    def redis_object(type, object)
      Similus.redis.get("#{type}:#{Digest::SHA1.hexdigest(object)}:id")
    end

    it "should create classes in redis" do
      ["User", "Movie"].each { |name| redis_object("class", name).should_not be_nil }
      redis_object("class", "Other").should be_nil
    end

    it "should create objects in redis" do
      ["User:1", "Movie:Star Wars 1"].each { |name| redis_object("object", name).should_not be_nil }
      redis_object("object", "User:2").should be_nil
    end

    it "should create actions in redis" do
      redis_object("action", "view").should_not be_nil
      redis_object("action", "like").should be_nil
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
$: << File.join(File.dirname(__FILE__), "/../lib")
|
2
|
+
require 'similus'
|
3
|
+
|
4
|
+
# Exercises Similus.recommended_for against a small, fixed viewing history.
describe "Similus" do
  before(:all) do
    # Start from an empty redis database
    Similus.clear_database!

    # [user id, movies viewed]; recorded in exactly this order
    viewings = [
      [1, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Star Wars 4"]],
      [2, ["Star Wars 3", "Star Wars 4", "Star Wars 5"]],
      [3, ["Star Wars 1", "Star Wars 3", "Star Wars 5"]],
      [4, ["Star Wars 2", "Star Wars 3"]],
      [5, ["Star Wars 1", "Star Wars 2", "Star Wars 3", "Blade Runner"]],
      [6, ["Star Wars 1", "Star Wars 5", "Blade Runner"]],
      [7, ["Casablanca"]]
    ]

    viewings.each do |user_id, titles|
      titles.each do |title|
        Similus.add_activity(["User", user_id], :view, ["Movie", title])
      end
    end
  end

  describe "#recommended_for" do
    describe "User 1" do
      before(:all) do
        @recommended_for_user_1 = Similus.recommended_for(["User", 1])
      end

      it "should recommend SW5 and then Blade Runner" do
        first, second = @recommended_for_user_1[0], @recommended_for_user_1[1]
        first[:id].should == "Star Wars 5"
        first[:score].should == 4.0
        second[:id].should == "Blade Runner"
        second[:score].should == 3.0
      end

      it "should not recommend Casablanca" do
        @recommended_for_user_1.detect { |x| x[:id] == "Casablanca" }.should be_nil
      end
    end

    describe "other users" do
      before(:all) do
        # Fills @recommended_for_user_2 .. @recommended_for_user_7
        (2..7).each do |user_id|
          instance_variable_set("@recommended_for_user_#{user_id}",
                                Similus.recommended_for(["User", user_id]))
        end
      end

      it "should recommend only SW1 and then SW2 to user2 but not blade runner" do
        first, second = @recommended_for_user_2[0], @recommended_for_user_2[1]
        first[:id].should == "Star Wars 1"
        first[:score].should == 4.0
        second[:id].should == "Star Wars 2"
        second[:score].should == 2.0
        @recommended_for_user_2.detect { |x| x[:id] == "Blade Runner" }.should be_nil
      end

      it "should recommend only BR and SW4 and SW2 to user3" do
        ["Blade Runner", "Star Wars 4", "Star Wars 2"].each do |title|
          @recommended_for_user_3.detect { |x| x[:id] == title }.should_not be_nil
        end
      end

      it "should recommend first SW1 and then SW4 and BR to user4" do
        @recommended_for_user_4.first[:id].should == "Star Wars 1"
        ["Blade Runner", "Star Wars 4"].each do |title|
          @recommended_for_user_4.detect { |x| x[:id] == title }.should_not be_nil
        end
      end

      it "should recommend first SW5 and then SW4 to user5" do
        first, second = @recommended_for_user_5[0], @recommended_for_user_5[1]
        first[:id].should == "Star Wars 5"
        first[:score].should == 4.0
        second[:id].should == "Star Wars 4"
        second[:score].should == 3.0
      end

      it "should recommend first SW3 and then SW2 to user6" do
        first, second = @recommended_for_user_6[0], @recommended_for_user_6[1]
        first[:id].should == "Star Wars 3"
        first[:score].should == 4.0
        second[:id].should == "Star Wars 2"
        second[:score].should == 2.0
      end

      it "should recommend nothing to user7" do
        @recommended_for_user_7.should be_empty
      end
    end
  end
end
|