commendo 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/bin/commendo-find-identical-pairs +24 -0
- data/bin/commendo-load +21 -0
- data/bin/commendo-recommendations-distribution +24 -0
- data/bin/commendo-similarity-distribution +27 -0
- data/lib/commendo/content_set.rb +48 -26
- data/lib/commendo/pair_comparison.lua +29 -7
- data/lib/commendo/similarity.lua +45 -17
- data/lib/commendo/version.rb +1 -1
- data/test/content_set_test.rb +96 -39
- data/test/weighted_group_test.rb +30 -37
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62cfaa4fff44e045a7adf83a7bc3ccb29db84812
|
4
|
+
data.tar.gz: 8473e5cb764e1474f2aa541b195afd22ca123a84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e2911b2ab856cc6ce7514f289a02cf1fe3e29e9e4a546aca1a11a97da8c4f4b9a2518074ed8b1b4148ffde164c62a970d36f42cd1c6e56b9c49021f513b818f
|
7
|
+
data.tar.gz: f8664d3e2e048c9026865929c1c39375242b3f05d669b6b14212442f3ffaeb0aeca03a8a80091c42e1b59d6e305d7622b5ffb8eca516f083a4af991aaeaf8ef7
|
data/CHANGELOG.md
CHANGED
data/bin/commendo-find-identical-pairs
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
content_set_base_redis_key = ARGV[0]
|
4
|
+
|
5
|
+
require 'redis'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
redis = Redis.new(db: 2)
|
9
|
+
#cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
10
|
+
|
11
|
+
distribution = {}
|
12
|
+
cursor = 0
|
13
|
+
begin
|
14
|
+
cursor, keys = redis.scan(cursor.to_i, match: "#{content_set_base_redis_key}*", count: 1000)
|
15
|
+
keys.each do |key|
|
16
|
+
left_resource = key.gsub(/^#{content_set_base_redis_key}/, '')
|
17
|
+
similar = redis.zrange(key, 0, -1, with_scores: true)
|
18
|
+
similar.each do |recommendation|
|
19
|
+
right_resource, score = recommendation
|
20
|
+
puts "#{left_resource}\t#{right_resource}" if score == 1 && left_resource < right_resource
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end while cursor.to_i > 0
|
24
|
+
|
data/bin/commendo-load
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
redis_db = ARGV[0].to_i
|
4
|
+
content_set_base_redis_key = ARGV[1]
|
5
|
+
filename = ARGV[2]
|
6
|
+
|
7
|
+
require 'redis'
|
8
|
+
require 'commendo'
|
9
|
+
|
10
|
+
redis = Redis.new(db: 2)
|
11
|
+
cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
12
|
+
|
13
|
+
File.open(filename) do |f|
|
14
|
+
|
15
|
+
current_resource = nil
|
16
|
+
current_headings = []
|
17
|
+
|
18
|
+
f.each_line.with_index do |tsv, i|
|
19
|
+
next if i.zero?
|
20
|
+
|
21
|
+
resource, _classified, root, score, heading = tsv.split(/\t/)
|
data/bin/commendo-recommendations-distribution
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
content_set_base_redis_key = ARGV[0]
|
4
|
+
|
5
|
+
require 'redis'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
redis = Redis.new(db: 2)
|
9
|
+
#cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
10
|
+
|
11
|
+
distribution = {}
|
12
|
+
cursor = 0
|
13
|
+
begin
|
14
|
+
cursor, keys = redis.scan(cursor.to_i, match: content_set_base_redis_key, count: 1000)
|
15
|
+
keys.each do |key|
|
16
|
+
count = redis.zcard(key)
|
17
|
+
distribution[count] ||= 0
|
18
|
+
distribution[count] += 1
|
19
|
+
end
|
20
|
+
end while cursor.to_i > 0
|
21
|
+
|
22
|
+
distribution.each do |score, count|
|
23
|
+
puts "#{score}\t#{count}"
|
24
|
+
end
|
data/bin/commendo-similarity-distribution
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
content_set_base_redis_key = ARGV[0]
|
4
|
+
|
5
|
+
require 'redis'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
redis = Redis.new(db: 2)
|
9
|
+
#cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
10
|
+
|
11
|
+
distribution = {}
|
12
|
+
cursor = 0
|
13
|
+
begin
|
14
|
+
cursor, keys = redis.scan(cursor.to_i, match: content_set_base_redis_key, count: 1000)
|
15
|
+
keys.each do |key|
|
16
|
+
similar = redis.zrange(key, 0, -1, with_scores: true)
|
17
|
+
similar.each do |recommendation|
|
18
|
+
resource, score = recommendation
|
19
|
+
distribution[score.round(1)] ||= 0
|
20
|
+
distribution[score.round(1)] += 1
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end while cursor.to_i > 0
|
24
|
+
|
25
|
+
distribution.each do |score, count|
|
26
|
+
puts "#{score}\t#{count}"
|
27
|
+
end
|
data/lib/commendo/content_set.rb
CHANGED
@@ -9,33 +9,33 @@ module Commendo
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def add_by_group(group, *resources)
|
12
|
-
redis.sadd(group_key(group), resources)
|
13
12
|
resources.each do |resource|
|
14
|
-
|
13
|
+
if resource.kind_of?(Array)
|
14
|
+
add_single(resource[0], group, resource[1])
|
15
|
+
else
|
16
|
+
add_single(resource, group, 1)
|
17
|
+
end
|
15
18
|
end
|
16
19
|
end
|
17
20
|
|
18
21
|
def add(resource, *groups)
|
19
|
-
redis.sadd(resource_key(resource), groups)
|
20
22
|
groups.each do |group|
|
21
|
-
|
23
|
+
if group.kind_of?(Array)
|
24
|
+
add_single(resource, group[0], group[1])
|
25
|
+
else
|
26
|
+
add_single(resource, group, 1)
|
27
|
+
end
|
22
28
|
end
|
23
29
|
end
|
24
30
|
|
31
|
+
def add_single(resource, group, score)
|
32
|
+
redis.zincrby(group_key(group), score, resource)
|
33
|
+
redis.zincrby(resource_key(resource), score, group)
|
34
|
+
end
|
35
|
+
|
25
36
|
def add_and_calculate(resource, *groups)
|
26
37
|
add(resource, *groups)
|
27
|
-
|
28
|
-
group_keys = groups.map { |group| group_key(group) }
|
29
|
-
resources = redis.sunion(*group_keys)
|
30
|
-
resources.combination(2) do |l, r|
|
31
|
-
intersect = redis.sinter(resource_key(l), resource_key(r))
|
32
|
-
if (intersect.length > 0)
|
33
|
-
union = redis.sunion(resource_key(l), resource_key(r))
|
34
|
-
jaccard = intersect.length / union.length.to_f
|
35
|
-
redis.zadd(similarity_key(l), jaccard, r)
|
36
|
-
redis.zadd(similarity_key(r), jaccard, l)
|
37
|
-
end
|
38
|
-
end
|
38
|
+
calculate_similarity_for_resource(resource, 0)
|
39
39
|
end
|
40
40
|
|
41
41
|
def delete(resource)
|
@@ -48,25 +48,43 @@ module Commendo
|
|
48
48
|
redis.del(resource_key(resource))
|
49
49
|
end
|
50
50
|
|
51
|
+
SET_TOO_LARGE_FOR_LUA = 999
|
52
|
+
|
51
53
|
def calculate_similarity(threshold = 0)
|
52
54
|
#TODO make this use scan for scaling
|
53
55
|
keys = redis.keys("#{resource_key_base}:*")
|
54
56
|
keys.each_with_index do |key, i|
|
55
57
|
yield(key, i, keys.length) if block_given?
|
56
|
-
completed = redis.eval(similarity_lua, keys: [key], argv: [resource_key_base, similar_key_base, group_key_base, threshold])
|
57
|
-
if completed ==
|
58
|
-
|
59
|
-
groups = redis.smembers(resource_key(resource))
|
60
|
-
group_keys = groups.map { |group| group_key(group) }
|
61
|
-
resources = redis.sunion(*group_keys)
|
62
|
-
resources.each do |to_compare|
|
63
|
-
next if resource == to_compare
|
64
|
-
redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [resource, to_compare, threshold])
|
65
|
-
end
|
58
|
+
completed = redis.eval(similarity_lua, keys: [key], argv: [tmp_key_base, resource_key_base, similar_key_base, group_key_base, threshold])
|
59
|
+
if completed == SET_TOO_LARGE_FOR_LUA
|
60
|
+
calculate_similarity_for_key(key, threshold)
|
66
61
|
end
|
67
62
|
end
|
68
63
|
end
|
69
64
|
|
65
|
+
def calculate_similarity_for_key(key, threshold)
|
66
|
+
resource = key.gsub(/^#{resource_key_base}:/, '')
|
67
|
+
calculate_similarity_for_key_resource(key, resource, threshold)
|
68
|
+
end
|
69
|
+
|
70
|
+
def calculate_similarity_for_resource(resource, threshold)
|
71
|
+
key = resource_key(resource)
|
72
|
+
calculate_similarity_for_key_resource(key, resource, threshold)
|
73
|
+
end
|
74
|
+
|
75
|
+
def calculate_similarity_for_key_resource(key, resource, threshold)
|
76
|
+
groups = redis.zrange(resource_key(resource), 0, -1)
|
77
|
+
group_keys = groups.map { |group| group_key(group) }
|
78
|
+
tmp_key = "#{tmp_key_base}:#{SecureRandom.uuid}"
|
79
|
+
redis.zunionstore(tmp_key, group_keys)
|
80
|
+
resources = redis.zrange(tmp_key, 0, -1)
|
81
|
+
redis.del(tmp_key)
|
82
|
+
resources.each do |to_compare|
|
83
|
+
next if resource == to_compare
|
84
|
+
redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [tmp_key_base, resource, to_compare, threshold])
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
70
88
|
def similar_to(resource)
|
71
89
|
if resource.kind_of? Array
|
72
90
|
keys = resource.map do |res|
|
@@ -116,6 +134,10 @@ module Commendo
|
|
116
134
|
file.read
|
117
135
|
end
|
118
136
|
|
137
|
+
def tmp_key_base
|
138
|
+
"#{key_base}:tmp"
|
139
|
+
end
|
140
|
+
|
119
141
|
def similar_key_base
|
120
142
|
"#{key_base}:similar"
|
121
143
|
end
|
data/lib/commendo/pair_comparison.lua
CHANGED
@@ -3,9 +3,10 @@ local right_key = KEYS[2]
|
|
3
3
|
local left_similarity_key = KEYS[3]
|
4
4
|
local right_similarity_key = KEYS[4]
|
5
5
|
|
6
|
-
local
|
7
|
-
local
|
8
|
-
local
|
6
|
+
local tmp_key_base = ARGV[1]
|
7
|
+
local left = tonumber(ARGV[2])
|
8
|
+
local right = tonumber(ARGV[3])
|
9
|
+
local threshold = tonumber(ARGV[4])
|
9
10
|
|
10
11
|
local function round(num, idp)
|
11
12
|
local mult = 10^(idp or 0)
|
@@ -14,10 +15,31 @@ end
|
|
14
15
|
|
15
16
|
redis.log(redis.LOG_NOTICE, 'Running pair comparison for ' .. left_key .. ' ' .. right_key)
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
|
19
|
+
--TODO change bar
|
20
|
+
local tmp_pair_intersect_key = tmp_key_base .. 'bar'
|
21
|
+
redis.call('ZINTERSTORE', tmp_pair_intersect_key, 2, left_key, right_key)
|
22
|
+
local intersect = redis.call('ZRANGE', tmp_pair_intersect_key, 0, -1, 'WITHSCORES')
|
23
|
+
redis.call('DEL', tmp_pair_intersect_key)
|
24
|
+
|
25
|
+
if table.getn(intersect) > 0 then
|
26
|
+
local intersect_score = 0
|
27
|
+
for i=1,#intersect,2 do
|
28
|
+
intersect_score = intersect_score + intersect[i+1]
|
29
|
+
end
|
30
|
+
|
31
|
+
--TODO change baz
|
32
|
+
local tmp_pair_union_key = tmp_key_base .. 'baz'
|
33
|
+
redis.call('ZUNIONSTORE', tmp_pair_union_key, 2, left_key, right_key)
|
34
|
+
|
35
|
+
local union = redis.call('ZRANGE', tmp_pair_union_key, 0, -1, 'WITHSCORES')
|
36
|
+
redis.call('DEL', tmp_pair_union_key)
|
37
|
+
local union_score = 0
|
38
|
+
for i=1,#union,2 do
|
39
|
+
union_score = union_score + union[i+1]
|
40
|
+
end
|
41
|
+
|
42
|
+
local similarity = round(intersect_score / union_score, 3)
|
21
43
|
if similarity > threshold then
|
22
44
|
redis.call('ZADD', left_similarity_key, similarity, right)
|
23
45
|
redis.call('ZADD', right_similarity_key, similarity, left)
|
data/lib/commendo/similarity.lua
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
local resource_key = KEYS[1]
|
2
|
-
local
|
3
|
-
local
|
4
|
-
local
|
5
|
-
local
|
2
|
+
local tmp_key_base = ARGV[1]
|
3
|
+
local resource_key_base = ARGV[2]
|
4
|
+
local sim_key_base = ARGV[3]
|
5
|
+
local group_key_base = ARGV[4]
|
6
|
+
local threshold = tonumber(ARGV[5])
|
6
7
|
|
7
8
|
local function round(num, idp)
|
8
9
|
local mult = 10^(idp or 0)
|
@@ -12,7 +13,7 @@ end
|
|
12
13
|
redis.log(redis.LOG_NOTICE, 'Running complete similarity for ' .. resource_key)
|
13
14
|
|
14
15
|
local resource = resource_key:gsub('%' .. resource_key_base .. ':', '')
|
15
|
-
local groups = redis.call('
|
16
|
+
local groups = redis.call('ZRANGE', resource_key, 0, -1)
|
16
17
|
|
17
18
|
if table.getn(groups) > 999 then
|
18
19
|
redis.log(redis.LOG_NOTICE, 'Complete similarity too large for ' .. resource_key .. ', ' .. table.getn(groups))
|
@@ -25,24 +26,45 @@ for _,group in ipairs(groups) do
|
|
25
26
|
end
|
26
27
|
--redis.log(redis.LOG_NOTICE, 'Found ' .. table.getn(group_keys) .. ' group keys')
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
29
|
+
--TODO change foo
|
30
|
+
local tmp_groups_union_key = tmp_key_base .. 'unionfoo'
|
31
|
+
redis.call('ZUNIONSTORE', tmp_groups_union_key, table.getn(group_keys), unpack(group_keys))
|
32
|
+
local resources = redis.call('ZRANGE', tmp_groups_union_key, 0, -1)
|
32
33
|
|
33
34
|
local previous = 'foo'
|
34
35
|
for _,to_compare in ipairs(resources) do
|
35
|
-
--
|
36
|
+
--redis.log(redis.LOG_NOTICE, 'Comparing ' .. resource .. ' and ' .. to_compare)
|
36
37
|
if to_compare ~= previous then
|
37
38
|
previous = to_compare
|
38
39
|
if resource > to_compare then
|
39
|
-
--
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
--redis.log(redis.LOG_NOTICE, 'Calculating similarity for ' .. resource .. ' and ' .. to_compare)
|
41
|
+
|
42
|
+
--TODO change bar
|
43
|
+
local tmp_pair_intersect_key = tmp_key_base .. 'bar'
|
44
|
+
redis.call('ZINTERSTORE', tmp_pair_intersect_key, 2, resource_key, resource_key_base .. ':' .. to_compare)
|
45
|
+
local intersect = redis.call('ZRANGE', tmp_pair_intersect_key, 0, -1, 'WITHSCORES')
|
46
|
+
redis.call('DEL', tmp_pair_intersect_key)
|
47
|
+
|
48
|
+
if table.getn(intersect) > 0 then
|
49
|
+
local intersect_score = 0
|
50
|
+
for i=1,#intersect,2 do
|
51
|
+
intersect_score = intersect_score + intersect[i+1]
|
52
|
+
end
|
53
|
+
|
54
|
+
--TODO change baz
|
55
|
+
local tmp_pair_union_key = tmp_key_base .. 'baz'
|
56
|
+
redis.call('ZUNIONSTORE', tmp_pair_union_key, 2, resource_key, resource_key_base .. ':' .. to_compare)
|
57
|
+
|
58
|
+
local union = redis.call('ZRANGE', tmp_pair_union_key, 0, -1, 'WITHSCORES')
|
59
|
+
redis.call('DEL', tmp_pair_union_key)
|
60
|
+
local union_score = 0
|
61
|
+
for i=1,#union,2 do
|
62
|
+
union_score = union_score + union[i+1]
|
63
|
+
end
|
64
|
+
|
65
|
+
local similarity = round(intersect_score / union_score, 3)
|
44
66
|
if similarity > threshold then
|
45
|
-
--
|
67
|
+
--redis.log(redis.LOG_NOTICE, resource .. ' and ' .. to_compare .. ' scored ' .. similarity)
|
46
68
|
redis.call('ZADD', sim_key_base .. ':' .. resource, similarity, to_compare)
|
47
69
|
redis.call('ZADD', sim_key_base .. ':' .. to_compare, similarity, resource)
|
48
70
|
end
|
@@ -51,6 +73,12 @@ for _,to_compare in ipairs(resources) do
|
|
51
73
|
end
|
52
74
|
end
|
53
75
|
|
76
|
+
redis.call('DEL', tmp_groups_union_key)
|
77
|
+
|
54
78
|
redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)
|
55
79
|
|
56
|
-
return true
|
80
|
+
return true
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
data/lib/commendo/version.rb
CHANGED
data/test/content_set_test.rb
CHANGED
@@ -16,7 +16,7 @@ module Commendo
|
|
16
16
|
assert_equal 'CommendoTests:similar:resource-1', cs.similarity_key('resource-1')
|
17
17
|
end
|
18
18
|
|
19
|
-
def
|
19
|
+
def test_recommends_when_added
|
20
20
|
redis = Redis.new(db: 15)
|
21
21
|
redis.flushdb
|
22
22
|
key_base = 'CommendoTests'
|
@@ -25,33 +25,89 @@ module Commendo
|
|
25
25
|
cs.add('resource-2', 'group-1')
|
26
26
|
cs.add('resource-3', 'group-1', 'group-2')
|
27
27
|
cs.add('resource-4', 'group-2')
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
|
36
|
-
assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
|
28
|
+
cs.calculate_similarity
|
29
|
+
expected = [
|
30
|
+
{resource: 'resource-3', similarity: 1.0},
|
31
|
+
{resource: 'resource-4', similarity: 0.667},
|
32
|
+
{resource: 'resource-2', similarity: 0.667}
|
33
|
+
]
|
34
|
+
assert_equal expected, cs.similar_to('resource-1')
|
37
35
|
end
|
38
36
|
|
39
|
-
def
|
37
|
+
def test_recommends_when_added_with_scores
|
38
|
+
redis = Redis.new(db: 15)
|
39
|
+
redis.flushdb
|
40
|
+
key_base = 'CommendoTests'
|
41
|
+
cs = ContentSet.new(redis, key_base)
|
42
|
+
cs.add('resource-1', ['group-1', 2], ['group-2', 2])
|
43
|
+
cs.add('resource-2', ['group-1', 7])
|
44
|
+
cs.add('resource-3', ['group-1', 2], ['group-2', 2])
|
45
|
+
cs.add('resource-4', ['group-2', 3])
|
46
|
+
cs.calculate_similarity
|
47
|
+
expected = [
|
48
|
+
{resource: 'resource-3', similarity: 1.0},
|
49
|
+
{resource: 'resource-2', similarity: 0.818},
|
50
|
+
{resource: 'resource-4', similarity: 0.714}
|
51
|
+
]
|
52
|
+
assert_equal expected, cs.similar_to('resource-1')
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_recommends_when_extra_scores_added
|
56
|
+
test_recommends_when_added_with_scores
|
57
|
+
redis = Redis.new(db: 15)
|
58
|
+
key_base = 'CommendoTests'
|
59
|
+
cs = ContentSet.new(redis, key_base)
|
60
|
+
cs.add('resource-3', ['group-1', 1], ['group-3', 2])
|
61
|
+
cs.add('resource-4', ['group-2', 1])
|
62
|
+
cs.add_by_group('group-1', ['newource-9', 100], 'resource-2', 'resource-3')
|
63
|
+
cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
|
64
|
+
cs.calculate_similarity
|
65
|
+
expected = [
|
66
|
+
{resource: 'newource-9', similarity: 1.0},
|
67
|
+
{resource: 'resource-1', similarity: 0.769},
|
68
|
+
{resource: 'resource-3', similarity: 0.706}
|
69
|
+
]
|
70
|
+
assert_equal expected, cs.similar_to('resource-2')
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_recommends_when_added_by_group
|
40
74
|
redis = Redis.new(db: 15)
|
41
75
|
redis.flushdb
|
42
76
|
key_base = 'CommendoTests'
|
43
77
|
cs = ContentSet.new(redis, key_base)
|
44
78
|
cs.add_by_group('group-1', 'resource-1', 'resource-2', 'resource-3')
|
45
79
|
cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
80
|
+
cs.calculate_similarity
|
81
|
+
expected = [
|
82
|
+
{resource: 'resource-3', similarity: 1.0},
|
83
|
+
{resource: 'resource-4', similarity: 0.667},
|
84
|
+
{resource: 'resource-2', similarity: 0.667}
|
85
|
+
]
|
86
|
+
assert_equal expected, cs.similar_to('resource-1')
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_recommends_when_added_by_group_with_scores
|
90
|
+
redis = Redis.new(db: 15)
|
91
|
+
redis.flushdb
|
92
|
+
key_base = 'CommendoTests'
|
93
|
+
cs = ContentSet.new(redis, key_base)
|
94
|
+
cs.add_by_group('group-1', ['resource-1', 2], ['resource-2', 3], ['resource-3', 7])
|
95
|
+
cs.add_by_group('group-2', ['resource-1', 2], ['resource-3', 3], ['resource-4', 5])
|
96
|
+
cs.calculate_similarity
|
97
|
+
expected = [
|
98
|
+
{resource: 'resource-3', similarity: 1.0},
|
99
|
+
{resource: 'resource-4', similarity: 0.778},
|
100
|
+
{resource: 'resource-2', similarity: 0.714}
|
101
|
+
]
|
102
|
+
assert_equal expected, cs.similar_to('resource-1')
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_recommendations_are_isolated_by_key_base
|
106
|
+
skip
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_recommendations_are_isolated_by_redis_db
|
110
|
+
skip
|
55
111
|
end
|
56
112
|
|
57
113
|
def test_calculates_similarity_scores
|
@@ -66,12 +122,12 @@ module Commendo
|
|
66
122
|
end
|
67
123
|
cs.calculate_similarity
|
68
124
|
expected = [
|
69
|
-
{resource: '9', similarity: 0.
|
70
|
-
{resource: '6', similarity: 0.
|
71
|
-
{resource: '12', similarity: 0.
|
72
|
-
{resource: '3', similarity: 0.
|
73
|
-
{resource: '21', similarity: 0.
|
74
|
-
{resource: '15', similarity: 0.
|
125
|
+
{resource: '9', similarity: 0.667},
|
126
|
+
{resource: '6', similarity: 0.667},
|
127
|
+
{resource: '12', similarity: 0.5},
|
128
|
+
{resource: '3', similarity: 0.4},
|
129
|
+
{resource: '21', similarity: 0.286},
|
130
|
+
{resource: '15', similarity: 0.286}
|
75
131
|
]
|
76
132
|
assert_equal expected, cs.similar_to(18)
|
77
133
|
end
|
@@ -88,8 +144,9 @@ module Commendo
|
|
88
144
|
end
|
89
145
|
cs.calculate_similarity(0.4)
|
90
146
|
expected = [
|
91
|
-
{resource: '9', similarity: 0.
|
92
|
-
{resource: '6', similarity: 0.
|
147
|
+
{resource: '9', similarity: 0.667},
|
148
|
+
{resource: '6', similarity: 0.667},
|
149
|
+
{resource: '12', similarity: 0.5}
|
93
150
|
]
|
94
151
|
assert_equal expected, cs.similar_to(18)
|
95
152
|
end
|
@@ -250,17 +307,17 @@ module Commendo
|
|
250
307
|
end
|
251
308
|
cs.calculate_similarity
|
252
309
|
expected = [
|
253
|
-
{resource: '18', similarity: 1.
|
254
|
-
{resource: '3', similarity: 1.
|
255
|
-
{resource: '6', similarity:
|
256
|
-
{resource: '
|
257
|
-
{resource: '
|
258
|
-
{resource: '
|
259
|
-
{resource: '9', similarity: 0.
|
260
|
-
{resource: '4', similarity: 0.
|
261
|
-
{resource: '8', similarity: 0.
|
262
|
-
{resource: '16', similarity: 0.
|
263
|
-
{resource: '20', similarity: 0.
|
310
|
+
{resource: '18', similarity: 1.834},
|
311
|
+
{resource: '3', similarity: 1.734},
|
312
|
+
{resource: '6', similarity: 1.167},
|
313
|
+
{resource: '21', similarity: 1.086},
|
314
|
+
{resource: '15', similarity: 1.086},
|
315
|
+
{resource: '12', similarity: 1.0},
|
316
|
+
{resource: '9', similarity: 0.833},
|
317
|
+
{resource: '4', similarity: 0.4},
|
318
|
+
{resource: '8', similarity: 0.333},
|
319
|
+
{resource: '16', similarity: 0.286},
|
320
|
+
{resource: '20', similarity: 0.25}
|
264
321
|
]
|
265
322
|
actual = cs.similar_to([12, 6, 9])
|
266
323
|
assert_equal expected, actual
|
data/test/weighted_group_test.rb
CHANGED
@@ -19,9 +19,9 @@ module Commendo
|
|
19
19
|
@cs3 = ContentSet.new(@redis, 'CommendoTests:ContentSet3', @tag_set)
|
20
20
|
(3..23).each do |group|
|
21
21
|
(3..23).each do |res|
|
22
|
-
@cs1.add_by_group(group, res) if (
|
23
|
-
@cs2.add_by_group(group, res) if (
|
24
|
-
@cs3.add_by_group(group, res) if (
|
22
|
+
@cs1.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(2).zero?
|
23
|
+
@cs2.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(3).zero?
|
24
|
+
@cs3.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(6).zero?
|
25
25
|
@tag_set.add(res, 'mod3') if res.modulo(3).zero?
|
26
26
|
@tag_set.add(res, 'mod4') if res.modulo(4).zero?
|
27
27
|
@tag_set.add(res, 'mod5') if res.modulo(5).zero?
|
@@ -40,12 +40,12 @@ module Commendo
|
|
40
40
|
{cs: @cs3, weight: 100.0}
|
41
41
|
)
|
42
42
|
expected = [
|
43
|
-
{resource: '6', similarity:
|
44
|
-
{resource: '12', similarity:
|
45
|
-
{resource: '9', similarity:
|
46
|
-
{resource: '3', similarity:
|
47
|
-
{resource: '21', similarity:
|
48
|
-
{resource: '15', similarity:
|
43
|
+
{resource: '6', similarity: 74.037},
|
44
|
+
{resource: '12', similarity: 55.5},
|
45
|
+
{resource: '9', similarity: 6.67},
|
46
|
+
{resource: '3', similarity: 4.0},
|
47
|
+
{resource: '21', similarity: 2.86},
|
48
|
+
{resource: '15', similarity: 2.86}
|
49
49
|
]
|
50
50
|
assert_equal expected, weighted_group.similar_to(18)
|
51
51
|
end
|
@@ -58,7 +58,7 @@ module Commendo
|
|
58
58
|
{cs: @cs2, weight: 10.0},
|
59
59
|
{cs: @cs3, weight: 100.0}
|
60
60
|
)
|
61
|
-
expected = [{resource: '15', similarity:
|
61
|
+
expected = [{resource: '15', similarity: 2.86}]
|
62
62
|
weighted_group.tag_set = @tag_set
|
63
63
|
assert_equal expected, weighted_group.filtered_similar_to(18, include: ['mod5'])
|
64
64
|
end
|
@@ -72,10 +72,10 @@ module Commendo
|
|
72
72
|
{cs: @cs3, weight: 100.0}
|
73
73
|
)
|
74
74
|
expected = [
|
75
|
-
{resource: '6', similarity:
|
76
|
-
{resource: '12', similarity:
|
77
|
-
{resource: '9', similarity:
|
78
|
-
{resource: '3', similarity:
|
75
|
+
{resource: '6', similarity: 74.037},
|
76
|
+
{resource: '12', similarity: 55.5},
|
77
|
+
{resource: '9', similarity: 6.67},
|
78
|
+
{resource: '3', similarity: 4.0}
|
79
79
|
]
|
80
80
|
weighted_group.tag_set = @tag_set
|
81
81
|
assert_equal expected, weighted_group.filtered_similar_to(18, exclude: ['mod5', 'mod7'])
|
@@ -90,9 +90,9 @@ module Commendo
|
|
90
90
|
{cs: @cs3, weight: 1.0}
|
91
91
|
)
|
92
92
|
expected = [
|
93
|
-
{resource: '16', similarity:
|
94
|
-
{resource: '4', similarity:
|
95
|
-
{resource: '12', similarity:
|
93
|
+
{resource: '16', similarity: 80.0},
|
94
|
+
{resource: '4', similarity: 66.7},
|
95
|
+
{resource: '12', similarity: 33.3}
|
96
96
|
]
|
97
97
|
weighted_group.tag_set = @tag_set
|
98
98
|
assert_equal expected, weighted_group.filtered_similar_to(8, include: ['mod4'], exclude: ['mod5'])
|
@@ -107,16 +107,16 @@ module Commendo
|
|
107
107
|
{cs: @cs3, weight: 1.0}
|
108
108
|
)
|
109
109
|
expected = [
|
110
|
-
{resource: '12', similarity:
|
111
|
-
{resource: '18', similarity:
|
112
|
-
{resource: '8', similarity:
|
113
|
-
{resource: '16', similarity:
|
114
|
-
{resource: '20', similarity:
|
115
|
-
{resource: '9', similarity:
|
116
|
-
{resource: '21', similarity:
|
117
|
-
{resource: '15', similarity:
|
118
|
-
{resource: '6', similarity:
|
119
|
-
{resource: '3', similarity:
|
110
|
+
{resource: '12', similarity: 118.037},
|
111
|
+
{resource: '18', similarity: 78.037},
|
112
|
+
{resource: '8', similarity: 66.7},
|
113
|
+
{resource: '16', similarity: 50.0},
|
114
|
+
{resource: '20', similarity: 40.0},
|
115
|
+
{resource: '9', similarity: 11.67},
|
116
|
+
{resource: '21', similarity: 9.0},
|
117
|
+
{resource: '15', similarity: 9.0},
|
118
|
+
{resource: '6', similarity: 6.67},
|
119
|
+
{resource: '3', similarity: 6.67}
|
120
120
|
]
|
121
121
|
weighted_group.tag_set = @tag_set
|
122
122
|
assert_equal expected, weighted_group.similar_to([3,4,5,6,7])
|
@@ -131,16 +131,9 @@ module Commendo
|
|
131
131
|
{cs: @cs3, weight: 1.0}
|
132
132
|
)
|
133
133
|
expected = [
|
134
|
-
{resource: '12', similarity:
|
135
|
-
|
136
|
-
{resource: '
|
137
|
-
{resource: '16', similarity: 33.3},
|
138
|
-
#{resource: '20', similarity: 25.0},
|
139
|
-
#{resource: '9', similarity: 8.33},
|
140
|
-
#{resource: '21', similarity: 5.83},
|
141
|
-
#{resource: '15', similarity: 5.83},
|
142
|
-
#{resource: '6', similarity: 5.0},
|
143
|
-
#{resource: '3', similarity: 5.0}
|
134
|
+
{resource: '12', similarity: 118.037},
|
135
|
+
{resource: '8', similarity: 66.7},
|
136
|
+
{resource: '16', similarity: 50.0},
|
144
137
|
]
|
145
138
|
weighted_group.tag_set = @tag_set
|
146
139
|
assert_equal expected, weighted_group.filtered_similar_to([3,4,5,6,7], include: ['mod4'], exclude: ['mod5'])
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commendo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04-
|
11
|
+
date: 2014-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
@@ -97,7 +97,11 @@ dependencies:
|
|
97
97
|
description: A Jaccard-similarity recommender using Redis sets
|
98
98
|
email:
|
99
99
|
- rob.styles@dynamicorange.com
|
100
|
-
executables:
|
100
|
+
executables:
|
101
|
+
- commendo-find-identical-pairs
|
102
|
+
- commendo-load
|
103
|
+
- commendo-recommendations-distribution
|
104
|
+
- commendo-similarity-distribution
|
101
105
|
extensions: []
|
102
106
|
extra_rdoc_files: []
|
103
107
|
files:
|
@@ -107,6 +111,10 @@ files:
|
|
107
111
|
- LICENSE.txt
|
108
112
|
- README.md
|
109
113
|
- Rakefile
|
114
|
+
- bin/commendo-find-identical-pairs
|
115
|
+
- bin/commendo-load
|
116
|
+
- bin/commendo-recommendations-distribution
|
117
|
+
- bin/commendo-similarity-distribution
|
110
118
|
- commendo.gemspec
|
111
119
|
- lib/commendo.rb
|
112
120
|
- lib/commendo/content_set.rb
|