commendo 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/bin/commendo-find-identical-pairs +24 -0
- data/bin/commendo-load +21 -0
- data/bin/commendo-recommendations-distribution +24 -0
- data/bin/commendo-similarity-distribution +27 -0
- data/lib/commendo/content_set.rb +48 -26
- data/lib/commendo/pair_comparison.lua +29 -7
- data/lib/commendo/similarity.lua +45 -17
- data/lib/commendo/version.rb +1 -1
- data/test/content_set_test.rb +96 -39
- data/test/weighted_group_test.rb +30 -37
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62cfaa4fff44e045a7adf83a7bc3ccb29db84812
|
4
|
+
data.tar.gz: 8473e5cb764e1474f2aa541b195afd22ca123a84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e2911b2ab856cc6ce7514f289a02cf1fe3e29e9e4a546aca1a11a97da8c4f4b9a2518074ed8b1b4148ffde164c62a970d36f42cd1c6e56b9c49021f513b818f
|
7
|
+
data.tar.gz: f8664d3e2e048c9026865929c1c39375242b3f05d669b6b14212442f3ffaeb0aeca03a8a80091c42e1b59d6e305d7622b5ffb8eca516f083a4af991aaeaf8ef7
|
data/CHANGELOG.md
CHANGED
data/bin/commendo-find-identical-pairs
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
content_set_base_redis_key = ARGV[0]
|
4
|
+
|
5
|
+
require 'redis'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
redis = Redis.new(db: 2)
|
9
|
+
#cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
10
|
+
|
11
|
+
distribution = {}
|
12
|
+
cursor = 0
|
13
|
+
begin
|
14
|
+
cursor, keys = redis.scan(cursor.to_i, match: "#{content_set_base_redis_key}*", count: 1000)
|
15
|
+
keys.each do |key|
|
16
|
+
left_resource = key.gsub(/^#{content_set_base_redis_key}/, '')
|
17
|
+
similar = redis.zrange(key, 0, -1, with_scores: true)
|
18
|
+
similar.each do |recommendation|
|
19
|
+
right_resource, score = recommendation
|
20
|
+
puts "#{left_resource}\t#{right_resource}" if score == 1 && left_resource < right_resource
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end while cursor.to_i > 0
|
24
|
+
|
data/bin/commendo-load
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
redis_db = ARGV[0].to_i
|
4
|
+
content_set_base_redis_key = ARGV[1]
|
5
|
+
filename = ARGV[2]
|
6
|
+
|
7
|
+
require 'redis'
|
8
|
+
require 'commendo'
|
9
|
+
|
10
|
+
redis = Redis.new(db: 2)
|
11
|
+
cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
12
|
+
|
13
|
+
File.open(filename) do |f|
|
14
|
+
|
15
|
+
current_resource = nil
|
16
|
+
current_headings = []
|
17
|
+
|
18
|
+
f.each_line.with_index do |tsv, i|
|
19
|
+
next if i.zero?
|
20
|
+
|
21
|
+
resource, _classified, root, score, heading = tsv.split(/\t/)
|
data/bin/commendo-recommendations-distribution
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
content_set_base_redis_key = ARGV[0]
|
4
|
+
|
5
|
+
require 'redis'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
redis = Redis.new(db: 2)
|
9
|
+
#cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
10
|
+
|
11
|
+
distribution = {}
|
12
|
+
cursor = 0
|
13
|
+
begin
|
14
|
+
cursor, keys = redis.scan(cursor.to_i, match: content_set_base_redis_key, count: 1000)
|
15
|
+
keys.each do |key|
|
16
|
+
count = redis.zcard(key)
|
17
|
+
distribution[count] ||= 0
|
18
|
+
distribution[count] += 1
|
19
|
+
end
|
20
|
+
end while cursor.to_i > 0
|
21
|
+
|
22
|
+
distribution.each do |score, count|
|
23
|
+
puts "#{score}\t#{count}"
|
24
|
+
end
|
data/bin/commendo-similarity-distribution
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
content_set_base_redis_key = ARGV[0]
|
4
|
+
|
5
|
+
require 'redis'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
redis = Redis.new(db: 2)
|
9
|
+
#cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
|
10
|
+
|
11
|
+
distribution = {}
|
12
|
+
cursor = 0
|
13
|
+
begin
|
14
|
+
cursor, keys = redis.scan(cursor.to_i, match: content_set_base_redis_key, count: 1000)
|
15
|
+
keys.each do |key|
|
16
|
+
similar = redis.zrange(key, 0, -1, with_scores: true)
|
17
|
+
similar.each do |recommendation|
|
18
|
+
resource, score = recommendation
|
19
|
+
distribution[score.round(1)] ||= 0
|
20
|
+
distribution[score.round(1)] += 1
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end while cursor.to_i > 0
|
24
|
+
|
25
|
+
distribution.each do |score, count|
|
26
|
+
puts "#{score}\t#{count}"
|
27
|
+
end
|
data/lib/commendo/content_set.rb
CHANGED
@@ -9,33 +9,33 @@ module Commendo
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def add_by_group(group, *resources)
|
12
|
-
redis.sadd(group_key(group), resources)
|
13
12
|
resources.each do |resource|
|
14
|
-
|
13
|
+
if resource.kind_of?(Array)
|
14
|
+
add_single(resource[0], group, resource[1])
|
15
|
+
else
|
16
|
+
add_single(resource, group, 1)
|
17
|
+
end
|
15
18
|
end
|
16
19
|
end
|
17
20
|
|
18
21
|
def add(resource, *groups)
|
19
|
-
redis.sadd(resource_key(resource), groups)
|
20
22
|
groups.each do |group|
|
21
|
-
|
23
|
+
if group.kind_of?(Array)
|
24
|
+
add_single(resource, group[0], group[1])
|
25
|
+
else
|
26
|
+
add_single(resource, group, 1)
|
27
|
+
end
|
22
28
|
end
|
23
29
|
end
|
24
30
|
|
31
|
+
def add_single(resource, group, score)
|
32
|
+
redis.zincrby(group_key(group), score, resource)
|
33
|
+
redis.zincrby(resource_key(resource), score, group)
|
34
|
+
end
|
35
|
+
|
25
36
|
def add_and_calculate(resource, *groups)
|
26
37
|
add(resource, *groups)
|
27
|
-
|
28
|
-
group_keys = groups.map { |group| group_key(group) }
|
29
|
-
resources = redis.sunion(*group_keys)
|
30
|
-
resources.combination(2) do |l, r|
|
31
|
-
intersect = redis.sinter(resource_key(l), resource_key(r))
|
32
|
-
if (intersect.length > 0)
|
33
|
-
union = redis.sunion(resource_key(l), resource_key(r))
|
34
|
-
jaccard = intersect.length / union.length.to_f
|
35
|
-
redis.zadd(similarity_key(l), jaccard, r)
|
36
|
-
redis.zadd(similarity_key(r), jaccard, l)
|
37
|
-
end
|
38
|
-
end
|
38
|
+
calculate_similarity_for_resource(resource, 0)
|
39
39
|
end
|
40
40
|
|
41
41
|
def delete(resource)
|
@@ -48,25 +48,43 @@ module Commendo
|
|
48
48
|
redis.del(resource_key(resource))
|
49
49
|
end
|
50
50
|
|
51
|
+
SET_TOO_LARGE_FOR_LUA = 999
|
52
|
+
|
51
53
|
def calculate_similarity(threshold = 0)
|
52
54
|
#TODO make this use scan for scaling
|
53
55
|
keys = redis.keys("#{resource_key_base}:*")
|
54
56
|
keys.each_with_index do |key, i|
|
55
57
|
yield(key, i, keys.length) if block_given?
|
56
|
-
completed = redis.eval(similarity_lua, keys: [key], argv: [resource_key_base, similar_key_base, group_key_base, threshold])
|
57
|
-
if completed ==
|
58
|
-
|
59
|
-
groups = redis.smembers(resource_key(resource))
|
60
|
-
group_keys = groups.map { |group| group_key(group) }
|
61
|
-
resources = redis.sunion(*group_keys)
|
62
|
-
resources.each do |to_compare|
|
63
|
-
next if resource == to_compare
|
64
|
-
redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [resource, to_compare, threshold])
|
65
|
-
end
|
58
|
+
completed = redis.eval(similarity_lua, keys: [key], argv: [tmp_key_base, resource_key_base, similar_key_base, group_key_base, threshold])
|
59
|
+
if completed == SET_TOO_LARGE_FOR_LUA
|
60
|
+
calculate_similarity_for_key(key, threshold)
|
66
61
|
end
|
67
62
|
end
|
68
63
|
end
|
69
64
|
|
65
|
+
def calculate_similarity_for_key(key, threshold)
|
66
|
+
resource = key.gsub(/^#{resource_key_base}:/, '')
|
67
|
+
calculate_similarity_for_key_resource(key, resource, threshold)
|
68
|
+
end
|
69
|
+
|
70
|
+
def calculate_similarity_for_resource(resource, threshold)
|
71
|
+
key = resource_key(resource)
|
72
|
+
calculate_similarity_for_key_resource(key, resource, threshold)
|
73
|
+
end
|
74
|
+
|
75
|
+
def calculate_similarity_for_key_resource(key, resource, threshold)
|
76
|
+
groups = redis.zrange(resource_key(resource), 0, -1)
|
77
|
+
group_keys = groups.map { |group| group_key(group) }
|
78
|
+
tmp_key = "#{tmp_key_base}:#{SecureRandom.uuid}"
|
79
|
+
redis.zunionstore(tmp_key, group_keys)
|
80
|
+
resources = redis.zrange(tmp_key, 0, -1)
|
81
|
+
redis.del(tmp_key)
|
82
|
+
resources.each do |to_compare|
|
83
|
+
next if resource == to_compare
|
84
|
+
redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [tmp_key_base, resource, to_compare, threshold])
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
70
88
|
def similar_to(resource)
|
71
89
|
if resource.kind_of? Array
|
72
90
|
keys = resource.map do |res|
|
@@ -116,6 +134,10 @@ module Commendo
|
|
116
134
|
file.read
|
117
135
|
end
|
118
136
|
|
137
|
+
def tmp_key_base
|
138
|
+
"#{key_base}:tmp"
|
139
|
+
end
|
140
|
+
|
119
141
|
def similar_key_base
|
120
142
|
"#{key_base}:similar"
|
121
143
|
end
|
data/lib/commendo/pair_comparison.lua
CHANGED
@@ -3,9 +3,10 @@ local right_key = KEYS[2]
|
|
3
3
|
local left_similarity_key = KEYS[3]
|
4
4
|
local right_similarity_key = KEYS[4]
|
5
5
|
|
6
|
-
local
|
7
|
-
local
|
8
|
-
local
|
6
|
+
local tmp_key_base = ARGV[1]
|
7
|
+
local left = tonumber(ARGV[2])
|
8
|
+
local right = tonumber(ARGV[3])
|
9
|
+
local threshold = tonumber(ARGV[4])
|
9
10
|
|
10
11
|
local function round(num, idp)
|
11
12
|
local mult = 10^(idp or 0)
|
@@ -14,10 +15,31 @@ end
|
|
14
15
|
|
15
16
|
redis.log(redis.LOG_NOTICE, 'Running pair comparison for ' .. left_key .. ' ' .. right_key)
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
|
19
|
+
--TODO change bar
|
20
|
+
local tmp_pair_intersect_key = tmp_key_base .. 'bar'
|
21
|
+
redis.call('ZINTERSTORE', tmp_pair_intersect_key, 2, left_key, right_key)
|
22
|
+
local intersect = redis.call('ZRANGE', tmp_pair_intersect_key, 0, -1, 'WITHSCORES')
|
23
|
+
redis.call('DEL', tmp_pair_intersect_key)
|
24
|
+
|
25
|
+
if table.getn(intersect) > 0 then
|
26
|
+
local intersect_score = 0
|
27
|
+
for i=1,#intersect,2 do
|
28
|
+
intersect_score = intersect_score + intersect[i+1]
|
29
|
+
end
|
30
|
+
|
31
|
+
--TODO change baz
|
32
|
+
local tmp_pair_union_key = tmp_key_base .. 'baz'
|
33
|
+
redis.call('ZUNIONSTORE', tmp_pair_union_key, 2, left_key, right_key)
|
34
|
+
|
35
|
+
local union = redis.call('ZRANGE', tmp_pair_union_key, 0, -1, 'WITHSCORES')
|
36
|
+
redis.call('DEL', tmp_pair_union_key)
|
37
|
+
local union_score = 0
|
38
|
+
for i=1,#union,2 do
|
39
|
+
union_score = union_score + union[i+1]
|
40
|
+
end
|
41
|
+
|
42
|
+
local similarity = round(intersect_score / union_score, 3)
|
21
43
|
if similarity > threshold then
|
22
44
|
redis.call('ZADD', left_similarity_key, similarity, right)
|
23
45
|
redis.call('ZADD', right_similarity_key, similarity, left)
|
data/lib/commendo/similarity.lua
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
local resource_key = KEYS[1]
|
2
|
-
local
|
3
|
-
local
|
4
|
-
local
|
5
|
-
local
|
2
|
+
local tmp_key_base = ARGV[1]
|
3
|
+
local resource_key_base = ARGV[2]
|
4
|
+
local sim_key_base = ARGV[3]
|
5
|
+
local group_key_base = ARGV[4]
|
6
|
+
local threshold = tonumber(ARGV[5])
|
6
7
|
|
7
8
|
local function round(num, idp)
|
8
9
|
local mult = 10^(idp or 0)
|
@@ -12,7 +13,7 @@ end
|
|
12
13
|
redis.log(redis.LOG_NOTICE, 'Running complete similarity for ' .. resource_key)
|
13
14
|
|
14
15
|
local resource = resource_key:gsub('%' .. resource_key_base .. ':', '')
|
15
|
-
local groups = redis.call('
|
16
|
+
local groups = redis.call('ZRANGE', resource_key, 0, -1)
|
16
17
|
|
17
18
|
if table.getn(groups) > 999 then
|
18
19
|
redis.log(redis.LOG_NOTICE, 'Complete similarity too large for ' .. resource_key .. ', ' .. table.getn(groups))
|
@@ -25,24 +26,45 @@ for _,group in ipairs(groups) do
|
|
25
26
|
end
|
26
27
|
--redis.log(redis.LOG_NOTICE, 'Found ' .. table.getn(group_keys) .. ' group keys')
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
29
|
+
--TODO change foo
|
30
|
+
local tmp_groups_union_key = tmp_key_base .. 'unionfoo'
|
31
|
+
redis.call('ZUNIONSTORE', tmp_groups_union_key, table.getn(group_keys), unpack(group_keys))
|
32
|
+
local resources = redis.call('ZRANGE', tmp_groups_union_key, 0, -1)
|
32
33
|
|
33
34
|
local previous = 'foo'
|
34
35
|
for _,to_compare in ipairs(resources) do
|
35
|
-
--
|
36
|
+
--redis.log(redis.LOG_NOTICE, 'Comparing ' .. resource .. ' and ' .. to_compare)
|
36
37
|
if to_compare ~= previous then
|
37
38
|
previous = to_compare
|
38
39
|
if resource > to_compare then
|
39
|
-
--
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
--redis.log(redis.LOG_NOTICE, 'Calculating similarity for ' .. resource .. ' and ' .. to_compare)
|
41
|
+
|
42
|
+
--TODO change bar
|
43
|
+
local tmp_pair_intersect_key = tmp_key_base .. 'bar'
|
44
|
+
redis.call('ZINTERSTORE', tmp_pair_intersect_key, 2, resource_key, resource_key_base .. ':' .. to_compare)
|
45
|
+
local intersect = redis.call('ZRANGE', tmp_pair_intersect_key, 0, -1, 'WITHSCORES')
|
46
|
+
redis.call('DEL', tmp_pair_intersect_key)
|
47
|
+
|
48
|
+
if table.getn(intersect) > 0 then
|
49
|
+
local intersect_score = 0
|
50
|
+
for i=1,#intersect,2 do
|
51
|
+
intersect_score = intersect_score + intersect[i+1]
|
52
|
+
end
|
53
|
+
|
54
|
+
--TODO change baz
|
55
|
+
local tmp_pair_union_key = tmp_key_base .. 'baz'
|
56
|
+
redis.call('ZUNIONSTORE', tmp_pair_union_key, 2, resource_key, resource_key_base .. ':' .. to_compare)
|
57
|
+
|
58
|
+
local union = redis.call('ZRANGE', tmp_pair_union_key, 0, -1, 'WITHSCORES')
|
59
|
+
redis.call('DEL', tmp_pair_union_key)
|
60
|
+
local union_score = 0
|
61
|
+
for i=1,#union,2 do
|
62
|
+
union_score = union_score + union[i+1]
|
63
|
+
end
|
64
|
+
|
65
|
+
local similarity = round(intersect_score / union_score, 3)
|
44
66
|
if similarity > threshold then
|
45
|
-
--
|
67
|
+
--redis.log(redis.LOG_NOTICE, resource .. ' and ' .. to_compare .. ' scored ' .. similarity)
|
46
68
|
redis.call('ZADD', sim_key_base .. ':' .. resource, similarity, to_compare)
|
47
69
|
redis.call('ZADD', sim_key_base .. ':' .. to_compare, similarity, resource)
|
48
70
|
end
|
@@ -51,6 +73,12 @@ for _,to_compare in ipairs(resources) do
|
|
51
73
|
end
|
52
74
|
end
|
53
75
|
|
76
|
+
redis.call('DEL', tmp_groups_union_key)
|
77
|
+
|
54
78
|
redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)
|
55
79
|
|
56
|
-
return true
|
80
|
+
return true
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
data/lib/commendo/version.rb
CHANGED
data/test/content_set_test.rb
CHANGED
@@ -16,7 +16,7 @@ module Commendo
|
|
16
16
|
assert_equal 'CommendoTests:similar:resource-1', cs.similarity_key('resource-1')
|
17
17
|
end
|
18
18
|
|
19
|
-
def
|
19
|
+
def test_recommends_when_added
|
20
20
|
redis = Redis.new(db: 15)
|
21
21
|
redis.flushdb
|
22
22
|
key_base = 'CommendoTests'
|
@@ -25,33 +25,89 @@ module Commendo
|
|
25
25
|
cs.add('resource-2', 'group-1')
|
26
26
|
cs.add('resource-3', 'group-1', 'group-2')
|
27
27
|
cs.add('resource-4', 'group-2')
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
|
36
|
-
assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
|
28
|
+
cs.calculate_similarity
|
29
|
+
expected = [
|
30
|
+
{resource: 'resource-3', similarity: 1.0},
|
31
|
+
{resource: 'resource-4', similarity: 0.667},
|
32
|
+
{resource: 'resource-2', similarity: 0.667}
|
33
|
+
]
|
34
|
+
assert_equal expected, cs.similar_to('resource-1')
|
37
35
|
end
|
38
36
|
|
39
|
-
def
|
37
|
+
def test_recommends_when_added_with_scores
|
38
|
+
redis = Redis.new(db: 15)
|
39
|
+
redis.flushdb
|
40
|
+
key_base = 'CommendoTests'
|
41
|
+
cs = ContentSet.new(redis, key_base)
|
42
|
+
cs.add('resource-1', ['group-1', 2], ['group-2', 2])
|
43
|
+
cs.add('resource-2', ['group-1', 7])
|
44
|
+
cs.add('resource-3', ['group-1', 2], ['group-2', 2])
|
45
|
+
cs.add('resource-4', ['group-2', 3])
|
46
|
+
cs.calculate_similarity
|
47
|
+
expected = [
|
48
|
+
{resource: 'resource-3', similarity: 1.0},
|
49
|
+
{resource: 'resource-2', similarity: 0.818},
|
50
|
+
{resource: 'resource-4', similarity: 0.714}
|
51
|
+
]
|
52
|
+
assert_equal expected, cs.similar_to('resource-1')
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_recommends_when_extra_scores_added
|
56
|
+
test_recommends_when_added_with_scores
|
57
|
+
redis = Redis.new(db: 15)
|
58
|
+
key_base = 'CommendoTests'
|
59
|
+
cs = ContentSet.new(redis, key_base)
|
60
|
+
cs.add('resource-3', ['group-1', 1], ['group-3', 2])
|
61
|
+
cs.add('resource-4', ['group-2', 1])
|
62
|
+
cs.add_by_group('group-1', ['newource-9', 100], 'resource-2', 'resource-3')
|
63
|
+
cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
|
64
|
+
cs.calculate_similarity
|
65
|
+
expected = [
|
66
|
+
{resource: 'newource-9', similarity: 1.0},
|
67
|
+
{resource: 'resource-1', similarity: 0.769},
|
68
|
+
{resource: 'resource-3', similarity: 0.706}
|
69
|
+
]
|
70
|
+
assert_equal expected, cs.similar_to('resource-2')
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_recommends_when_added_by_group
|
40
74
|
redis = Redis.new(db: 15)
|
41
75
|
redis.flushdb
|
42
76
|
key_base = 'CommendoTests'
|
43
77
|
cs = ContentSet.new(redis, key_base)
|
44
78
|
cs.add_by_group('group-1', 'resource-1', 'resource-2', 'resource-3')
|
45
79
|
cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
80
|
+
cs.calculate_similarity
|
81
|
+
expected = [
|
82
|
+
{resource: 'resource-3', similarity: 1.0},
|
83
|
+
{resource: 'resource-4', similarity: 0.667},
|
84
|
+
{resource: 'resource-2', similarity: 0.667}
|
85
|
+
]
|
86
|
+
assert_equal expected, cs.similar_to('resource-1')
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_recommends_when_added_by_group_with_scores
|
90
|
+
redis = Redis.new(db: 15)
|
91
|
+
redis.flushdb
|
92
|
+
key_base = 'CommendoTests'
|
93
|
+
cs = ContentSet.new(redis, key_base)
|
94
|
+
cs.add_by_group('group-1', ['resource-1', 2], ['resource-2', 3], ['resource-3', 7])
|
95
|
+
cs.add_by_group('group-2', ['resource-1', 2], ['resource-3', 3], ['resource-4', 5])
|
96
|
+
cs.calculate_similarity
|
97
|
+
expected = [
|
98
|
+
{resource: 'resource-3', similarity: 1.0},
|
99
|
+
{resource: 'resource-4', similarity: 0.778},
|
100
|
+
{resource: 'resource-2', similarity: 0.714}
|
101
|
+
]
|
102
|
+
assert_equal expected, cs.similar_to('resource-1')
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_recommendations_are_isolated_by_key_base
|
106
|
+
skip
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_recommendations_are_isolated_by_redis_db
|
110
|
+
skip
|
55
111
|
end
|
56
112
|
|
57
113
|
def test_calculates_similarity_scores
|
@@ -66,12 +122,12 @@ module Commendo
|
|
66
122
|
end
|
67
123
|
cs.calculate_similarity
|
68
124
|
expected = [
|
69
|
-
{resource: '9', similarity: 0.
|
70
|
-
{resource: '6', similarity: 0.
|
71
|
-
{resource: '12', similarity: 0.
|
72
|
-
{resource: '3', similarity: 0.
|
73
|
-
{resource: '21', similarity: 0.
|
74
|
-
{resource: '15', similarity: 0.
|
125
|
+
{resource: '9', similarity: 0.667},
|
126
|
+
{resource: '6', similarity: 0.667},
|
127
|
+
{resource: '12', similarity: 0.5},
|
128
|
+
{resource: '3', similarity: 0.4},
|
129
|
+
{resource: '21', similarity: 0.286},
|
130
|
+
{resource: '15', similarity: 0.286}
|
75
131
|
]
|
76
132
|
assert_equal expected, cs.similar_to(18)
|
77
133
|
end
|
@@ -88,8 +144,9 @@ module Commendo
|
|
88
144
|
end
|
89
145
|
cs.calculate_similarity(0.4)
|
90
146
|
expected = [
|
91
|
-
{resource: '9', similarity: 0.
|
92
|
-
{resource: '6', similarity: 0.
|
147
|
+
{resource: '9', similarity: 0.667},
|
148
|
+
{resource: '6', similarity: 0.667},
|
149
|
+
{resource: '12', similarity: 0.5}
|
93
150
|
]
|
94
151
|
assert_equal expected, cs.similar_to(18)
|
95
152
|
end
|
@@ -250,17 +307,17 @@ module Commendo
|
|
250
307
|
end
|
251
308
|
cs.calculate_similarity
|
252
309
|
expected = [
|
253
|
-
{resource: '18', similarity: 1.
|
254
|
-
{resource: '3', similarity: 1.
|
255
|
-
{resource: '6', similarity:
|
256
|
-
{resource: '
|
257
|
-
{resource: '
|
258
|
-
{resource: '
|
259
|
-
{resource: '9', similarity: 0.
|
260
|
-
{resource: '4', similarity: 0.
|
261
|
-
{resource: '8', similarity: 0.
|
262
|
-
{resource: '16', similarity: 0.
|
263
|
-
{resource: '20', similarity: 0.
|
310
|
+
{resource: '18', similarity: 1.834},
|
311
|
+
{resource: '3', similarity: 1.734},
|
312
|
+
{resource: '6', similarity: 1.167},
|
313
|
+
{resource: '21', similarity: 1.086},
|
314
|
+
{resource: '15', similarity: 1.086},
|
315
|
+
{resource: '12', similarity: 1.0},
|
316
|
+
{resource: '9', similarity: 0.833},
|
317
|
+
{resource: '4', similarity: 0.4},
|
318
|
+
{resource: '8', similarity: 0.333},
|
319
|
+
{resource: '16', similarity: 0.286},
|
320
|
+
{resource: '20', similarity: 0.25}
|
264
321
|
]
|
265
322
|
actual = cs.similar_to([12, 6, 9])
|
266
323
|
assert_equal expected, actual
|
data/test/weighted_group_test.rb
CHANGED
@@ -19,9 +19,9 @@ module Commendo
|
|
19
19
|
@cs3 = ContentSet.new(@redis, 'CommendoTests:ContentSet3', @tag_set)
|
20
20
|
(3..23).each do |group|
|
21
21
|
(3..23).each do |res|
|
22
|
-
@cs1.add_by_group(group, res) if (
|
23
|
-
@cs2.add_by_group(group, res) if (
|
24
|
-
@cs3.add_by_group(group, res) if (
|
22
|
+
@cs1.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(2).zero?
|
23
|
+
@cs2.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(3).zero?
|
24
|
+
@cs3.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(6).zero?
|
25
25
|
@tag_set.add(res, 'mod3') if res.modulo(3).zero?
|
26
26
|
@tag_set.add(res, 'mod4') if res.modulo(4).zero?
|
27
27
|
@tag_set.add(res, 'mod5') if res.modulo(5).zero?
|
@@ -40,12 +40,12 @@ module Commendo
|
|
40
40
|
{cs: @cs3, weight: 100.0}
|
41
41
|
)
|
42
42
|
expected = [
|
43
|
-
{resource: '6', similarity:
|
44
|
-
{resource: '12', similarity:
|
45
|
-
{resource: '9', similarity:
|
46
|
-
{resource: '3', similarity:
|
47
|
-
{resource: '21', similarity:
|
48
|
-
{resource: '15', similarity:
|
43
|
+
{resource: '6', similarity: 74.037},
|
44
|
+
{resource: '12', similarity: 55.5},
|
45
|
+
{resource: '9', similarity: 6.67},
|
46
|
+
{resource: '3', similarity: 4.0},
|
47
|
+
{resource: '21', similarity: 2.86},
|
48
|
+
{resource: '15', similarity: 2.86}
|
49
49
|
]
|
50
50
|
assert_equal expected, weighted_group.similar_to(18)
|
51
51
|
end
|
@@ -58,7 +58,7 @@ module Commendo
|
|
58
58
|
{cs: @cs2, weight: 10.0},
|
59
59
|
{cs: @cs3, weight: 100.0}
|
60
60
|
)
|
61
|
-
expected = [{resource: '15', similarity:
|
61
|
+
expected = [{resource: '15', similarity: 2.86}]
|
62
62
|
weighted_group.tag_set = @tag_set
|
63
63
|
assert_equal expected, weighted_group.filtered_similar_to(18, include: ['mod5'])
|
64
64
|
end
|
@@ -72,10 +72,10 @@ module Commendo
|
|
72
72
|
{cs: @cs3, weight: 100.0}
|
73
73
|
)
|
74
74
|
expected = [
|
75
|
-
{resource: '6', similarity:
|
76
|
-
{resource: '12', similarity:
|
77
|
-
{resource: '9', similarity:
|
78
|
-
{resource: '3', similarity:
|
75
|
+
{resource: '6', similarity: 74.037},
|
76
|
+
{resource: '12', similarity: 55.5},
|
77
|
+
{resource: '9', similarity: 6.67},
|
78
|
+
{resource: '3', similarity: 4.0}
|
79
79
|
]
|
80
80
|
weighted_group.tag_set = @tag_set
|
81
81
|
assert_equal expected, weighted_group.filtered_similar_to(18, exclude: ['mod5', 'mod7'])
|
@@ -90,9 +90,9 @@ module Commendo
|
|
90
90
|
{cs: @cs3, weight: 1.0}
|
91
91
|
)
|
92
92
|
expected = [
|
93
|
-
{resource: '16', similarity:
|
94
|
-
{resource: '4', similarity:
|
95
|
-
{resource: '12', similarity:
|
93
|
+
{resource: '16', similarity: 80.0},
|
94
|
+
{resource: '4', similarity: 66.7},
|
95
|
+
{resource: '12', similarity: 33.3}
|
96
96
|
]
|
97
97
|
weighted_group.tag_set = @tag_set
|
98
98
|
assert_equal expected, weighted_group.filtered_similar_to(8, include: ['mod4'], exclude: ['mod5'])
|
@@ -107,16 +107,16 @@ module Commendo
|
|
107
107
|
{cs: @cs3, weight: 1.0}
|
108
108
|
)
|
109
109
|
expected = [
|
110
|
-
{resource: '12', similarity:
|
111
|
-
{resource: '18', similarity:
|
112
|
-
{resource: '8', similarity:
|
113
|
-
{resource: '16', similarity:
|
114
|
-
{resource: '20', similarity:
|
115
|
-
{resource: '9', similarity:
|
116
|
-
{resource: '21', similarity:
|
117
|
-
{resource: '15', similarity:
|
118
|
-
{resource: '6', similarity:
|
119
|
-
{resource: '3', similarity:
|
110
|
+
{resource: '12', similarity: 118.037},
|
111
|
+
{resource: '18', similarity: 78.037},
|
112
|
+
{resource: '8', similarity: 66.7},
|
113
|
+
{resource: '16', similarity: 50.0},
|
114
|
+
{resource: '20', similarity: 40.0},
|
115
|
+
{resource: '9', similarity: 11.67},
|
116
|
+
{resource: '21', similarity: 9.0},
|
117
|
+
{resource: '15', similarity: 9.0},
|
118
|
+
{resource: '6', similarity: 6.67},
|
119
|
+
{resource: '3', similarity: 6.67}
|
120
120
|
]
|
121
121
|
weighted_group.tag_set = @tag_set
|
122
122
|
assert_equal expected, weighted_group.similar_to([3,4,5,6,7])
|
@@ -131,16 +131,9 @@ module Commendo
|
|
131
131
|
{cs: @cs3, weight: 1.0}
|
132
132
|
)
|
133
133
|
expected = [
|
134
|
-
{resource: '12', similarity:
|
135
|
-
|
136
|
-
{resource: '
|
137
|
-
{resource: '16', similarity: 33.3},
|
138
|
-
#{resource: '20', similarity: 25.0},
|
139
|
-
#{resource: '9', similarity: 8.33},
|
140
|
-
#{resource: '21', similarity: 5.83},
|
141
|
-
#{resource: '15', similarity: 5.83},
|
142
|
-
#{resource: '6', similarity: 5.0},
|
143
|
-
#{resource: '3', similarity: 5.0}
|
134
|
+
{resource: '12', similarity: 118.037},
|
135
|
+
{resource: '8', similarity: 66.7},
|
136
|
+
{resource: '16', similarity: 50.0},
|
144
137
|
]
|
145
138
|
weighted_group.tag_set = @tag_set
|
146
139
|
assert_equal expected, weighted_group.filtered_similar_to([3,4,5,6,7], include: ['mod4'], exclude: ['mod5'])
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commendo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04-
|
11
|
+
date: 2014-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
@@ -97,7 +97,11 @@ dependencies:
|
|
97
97
|
description: A Jaccard-similarity recommender using Redis sets
|
98
98
|
email:
|
99
99
|
- rob.styles@dynamicorange.com
|
100
|
-
executables:
|
100
|
+
executables:
|
101
|
+
- commendo-find-identical-pairs
|
102
|
+
- commendo-load
|
103
|
+
- commendo-recommendations-distribution
|
104
|
+
- commendo-similarity-distribution
|
101
105
|
extensions: []
|
102
106
|
extra_rdoc_files: []
|
103
107
|
files:
|
@@ -107,6 +111,10 @@ files:
|
|
107
111
|
- LICENSE.txt
|
108
112
|
- README.md
|
109
113
|
- Rakefile
|
114
|
+
- bin/commendo-find-identical-pairs
|
115
|
+
- bin/commendo-load
|
116
|
+
- bin/commendo-recommendations-distribution
|
117
|
+
- bin/commendo-similarity-distribution
|
110
118
|
- commendo.gemspec
|
111
119
|
- lib/commendo.rb
|
112
120
|
- lib/commendo/content_set.rb
|