commendo 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Rakefile +2 -2
- data/lib/commendo.rb +1 -0
- data/lib/commendo/content_set.rb +61 -26
- data/lib/commendo/similarity.lua +1 -1
- data/lib/commendo/version.rb +1 -1
- data/lib/commendo/weighted_group.rb +31 -0
- data/test/content_set_test.rb +79 -18
- data/test/weighted_group_test.rb +47 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 01e2c317e998efe31d72e1a399d1d8eb677854a7
|
4
|
+
data.tar.gz: fad0b2a2c09333ef31eff3f3aaaf91c7b3d1862f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d0563d3909815866885f6f654c172f39675c442fb24a7306162090f848e2e50f4161aabf0fcc4f4779e474ff3b5c9902ec8d5c7cdc66db69cd7a3886f17a9bc
|
7
|
+
data.tar.gz: f05702dd19c5be8b6480ef4a513294c493f7138a2bb2d3c5f329fc6a734a9060660ab168f4d987f830c188142fc6bd5334c0a903b0f45024d7810ba7cb02e284
|
data/CHANGELOG.md
CHANGED
data/Rakefile
CHANGED
@@ -46,8 +46,8 @@ task :load_traffic_from_tsv, :filename do |task, args|
|
|
46
46
|
end
|
47
47
|
|
48
48
|
puts 'Processing...'
|
49
|
-
cs.calculate_similarity(0
|
50
|
-
puts key
|
49
|
+
cs.calculate_similarity(0) { |key, index, total|
|
50
|
+
puts "#{key} - #{index}/#{total} = #{(index/total.to_f*100).round(2)}%"
|
51
51
|
}
|
52
52
|
end
|
53
53
|
|
data/lib/commendo.rb
CHANGED
data/lib/commendo/content_set.rb
CHANGED
@@ -9,43 +9,70 @@ module Commendo
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def add_by_group(group, *resources)
|
12
|
-
|
13
|
-
|
12
|
+
redis.sadd(group_key(group), resources)
|
13
|
+
resources.each do |resource|
|
14
|
+
redis.sadd(resource_key(resource), group)
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
17
18
|
def add(resource, *groups)
|
18
|
-
redis.sadd(
|
19
|
+
redis.sadd(resource_key(resource), groups)
|
20
|
+
groups.each do |group|
|
21
|
+
redis.sadd(group_key(group), resource)
|
22
|
+
end
|
19
23
|
end
|
20
24
|
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
def add_and_calculate(resource, *groups)
|
26
|
+
add(resource, *groups)
|
27
|
+
groups = redis.smembers(resource_key(resource))
|
28
|
+
group_keys = groups.map { |group| group_key(group) }
|
29
|
+
resources = redis.sunion(*group_keys)
|
30
|
+
resources.combination(2) do |l, r|
|
31
|
+
intersect = redis.sinter(resource_key(l), resource_key(r))
|
32
|
+
if (intersect.length > 0)
|
33
|
+
union = redis.sunion(resource_key(l), resource_key(r))
|
34
|
+
jaccard = intersect.length / union.length.to_f
|
35
|
+
redis.zadd(similarity_key(l), jaccard, r)
|
36
|
+
redis.zadd(similarity_key(r), jaccard, l)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def delete(resource)
|
42
|
+
similar = similar_to(resource)
|
43
|
+
similar.each do |other_resource|
|
44
|
+
redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
|
28
45
|
end
|
46
|
+
redis.del(similarity_key(resource))
|
47
|
+
redis.del(resource_key(resource))
|
48
|
+
end
|
29
49
|
|
50
|
+
def calculate_similarity(threshold = 0)
|
51
|
+
#TODO make this use scan for scaling
|
52
|
+
keys = redis.keys("#{resource_key_base}:*")
|
53
|
+
keys.each_with_index do |outer_key, i|
|
54
|
+
outer_res = outer_key.gsub(/^#{resource_key_base}:/, '')
|
55
|
+
calculate_similarity_in_redis(outer_key, similarity_key(outer_res), threshold)
|
56
|
+
yield(outer_key, i, keys.length) if block_given?
|
57
|
+
end
|
30
58
|
end
|
31
59
|
|
32
60
|
def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
|
33
|
-
|
61
|
+
#TODO maybe consider using ary.combination to get finer grained operation in lua
|
62
|
+
redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
|
34
63
|
end
|
35
64
|
|
36
65
|
def similar_to(resource)
|
37
|
-
|
38
|
-
|
39
|
-
similar_resources.
|
40
|
-
|
41
|
-
end
|
42
|
-
similar.sort! do |x, y|
|
43
|
-
if y[:similarity] != x[:similarity]
|
44
|
-
y[:similarity] <=> x[:similarity]
|
45
|
-
else
|
46
|
-
y[:resource] <=> x[:resource]
|
47
|
-
end
|
66
|
+
similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
|
67
|
+
|
68
|
+
similar_resources.map do |resource|
|
69
|
+
{resource: resource[0], similarity: resource[1].to_f}
|
48
70
|
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
def similarity_key(resource)
|
75
|
+
"#{similar_key_base}:#{resource}"
|
49
76
|
end
|
50
77
|
|
51
78
|
private
|
@@ -59,14 +86,22 @@ module Commendo
|
|
59
86
|
file.read
|
60
87
|
end
|
61
88
|
|
62
|
-
def set_key_base
|
63
|
-
"#{key_base}:sets"
|
64
|
-
end
|
65
|
-
|
66
89
|
def similar_key_base
|
67
90
|
"#{key_base}:similar"
|
68
91
|
end
|
69
92
|
|
93
|
+
def resource_key_base
|
94
|
+
"#{key_base}:resources"
|
95
|
+
end
|
96
|
+
|
97
|
+
def resource_key(resource)
|
98
|
+
"#{resource_key_base}:#{resource}"
|
99
|
+
end
|
100
|
+
|
101
|
+
def group_key(group)
|
102
|
+
"#{key_base}:groups:#{group}"
|
103
|
+
end
|
104
|
+
|
70
105
|
end
|
71
106
|
|
72
107
|
end
|
data/lib/commendo/similarity.lua
CHANGED
@@ -20,7 +20,7 @@ for _,key in ipairs(key_matches) do
|
|
20
20
|
-- table.insert(similar, key)
|
21
21
|
-- table.insert(similar, similarity)
|
22
22
|
local resource = key:gsub('%' .. set_key_base .. ':', '')
|
23
|
-
redis.call('
|
23
|
+
redis.call('ZADD', sim_key, similarity, resource)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
data/lib/commendo/version.rb
CHANGED
data/lib/commendo/weighted_group.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
module Commendo
|
2
|
+
|
3
|
+
class WeightedGroup
|
4
|
+
|
5
|
+
attr_accessor :content_sets, :redis, :key_base
|
6
|
+
|
7
|
+
def initialize(redis, key_base, *content_sets)
|
8
|
+
@content_sets, @redis, @key_base = content_sets, redis, key_base
|
9
|
+
end
|
10
|
+
|
11
|
+
def similar_to(resource)
|
12
|
+
keys = content_sets.map do |cs|
|
13
|
+
cs[:cs].similarity_key(resource)
|
14
|
+
end
|
15
|
+
weights = content_sets.map do |cs|
|
16
|
+
cs[:weight]
|
17
|
+
end
|
18
|
+
tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
|
19
|
+
redis.zunionstore(tmp_key, keys, weights: weights)
|
20
|
+
similar_resources = redis.zrevrange(tmp_key, 0, -1, with_scores: true)
|
21
|
+
redis.del(tmp_key)
|
22
|
+
|
23
|
+
similar_resources.map do |resource|
|
24
|
+
{resource: resource[0], similarity: resource[1].to_f}
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
data/test/content_set_test.rb
CHANGED
@@ -9,6 +9,13 @@ module Commendo
|
|
9
9
|
|
10
10
|
class ContentSetTest < Minitest::Test
|
11
11
|
|
12
|
+
def test_gives_similarity_key_for_resource
|
13
|
+
redis = Redis.new(db: 15)
|
14
|
+
key_base = 'CommendoTests'
|
15
|
+
cs = ContentSet.new(redis, key_base)
|
16
|
+
assert_equal 'CommendoTests:similar:resource-1', cs.similarity_key('resource-1')
|
17
|
+
end
|
18
|
+
|
12
19
|
def test_stores_sets_by_resource
|
13
20
|
redis = Redis.new(db: 15)
|
14
21
|
redis.flushdb
|
@@ -18,15 +25,15 @@ module Commendo
|
|
18
25
|
cs.add('resource-2', 'group-1')
|
19
26
|
cs.add('resource-3', 'group-1', 'group-2')
|
20
27
|
cs.add('resource-4', 'group-2')
|
21
|
-
assert redis.sismember("#{key_base}:
|
22
|
-
assert redis.sismember("#{key_base}:
|
23
|
-
assert redis.sismember("#{key_base}:
|
24
|
-
refute redis.sismember("#{key_base}:
|
25
|
-
|
26
|
-
assert redis.sismember("#{key_base}:
|
27
|
-
refute redis.sismember("#{key_base}:
|
28
|
-
assert redis.sismember("#{key_base}:
|
29
|
-
assert redis.sismember("#{key_base}:
|
28
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
|
29
|
+
assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
|
30
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
|
31
|
+
refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
|
32
|
+
|
33
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
|
34
|
+
refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
|
35
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
|
36
|
+
assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
|
30
37
|
end
|
31
38
|
|
32
39
|
def test_stores_sets_by_group
|
@@ -36,15 +43,15 @@ module Commendo
|
|
36
43
|
cs = ContentSet.new(redis, key_base)
|
37
44
|
cs.add_by_group('group-1', 'resource-1', 'resource-2', 'resource-3')
|
38
45
|
cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
|
39
|
-
assert redis.sismember("#{key_base}:
|
40
|
-
assert redis.sismember("#{key_base}:
|
41
|
-
assert redis.sismember("#{key_base}:
|
42
|
-
refute redis.sismember("#{key_base}:
|
43
|
-
|
44
|
-
assert redis.sismember("#{key_base}:
|
45
|
-
refute redis.sismember("#{key_base}:
|
46
|
-
assert redis.sismember("#{key_base}:
|
47
|
-
assert redis.sismember("#{key_base}:
|
46
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
|
47
|
+
assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
|
48
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
|
49
|
+
refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
|
50
|
+
|
51
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
|
52
|
+
refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
|
53
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
|
54
|
+
assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
|
48
55
|
end
|
49
56
|
|
50
57
|
def test_calculates_similarity_scores
|
@@ -95,6 +102,60 @@ module Commendo
|
|
95
102
|
skip
|
96
103
|
end
|
97
104
|
|
105
|
+
def test_deletes_resource_from_everywhere
|
106
|
+
redis = Redis.new(db: 15)
|
107
|
+
redis.flushdb
|
108
|
+
key_base = 'CommendoTests'
|
109
|
+
cs = ContentSet.new(redis, key_base)
|
110
|
+
(3..23).each do |group|
|
111
|
+
(3..23).each do |res|
|
112
|
+
cs.add_by_group(group, res) if res % group == 0
|
113
|
+
end
|
114
|
+
end
|
115
|
+
cs.calculate_similarity
|
116
|
+
assert similar_to(cs, 18, 12)
|
117
|
+
|
118
|
+
cs.delete(12)
|
119
|
+
assert_equal [], cs.similar_to(12)
|
120
|
+
refute similar_to(cs, 18, 12)
|
121
|
+
|
122
|
+
cs.calculate_similarity
|
123
|
+
assert_equal [], cs.similar_to(12)
|
124
|
+
refute similar_to(cs, 18, 12)
|
125
|
+
|
126
|
+
end
|
127
|
+
|
128
|
+
def test_accepts_incremental_updates
|
129
|
+
redis = Redis.new(db: 15)
|
130
|
+
redis.flushdb
|
131
|
+
key_base = 'CommendoTests'
|
132
|
+
cs = ContentSet.new(redis, key_base)
|
133
|
+
(3..23).each do |group|
|
134
|
+
(3..23).each do |res|
|
135
|
+
cs.add(res, group) if res % group == 0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
cs.calculate_similarity
|
139
|
+
assert similar_to(cs, 18, 12)
|
140
|
+
refute similar_to(cs, 10, 12)
|
141
|
+
|
142
|
+
cs.add_and_calculate(12, 'foo', true)
|
143
|
+
cs.add_and_calculate(10, 'foo', true)
|
144
|
+
assert similar_to(cs, 10, 12)
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_accepts_tag_collection
|
148
|
+
skip
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_filters_by_tag_collection
|
152
|
+
skip
|
153
|
+
end
|
154
|
+
|
155
|
+
def similar_to(cs, resource, similar)
|
156
|
+
cs.similar_to(resource).select { |sim| sim[:resource] == "#{similar}" }.length > 0
|
157
|
+
end
|
158
|
+
|
98
159
|
end
|
99
160
|
|
100
161
|
end
|
data/test/weighted_group_test.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
gem 'minitest'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'minitest/pride'
|
4
|
+
require 'minitest/mock'
|
5
|
+
require 'mocha/setup'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
module Commendo
|
9
|
+
|
10
|
+
class WeightedGroupTest < Minitest::Test
|
11
|
+
|
12
|
+
def test_calls_each_content_set
|
13
|
+
redis = Redis.new(db: 15)
|
14
|
+
redis.flushdb
|
15
|
+
cs1 = ContentSet.new(redis, 'CommendoTests:ContentSet1')
|
16
|
+
cs2 = ContentSet.new(redis, 'CommendoTests:ContentSet2')
|
17
|
+
cs3 = ContentSet.new(redis, 'CommendoTests:ContentSet3')
|
18
|
+
(3..23).each do |group|
|
19
|
+
(3..23).each do |res|
|
20
|
+
cs1.add_by_group(group, res) if (res % group == 0) && (res % 2 == 0)
|
21
|
+
cs2.add_by_group(group, res) if (res % group == 0) && (res % 3 == 0)
|
22
|
+
cs3.add_by_group(group, res) if (res % group == 0) && (res % 6 == 0)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
[cs1, cs2, cs3].each { |cs| cs.calculate_similarity }
|
26
|
+
weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
|
27
|
+
expected = [
|
28
|
+
{resource: '6', similarity: 55.5},
|
29
|
+
{resource: '12', similarity: 36.99999999999963},
|
30
|
+
{resource: '9', similarity: 5.0},
|
31
|
+
{resource: '3', similarity: 2.5},
|
32
|
+
{resource: '21', similarity: 1.6666666666666998},
|
33
|
+
{resource: '15', similarity: 1.6666666666666998}
|
34
|
+
]
|
35
|
+
|
36
|
+
assert_equal expected, weighted_group.similar_to(18)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_precalculates
|
40
|
+
skip
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commendo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
@@ -112,7 +112,9 @@ files:
|
|
112
112
|
- lib/commendo/content_set.rb
|
113
113
|
- lib/commendo/similarity.lua
|
114
114
|
- lib/commendo/version.rb
|
115
|
+
- lib/commendo/weighted_group.rb
|
115
116
|
- test/content_set_test.rb
|
117
|
+
- test/weighted_group_test.rb
|
116
118
|
homepage: ''
|
117
119
|
licenses:
|
118
120
|
- MIT
|
@@ -139,4 +141,5 @@ specification_version: 4
|
|
139
141
|
summary: A Jaccard-similarity recommender using Redis sets
|
140
142
|
test_files:
|
141
143
|
- test/content_set_test.rb
|
144
|
+
- test/weighted_group_test.rb
|
142
145
|
has_rdoc:
|