commendo 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Rakefile +2 -2
- data/lib/commendo.rb +1 -0
- data/lib/commendo/content_set.rb +61 -26
- data/lib/commendo/similarity.lua +1 -1
- data/lib/commendo/version.rb +1 -1
- data/lib/commendo/weighted_group.rb +31 -0
- data/test/content_set_test.rb +79 -18
- data/test/weighted_group_test.rb +47 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 01e2c317e998efe31d72e1a399d1d8eb677854a7
|
4
|
+
data.tar.gz: fad0b2a2c09333ef31eff3f3aaaf91c7b3d1862f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d0563d3909815866885f6f654c172f39675c442fb24a7306162090f848e2e50f4161aabf0fcc4f4779e474ff3b5c9902ec8d5c7cdc66db69cd7a3886f17a9bc
|
7
|
+
data.tar.gz: f05702dd19c5be8b6480ef4a513294c493f7138a2bb2d3c5f329fc6a734a9060660ab168f4d987f830c188142fc6bd5334c0a903b0f45024d7810ba7cb02e284
|
data/CHANGELOG.md
CHANGED
data/Rakefile
CHANGED
@@ -46,8 +46,8 @@ task :load_traffic_from_tsv, :filename do |task, args|
|
|
46
46
|
end
|
47
47
|
|
48
48
|
puts 'Processing...'
|
49
|
-
cs.calculate_similarity(0
|
50
|
-
puts key
|
49
|
+
cs.calculate_similarity(0) { |key, index, total|
|
50
|
+
puts "#{key} - #{index}/#{total} = #{(index/total.to_f*100).round(2)}%"
|
51
51
|
}
|
52
52
|
end
|
53
53
|
|
data/lib/commendo.rb
CHANGED
data/lib/commendo/content_set.rb
CHANGED
@@ -9,43 +9,70 @@ module Commendo
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def add_by_group(group, *resources)
|
12
|
-
|
13
|
-
|
12
|
+
redis.sadd(group_key(group), resources)
|
13
|
+
resources.each do |resource|
|
14
|
+
redis.sadd(resource_key(resource), group)
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
17
18
|
def add(resource, *groups)
|
18
|
-
redis.sadd(
|
19
|
+
redis.sadd(resource_key(resource), groups)
|
20
|
+
groups.each do |group|
|
21
|
+
redis.sadd(group_key(group), resource)
|
22
|
+
end
|
19
23
|
end
|
20
24
|
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
def add_and_calculate(resource, *groups)
|
26
|
+
add(resource, *groups)
|
27
|
+
groups = redis.smembers(resource_key(resource))
|
28
|
+
group_keys = groups.map { |group| group_key(group) }
|
29
|
+
resources = redis.sunion(*group_keys)
|
30
|
+
resources.combination(2) do |l, r|
|
31
|
+
intersect = redis.sinter(resource_key(l), resource_key(r))
|
32
|
+
if (intersect.length > 0)
|
33
|
+
union = redis.sunion(resource_key(l), resource_key(r))
|
34
|
+
jaccard = intersect.length / union.length.to_f
|
35
|
+
redis.zadd(similarity_key(l), jaccard, r)
|
36
|
+
redis.zadd(similarity_key(r), jaccard, l)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def delete(resource)
|
42
|
+
similar = similar_to(resource)
|
43
|
+
similar.each do |other_resource|
|
44
|
+
redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
|
28
45
|
end
|
46
|
+
redis.del(similarity_key(resource))
|
47
|
+
redis.del(resource_key(resource))
|
48
|
+
end
|
29
49
|
|
50
|
+
def calculate_similarity(threshold = 0)
|
51
|
+
#TODO make this use scan for scaling
|
52
|
+
keys = redis.keys("#{resource_key_base}:*")
|
53
|
+
keys.each_with_index do |outer_key, i|
|
54
|
+
outer_res = outer_key.gsub(/^#{resource_key_base}:/, '')
|
55
|
+
calculate_similarity_in_redis(outer_key, similarity_key(outer_res), threshold)
|
56
|
+
yield(outer_key, i, keys.length) if block_given?
|
57
|
+
end
|
30
58
|
end
|
31
59
|
|
32
60
|
def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
|
33
|
-
|
61
|
+
#TODO maybe consider using ary.combination to get finer grained operation in lua
|
62
|
+
redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
|
34
63
|
end
|
35
64
|
|
36
65
|
def similar_to(resource)
|
37
|
-
|
38
|
-
|
39
|
-
similar_resources.
|
40
|
-
|
41
|
-
end
|
42
|
-
similar.sort! do |x, y|
|
43
|
-
if y[:similarity] != x[:similarity]
|
44
|
-
y[:similarity] <=> x[:similarity]
|
45
|
-
else
|
46
|
-
y[:resource] <=> x[:resource]
|
47
|
-
end
|
66
|
+
similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
|
67
|
+
|
68
|
+
similar_resources.map do |resource|
|
69
|
+
{resource: resource[0], similarity: resource[1].to_f}
|
48
70
|
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
def similarity_key(resource)
|
75
|
+
"#{similar_key_base}:#{resource}"
|
49
76
|
end
|
50
77
|
|
51
78
|
private
|
@@ -59,14 +86,22 @@ module Commendo
|
|
59
86
|
file.read
|
60
87
|
end
|
61
88
|
|
62
|
-
def set_key_base
|
63
|
-
"#{key_base}:sets"
|
64
|
-
end
|
65
|
-
|
66
89
|
def similar_key_base
|
67
90
|
"#{key_base}:similar"
|
68
91
|
end
|
69
92
|
|
93
|
+
def resource_key_base
|
94
|
+
"#{key_base}:resources"
|
95
|
+
end
|
96
|
+
|
97
|
+
def resource_key(resource)
|
98
|
+
"#{resource_key_base}:#{resource}"
|
99
|
+
end
|
100
|
+
|
101
|
+
def group_key(group)
|
102
|
+
"#{key_base}:groups:#{group}"
|
103
|
+
end
|
104
|
+
|
70
105
|
end
|
71
106
|
|
72
107
|
end
|
data/lib/commendo/similarity.lua
CHANGED
@@ -20,7 +20,7 @@ for _,key in ipairs(key_matches) do
|
|
20
20
|
-- table.insert(similar, key)
|
21
21
|
-- table.insert(similar, similarity)
|
22
22
|
local resource = key:gsub('%' .. set_key_base .. ':', '')
|
23
|
-
redis.call('
|
23
|
+
redis.call('ZADD', sim_key, similarity, resource)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
data/lib/commendo/version.rb
CHANGED
@@ -0,0 +1,31 @@
|
|
1
|
+
module Commendo
|
2
|
+
|
3
|
+
class WeightedGroup
|
4
|
+
|
5
|
+
attr_accessor :content_sets, :redis, :key_base
|
6
|
+
|
7
|
+
def initialize(redis, key_base, *content_sets)
|
8
|
+
@content_sets, @redis, @key_base = content_sets, redis, key_base
|
9
|
+
end
|
10
|
+
|
11
|
+
def similar_to(resource)
|
12
|
+
keys = content_sets.map do |cs|
|
13
|
+
cs[:cs].similarity_key(resource)
|
14
|
+
end
|
15
|
+
weights = content_sets.map do |cs|
|
16
|
+
cs[:weight]
|
17
|
+
end
|
18
|
+
tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
|
19
|
+
redis.zunionstore(tmp_key, keys, weights: weights)
|
20
|
+
similar_resources = redis.zrevrange(tmp_key, 0, -1, with_scores: true)
|
21
|
+
redis.del(tmp_key)
|
22
|
+
|
23
|
+
similar_resources.map do |resource|
|
24
|
+
{resource: resource[0], similarity: resource[1].to_f}
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
data/test/content_set_test.rb
CHANGED
@@ -9,6 +9,13 @@ module Commendo
|
|
9
9
|
|
10
10
|
class ContentSetTest < Minitest::Test
|
11
11
|
|
12
|
+
def test_gives_similarity_key_for_resource
|
13
|
+
redis = Redis.new(db: 15)
|
14
|
+
key_base = 'CommendoTests'
|
15
|
+
cs = ContentSet.new(redis, key_base)
|
16
|
+
assert_equal 'CommendoTests:similar:resource-1', cs.similarity_key('resource-1')
|
17
|
+
end
|
18
|
+
|
12
19
|
def test_stores_sets_by_resource
|
13
20
|
redis = Redis.new(db: 15)
|
14
21
|
redis.flushdb
|
@@ -18,15 +25,15 @@ module Commendo
|
|
18
25
|
cs.add('resource-2', 'group-1')
|
19
26
|
cs.add('resource-3', 'group-1', 'group-2')
|
20
27
|
cs.add('resource-4', 'group-2')
|
21
|
-
assert redis.sismember("#{key_base}:
|
22
|
-
assert redis.sismember("#{key_base}:
|
23
|
-
assert redis.sismember("#{key_base}:
|
24
|
-
refute redis.sismember("#{key_base}:
|
25
|
-
|
26
|
-
assert redis.sismember("#{key_base}:
|
27
|
-
refute redis.sismember("#{key_base}:
|
28
|
-
assert redis.sismember("#{key_base}:
|
29
|
-
assert redis.sismember("#{key_base}:
|
28
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
|
29
|
+
assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
|
30
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
|
31
|
+
refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
|
32
|
+
|
33
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
|
34
|
+
refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
|
35
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
|
36
|
+
assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
|
30
37
|
end
|
31
38
|
|
32
39
|
def test_stores_sets_by_group
|
@@ -36,15 +43,15 @@ module Commendo
|
|
36
43
|
cs = ContentSet.new(redis, key_base)
|
37
44
|
cs.add_by_group('group-1', 'resource-1', 'resource-2', 'resource-3')
|
38
45
|
cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
|
39
|
-
assert redis.sismember("#{key_base}:
|
40
|
-
assert redis.sismember("#{key_base}:
|
41
|
-
assert redis.sismember("#{key_base}:
|
42
|
-
refute redis.sismember("#{key_base}:
|
43
|
-
|
44
|
-
assert redis.sismember("#{key_base}:
|
45
|
-
refute redis.sismember("#{key_base}:
|
46
|
-
assert redis.sismember("#{key_base}:
|
47
|
-
assert redis.sismember("#{key_base}:
|
46
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
|
47
|
+
assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
|
48
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
|
49
|
+
refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
|
50
|
+
|
51
|
+
assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
|
52
|
+
refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
|
53
|
+
assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
|
54
|
+
assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
|
48
55
|
end
|
49
56
|
|
50
57
|
def test_calculates_similarity_scores
|
@@ -95,6 +102,60 @@ module Commendo
|
|
95
102
|
skip
|
96
103
|
end
|
97
104
|
|
105
|
+
def test_deletes_resource_from_everywhere
|
106
|
+
redis = Redis.new(db: 15)
|
107
|
+
redis.flushdb
|
108
|
+
key_base = 'CommendoTests'
|
109
|
+
cs = ContentSet.new(redis, key_base)
|
110
|
+
(3..23).each do |group|
|
111
|
+
(3..23).each do |res|
|
112
|
+
cs.add_by_group(group, res) if res % group == 0
|
113
|
+
end
|
114
|
+
end
|
115
|
+
cs.calculate_similarity
|
116
|
+
assert similar_to(cs, 18, 12)
|
117
|
+
|
118
|
+
cs.delete(12)
|
119
|
+
assert_equal [], cs.similar_to(12)
|
120
|
+
refute similar_to(cs, 18, 12)
|
121
|
+
|
122
|
+
cs.calculate_similarity
|
123
|
+
assert_equal [], cs.similar_to(12)
|
124
|
+
refute similar_to(cs, 18, 12)
|
125
|
+
|
126
|
+
end
|
127
|
+
|
128
|
+
def test_accepts_incremental_updates
|
129
|
+
redis = Redis.new(db: 15)
|
130
|
+
redis.flushdb
|
131
|
+
key_base = 'CommendoTests'
|
132
|
+
cs = ContentSet.new(redis, key_base)
|
133
|
+
(3..23).each do |group|
|
134
|
+
(3..23).each do |res|
|
135
|
+
cs.add(res, group) if res % group == 0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
cs.calculate_similarity
|
139
|
+
assert similar_to(cs, 18, 12)
|
140
|
+
refute similar_to(cs, 10, 12)
|
141
|
+
|
142
|
+
cs.add_and_calculate(12, 'foo', true)
|
143
|
+
cs.add_and_calculate(10, 'foo', true)
|
144
|
+
assert similar_to(cs, 10, 12)
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_accepts_tag_collection
|
148
|
+
skip
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_filters_by_tag_collection
|
152
|
+
skip
|
153
|
+
end
|
154
|
+
|
155
|
+
def similar_to(cs, resource, similar)
|
156
|
+
cs.similar_to(resource).select { |sim| sim[:resource] == "#{similar}" }.length > 0
|
157
|
+
end
|
158
|
+
|
98
159
|
end
|
99
160
|
|
100
161
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
gem 'minitest'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'minitest/pride'
|
4
|
+
require 'minitest/mock'
|
5
|
+
require 'mocha/setup'
|
6
|
+
require 'commendo'
|
7
|
+
|
8
|
+
module Commendo
|
9
|
+
|
10
|
+
class WeightedGroupTest < Minitest::Test
|
11
|
+
|
12
|
+
def test_calls_each_content_set
|
13
|
+
redis = Redis.new(db: 15)
|
14
|
+
redis.flushdb
|
15
|
+
cs1 = ContentSet.new(redis, 'CommendoTests:ContentSet1')
|
16
|
+
cs2 = ContentSet.new(redis, 'CommendoTests:ContentSet2')
|
17
|
+
cs3 = ContentSet.new(redis, 'CommendoTests:ContentSet3')
|
18
|
+
(3..23).each do |group|
|
19
|
+
(3..23).each do |res|
|
20
|
+
cs1.add_by_group(group, res) if (res % group == 0) && (res % 2 == 0)
|
21
|
+
cs2.add_by_group(group, res) if (res % group == 0) && (res % 3 == 0)
|
22
|
+
cs3.add_by_group(group, res) if (res % group == 0) && (res % 6 == 0)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
[cs1, cs2, cs3].each { |cs| cs.calculate_similarity }
|
26
|
+
weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
|
27
|
+
expected = [
|
28
|
+
{resource: '6', similarity: 55.5},
|
29
|
+
{resource: '12', similarity: 36.99999999999963},
|
30
|
+
{resource: '9', similarity: 5.0},
|
31
|
+
{resource: '3', similarity: 2.5},
|
32
|
+
{resource: '21', similarity: 1.6666666666666998},
|
33
|
+
{resource: '15', similarity: 1.6666666666666998}
|
34
|
+
]
|
35
|
+
|
36
|
+
assert_equal expected, weighted_group.similar_to(18)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_precalculates
|
40
|
+
skip
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commendo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
@@ -112,7 +112,9 @@ files:
|
|
112
112
|
- lib/commendo/content_set.rb
|
113
113
|
- lib/commendo/similarity.lua
|
114
114
|
- lib/commendo/version.rb
|
115
|
+
- lib/commendo/weighted_group.rb
|
115
116
|
- test/content_set_test.rb
|
117
|
+
- test/weighted_group_test.rb
|
116
118
|
homepage: ''
|
117
119
|
licenses:
|
118
120
|
- MIT
|
@@ -139,4 +141,5 @@ specification_version: 4
|
|
139
141
|
summary: A Jaccard-similarity recommender using Redis sets
|
140
142
|
test_files:
|
141
143
|
- test/content_set_test.rb
|
144
|
+
- test/weighted_group_test.rb
|
142
145
|
has_rdoc:
|