commendo 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ccb7bd9fd0ad412db66aeda26a0048db1d3dc040
4
- data.tar.gz: 0afcaf6c33e8eed6b27401e3bce8bf67dc59440a
3
+ metadata.gz: 01e2c317e998efe31d72e1a399d1d8eb677854a7
4
+ data.tar.gz: fad0b2a2c09333ef31eff3f3aaaf91c7b3d1862f
5
5
  SHA512:
6
- metadata.gz: 71d5f32a19e6d7de531dc3e99727caafc400eb0d7f0a3aa326f7c03a4b11ed5962654aea49499a75fc1d49344e156987bf20424d778562b901924fa8af939fde
7
- data.tar.gz: 12f3145df67a43dd59eb56be2884118becb03c93535e7bb25dffaa4fc0d2a9ddeda6a563ea100c658d752f20567c48099ae8cbd2a8af2cb0dd1e6818780a4e2f
6
+ metadata.gz: 8d0563d3909815866885f6f654c172f39675c442fb24a7306162090f848e2e50f4161aabf0fcc4f4779e474ff3b5c9902ec8d5c7cdc66db69cd7a3886f17a9bc
7
+ data.tar.gz: f05702dd19c5be8b6480ef4a513294c493f7138a2bb2d3c5f329fc6a734a9060660ab168f4d987f830c188142fc6bd5334c0a903b0f45024d7810ba7cb02e284
@@ -1,2 +1,5 @@
1
+ # 0.0.2 / 2014-03-31
2
+ * [FEATURE] Weighted groups. Incremental additions. Deletions.
3
+
1
4
  # 0.0.1 / 2014-03-28
2
5
  * [FEATURE] Initial Release
data/Rakefile CHANGED
@@ -46,8 +46,8 @@ task :load_traffic_from_tsv, :filename do |task, args|
46
46
  end
47
47
 
48
48
  puts 'Processing...'
49
- cs.calculate_similarity(0.1999999999) { |key|
50
- puts key
49
+ cs.calculate_similarity(0) { |key, index, total|
50
+ puts "#{key} - #{index}/#{total} = #{(index/total.to_f*100).round(2)}%"
51
51
  }
52
52
  end
53
53
 
@@ -1,6 +1,7 @@
1
1
  require 'redis'
2
2
  require 'commendo/version'
3
3
  require 'commendo/content_set'
4
+ require 'commendo/weighted_group'
4
5
 
5
6
  module Commendo
6
7
  # Your code goes here...
@@ -9,43 +9,70 @@ module Commendo
9
9
  end
10
10
 
11
11
  def add_by_group(group, *resources)
12
- resources.each do |res|
13
- redis.sadd("#{set_key_base}:#{res}", group)
12
+ redis.sadd(group_key(group), resources)
13
+ resources.each do |resource|
14
+ redis.sadd(resource_key(resource), group)
14
15
  end
15
16
  end
16
17
 
17
18
  def add(resource, *groups)
18
- redis.sadd("#{set_key_base}:#{resource}", groups)
19
+ redis.sadd(resource_key(resource), groups)
20
+ groups.each do |group|
21
+ redis.sadd(group_key(group), resource)
22
+ end
19
23
  end
20
24
 
21
- def calculate_similarity(threshold = 0)
22
- keys = redis.keys("#{set_key_base}:*")
23
- keys.each do |outer_key|
24
- outer_res = outer_key.gsub(/^#{set_key_base}:/, '')
25
- outer_similarity_key = "#{similar_key_base}:#{outer_res}"
26
- calculate_similarity_in_redis(outer_key, outer_similarity_key, threshold)
27
- yield(outer_key) if block_given?
25
+ def add_and_calculate(resource, *groups)
26
+ add(resource, *groups)
27
+ groups = redis.smembers(resource_key(resource))
28
+ group_keys = groups.map { |group| group_key(group) }
29
+ resources = redis.sunion(*group_keys)
30
+ resources.combination(2) do |l, r|
31
+ intersect = redis.sinter(resource_key(l), resource_key(r))
32
+ if (intersect.length > 0)
33
+ union = redis.sunion(resource_key(l), resource_key(r))
34
+ jaccard = intersect.length / union.length.to_f
35
+ redis.zadd(similarity_key(l), jaccard, r)
36
+ redis.zadd(similarity_key(r), jaccard, l)
37
+ end
38
+ end
39
+ end
40
+
41
+ def delete(resource)
42
+ similar = similar_to(resource)
43
+ similar.each do |other_resource|
44
+ redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
28
45
  end
46
+ redis.del(similarity_key(resource))
47
+ redis.del(resource_key(resource))
48
+ end
29
49
 
50
+ def calculate_similarity(threshold = 0)
51
+ # TODO: use SCAN instead of KEYS here so this scales to large keyspaces
52
+ keys = redis.keys("#{resource_key_base}:*")
53
+ keys.each_with_index do |outer_key, i|
54
+ outer_res = outer_key.gsub(/^#{resource_key_base}:/, '')
55
+ calculate_similarity_in_redis(outer_key, similarity_key(outer_res), threshold)
56
+ yield(outer_key, i, keys.length) if block_given?
57
+ end
30
58
  end
31
59
 
32
60
  def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
33
- redis.eval(similarity_lua, [set_key, similiarity_key], [set_key_base, threshold])
61
+ # TODO: consider using Array#combination to get finer-grained operations in the Lua script
62
+ redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
34
63
  end
35
64
 
36
65
  def similar_to(resource)
37
- similar = []
38
- similar_resources = redis.hgetall("#{similar_key_base}:#{resource}")
39
- similar_resources.each do |resource, similarity|
40
- similar << {resource: resource, similarity: similarity.to_f}
41
- end
42
- similar.sort! do |x, y|
43
- if y[:similarity] != x[:similarity]
44
- y[:similarity] <=> x[:similarity]
45
- else
46
- y[:resource] <=> x[:resource]
47
- end
66
+ similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
67
+
68
+ similar_resources.map do |resource|
69
+ {resource: resource[0], similarity: resource[1].to_f}
48
70
  end
71
+
72
+ end
73
+
74
+ def similarity_key(resource)
75
+ "#{similar_key_base}:#{resource}"
49
76
  end
50
77
 
51
78
  private
@@ -59,14 +86,22 @@ module Commendo
59
86
  file.read
60
87
  end
61
88
 
62
- def set_key_base
63
- "#{key_base}:sets"
64
- end
65
-
66
89
  def similar_key_base
67
90
  "#{key_base}:similar"
68
91
  end
69
92
 
93
+ def resource_key_base
94
+ "#{key_base}:resources"
95
+ end
96
+
97
+ def resource_key(resource)
98
+ "#{resource_key_base}:#{resource}"
99
+ end
100
+
101
+ def group_key(group)
102
+ "#{key_base}:groups:#{group}"
103
+ end
104
+
70
105
  end
71
106
 
72
107
  end
@@ -20,7 +20,7 @@ for _,key in ipairs(key_matches) do
20
20
  -- table.insert(similar, key)
21
21
  -- table.insert(similar, similarity)
22
22
  local resource = key:gsub('%' .. set_key_base .. ':', '')
23
- redis.call('HSET', sim_key, resource, similarity)
23
+ redis.call('ZADD', sim_key, similarity, resource)
24
24
  end
25
25
  end
26
26
  end
@@ -1,3 +1,3 @@
1
1
  module Commendo
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
@@ -0,0 +1,31 @@
1
+ module Commendo
2
+
3
+ class WeightedGroup
4
+
5
+ attr_accessor :content_sets, :redis, :key_base
6
+
7
+ def initialize(redis, key_base, *content_sets)
8
+ @content_sets, @redis, @key_base = content_sets, redis, key_base
9
+ end
10
+
11
+ def similar_to(resource)
12
+ keys = content_sets.map do |cs|
13
+ cs[:cs].similarity_key(resource)
14
+ end
15
+ weights = content_sets.map do |cs|
16
+ cs[:weight]
17
+ end
18
+ tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
19
+ redis.zunionstore(tmp_key, keys, weights: weights)
20
+ similar_resources = redis.zrevrange(tmp_key, 0, -1, with_scores: true)
21
+ redis.del(tmp_key)
22
+
23
+ similar_resources.map do |resource|
24
+ {resource: resource[0], similarity: resource[1].to_f}
25
+ end
26
+
27
+ end
28
+
29
+ end
30
+
31
+ end
@@ -9,6 +9,13 @@ module Commendo
9
9
 
10
10
  class ContentSetTest < Minitest::Test
11
11
 
12
+ def test_gives_similarity_key_for_resource
13
+ redis = Redis.new(db: 15)
14
+ key_base = 'CommendoTests'
15
+ cs = ContentSet.new(redis, key_base)
16
+ assert_equal 'CommendoTests:similar:resource-1', cs.similarity_key('resource-1')
17
+ end
18
+
12
19
  def test_stores_sets_by_resource
13
20
  redis = Redis.new(db: 15)
14
21
  redis.flushdb
@@ -18,15 +25,15 @@ module Commendo
18
25
  cs.add('resource-2', 'group-1')
19
26
  cs.add('resource-3', 'group-1', 'group-2')
20
27
  cs.add('resource-4', 'group-2')
21
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-1')
22
- assert redis.sismember("#{key_base}:sets:resource-2", 'group-1')
23
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-1')
24
- refute redis.sismember("#{key_base}:sets:resource-4", 'group-1')
25
-
26
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-2')
27
- refute redis.sismember("#{key_base}:sets:resource-2", 'group-2')
28
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-2')
29
- assert redis.sismember("#{key_base}:sets:resource-4", 'group-2')
28
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
29
+ assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
30
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
31
+ refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
32
+
33
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
34
+ refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
35
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
36
+ assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
30
37
  end
31
38
 
32
39
  def test_stores_sets_by_group
@@ -36,15 +43,15 @@ module Commendo
36
43
  cs = ContentSet.new(redis, key_base)
37
44
  cs.add_by_group('group-1', 'resource-1', 'resource-2', 'resource-3')
38
45
  cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
39
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-1')
40
- assert redis.sismember("#{key_base}:sets:resource-2", 'group-1')
41
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-1')
42
- refute redis.sismember("#{key_base}:sets:resource-4", 'group-1')
43
-
44
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-2')
45
- refute redis.sismember("#{key_base}:sets:resource-2", 'group-2')
46
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-2')
47
- assert redis.sismember("#{key_base}:sets:resource-4", 'group-2')
46
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
47
+ assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
48
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
49
+ refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
50
+
51
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
52
+ refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
53
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
54
+ assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
48
55
  end
49
56
 
50
57
  def test_calculates_similarity_scores
@@ -95,6 +102,60 @@ module Commendo
95
102
  skip
96
103
  end
97
104
 
105
+ def test_deletes_resource_from_everywhere
106
+ redis = Redis.new(db: 15)
107
+ redis.flushdb
108
+ key_base = 'CommendoTests'
109
+ cs = ContentSet.new(redis, key_base)
110
+ (3..23).each do |group|
111
+ (3..23).each do |res|
112
+ cs.add_by_group(group, res) if res % group == 0
113
+ end
114
+ end
115
+ cs.calculate_similarity
116
+ assert similar_to(cs, 18, 12)
117
+
118
+ cs.delete(12)
119
+ assert_equal [], cs.similar_to(12)
120
+ refute similar_to(cs, 18, 12)
121
+
122
+ cs.calculate_similarity
123
+ assert_equal [], cs.similar_to(12)
124
+ refute similar_to(cs, 18, 12)
125
+
126
+ end
127
+
128
+ def test_accepts_incremental_updates
129
+ redis = Redis.new(db: 15)
130
+ redis.flushdb
131
+ key_base = 'CommendoTests'
132
+ cs = ContentSet.new(redis, key_base)
133
+ (3..23).each do |group|
134
+ (3..23).each do |res|
135
+ cs.add(res, group) if res % group == 0
136
+ end
137
+ end
138
+ cs.calculate_similarity
139
+ assert similar_to(cs, 18, 12)
140
+ refute similar_to(cs, 10, 12)
141
+
142
+ cs.add_and_calculate(12, 'foo', true)
143
+ cs.add_and_calculate(10, 'foo', true)
144
+ assert similar_to(cs, 10, 12)
145
+ end
146
+
147
+ def test_accepts_tag_collection
148
+ skip
149
+ end
150
+
151
+ def test_filters_by_tag_collection
152
+ skip
153
+ end
154
+
155
+ def similar_to(cs, resource, similar)
156
+ cs.similar_to(resource).select { |sim| sim[:resource] == "#{similar}" }.length > 0
157
+ end
158
+
98
159
  end
99
160
 
100
161
  end
@@ -0,0 +1,47 @@
1
+ gem 'minitest'
2
+ require 'minitest/autorun'
3
+ require 'minitest/pride'
4
+ require 'minitest/mock'
5
+ require 'mocha/setup'
6
+ require 'commendo'
7
+
8
+ module Commendo
9
+
10
+ class WeightedGroupTest < Minitest::Test
11
+
12
+ def test_calls_each_content_set
13
+ redis = Redis.new(db: 15)
14
+ redis.flushdb
15
+ cs1 = ContentSet.new(redis, 'CommendoTests:ContentSet1')
16
+ cs2 = ContentSet.new(redis, 'CommendoTests:ContentSet2')
17
+ cs3 = ContentSet.new(redis, 'CommendoTests:ContentSet3')
18
+ (3..23).each do |group|
19
+ (3..23).each do |res|
20
+ cs1.add_by_group(group, res) if (res % group == 0) && (res % 2 == 0)
21
+ cs2.add_by_group(group, res) if (res % group == 0) && (res % 3 == 0)
22
+ cs3.add_by_group(group, res) if (res % group == 0) && (res % 6 == 0)
23
+ end
24
+ end
25
+ [cs1, cs2, cs3].each { |cs| cs.calculate_similarity }
26
+ weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
27
+ expected = [
28
+ {resource: '6', similarity: 55.5},
29
+ {resource: '12', similarity: 36.99999999999963},
30
+ {resource: '9', similarity: 5.0},
31
+ {resource: '3', similarity: 2.5},
32
+ {resource: '21', similarity: 1.6666666666666998},
33
+ {resource: '15', similarity: 1.6666666666666998}
34
+ ]
35
+
36
+ assert_equal expected, weighted_group.similar_to(18)
37
+ end
38
+
39
+ def test_precalculates
40
+ skip
41
+ end
42
+
43
+
44
+
45
+ end
46
+
47
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commendo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-28 00:00:00.000000000 Z
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
@@ -112,7 +112,9 @@ files:
112
112
  - lib/commendo/content_set.rb
113
113
  - lib/commendo/similarity.lua
114
114
  - lib/commendo/version.rb
115
+ - lib/commendo/weighted_group.rb
115
116
  - test/content_set_test.rb
117
+ - test/weighted_group_test.rb
116
118
  homepage: ''
117
119
  licenses:
118
120
  - MIT
@@ -139,4 +141,5 @@ specification_version: 4
139
141
  summary: A Jaccard-similarity recommender using Redis sets
140
142
  test_files:
141
143
  - test/content_set_test.rb
144
+ - test/weighted_group_test.rb
142
145
  has_rdoc: