commendo 0.0.1 → 0.0.2

This diff shows the changes between two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ccb7bd9fd0ad412db66aeda26a0048db1d3dc040
4
- data.tar.gz: 0afcaf6c33e8eed6b27401e3bce8bf67dc59440a
3
+ metadata.gz: 01e2c317e998efe31d72e1a399d1d8eb677854a7
4
+ data.tar.gz: fad0b2a2c09333ef31eff3f3aaaf91c7b3d1862f
5
5
  SHA512:
6
- metadata.gz: 71d5f32a19e6d7de531dc3e99727caafc400eb0d7f0a3aa326f7c03a4b11ed5962654aea49499a75fc1d49344e156987bf20424d778562b901924fa8af939fde
7
- data.tar.gz: 12f3145df67a43dd59eb56be2884118becb03c93535e7bb25dffaa4fc0d2a9ddeda6a563ea100c658d752f20567c48099ae8cbd2a8af2cb0dd1e6818780a4e2f
6
+ metadata.gz: 8d0563d3909815866885f6f654c172f39675c442fb24a7306162090f848e2e50f4161aabf0fcc4f4779e474ff3b5c9902ec8d5c7cdc66db69cd7a3886f17a9bc
7
+ data.tar.gz: f05702dd19c5be8b6480ef4a513294c493f7138a2bb2d3c5f329fc6a734a9060660ab168f4d987f830c188142fc6bd5334c0a903b0f45024d7810ba7cb02e284
@@ -1,2 +1,5 @@
1
+ # 0.0.2 / 2014-03-31
2
+ * [FEATURE] Weighted groups. Incremental additions. Deletions.
3
+
1
4
  # 0.0.1 / 2014-03-28
2
5
  * [FEATURE] Initial Release
data/Rakefile CHANGED
@@ -46,8 +46,8 @@ task :load_traffic_from_tsv, :filename do |task, args|
46
46
  end
47
47
 
48
48
  puts 'Processing...'
49
- cs.calculate_similarity(0.1999999999) { |key|
50
- puts key
49
+ cs.calculate_similarity(0) { |key, index, total|
50
+ puts "#{key} - #{index}/#{total} = #{(index/total.to_f*100).round(2)}%"
51
51
  }
52
52
  end
53
53
 
data/lib/commendo.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'redis'
2
2
  require 'commendo/version'
3
3
  require 'commendo/content_set'
4
+ require 'commendo/weighted_group'
4
5
 
5
6
  module Commendo
6
7
  # Your code goes here...
data/lib/commendo/content_set.rb CHANGED
@@ -9,43 +9,70 @@ module Commendo
9
9
  end
10
10
 
11
11
  def add_by_group(group, *resources)
12
- resources.each do |res|
13
- redis.sadd("#{set_key_base}:#{res}", group)
12
+ redis.sadd(group_key(group), resources)
13
+ resources.each do |resource|
14
+ redis.sadd(resource_key(resource), group)
14
15
  end
15
16
  end
16
17
 
17
18
  def add(resource, *groups)
18
- redis.sadd("#{set_key_base}:#{resource}", groups)
19
+ redis.sadd(resource_key(resource), groups)
20
+ groups.each do |group|
21
+ redis.sadd(group_key(group), resource)
22
+ end
19
23
  end
20
24
 
21
- def calculate_similarity(threshold = 0)
22
- keys = redis.keys("#{set_key_base}:*")
23
- keys.each do |outer_key|
24
- outer_res = outer_key.gsub(/^#{set_key_base}:/, '')
25
- outer_similarity_key = "#{similar_key_base}:#{outer_res}"
26
- calculate_similarity_in_redis(outer_key, outer_similarity_key, threshold)
27
- yield(outer_key) if block_given?
25
+ def add_and_calculate(resource, *groups)
26
+ add(resource, *groups)
27
+ groups = redis.smembers(resource_key(resource))
28
+ group_keys = groups.map { |group| group_key(group) }
29
+ resources = redis.sunion(*group_keys)
30
+ resources.combination(2) do |l, r|
31
+ intersect = redis.sinter(resource_key(l), resource_key(r))
32
+ if (intersect.length > 0)
33
+ union = redis.sunion(resource_key(l), resource_key(r))
34
+ jaccard = intersect.length / union.length.to_f
35
+ redis.zadd(similarity_key(l), jaccard, r)
36
+ redis.zadd(similarity_key(r), jaccard, l)
37
+ end
38
+ end
39
+ end
40
+
41
+ def delete(resource)
42
+ similar = similar_to(resource)
43
+ similar.each do |other_resource|
44
+ redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
28
45
  end
46
+ redis.del(similarity_key(resource))
47
+ redis.del(resource_key(resource))
48
+ end
29
49
 
50
+ def calculate_similarity(threshold = 0)
51
+ #TODO make this use scan for scaling
52
+ keys = redis.keys("#{resource_key_base}:*")
53
+ keys.each_with_index do |outer_key, i|
54
+ outer_res = outer_key.gsub(/^#{resource_key_base}:/, '')
55
+ calculate_similarity_in_redis(outer_key, similarity_key(outer_res), threshold)
56
+ yield(outer_key, i, keys.length) if block_given?
57
+ end
30
58
  end
31
59
 
32
60
  def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
33
- redis.eval(similarity_lua, [set_key, similiarity_key], [set_key_base, threshold])
61
+ #TODO maybe consider using ary.combination to get finer grained operation in lua
62
+ redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
34
63
  end
35
64
 
36
65
  def similar_to(resource)
37
- similar = []
38
- similar_resources = redis.hgetall("#{similar_key_base}:#{resource}")
39
- similar_resources.each do |resource, similarity|
40
- similar << {resource: resource, similarity: similarity.to_f}
41
- end
42
- similar.sort! do |x, y|
43
- if y[:similarity] != x[:similarity]
44
- y[:similarity] <=> x[:similarity]
45
- else
46
- y[:resource] <=> x[:resource]
47
- end
66
+ similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
67
+
68
+ similar_resources.map do |resource|
69
+ {resource: resource[0], similarity: resource[1].to_f}
48
70
  end
71
+
72
+ end
73
+
74
+ def similarity_key(resource)
75
+ "#{similar_key_base}:#{resource}"
49
76
  end
50
77
 
51
78
  private
@@ -59,14 +86,22 @@ module Commendo
59
86
  file.read
60
87
  end
61
88
 
62
- def set_key_base
63
- "#{key_base}:sets"
64
- end
65
-
66
89
  def similar_key_base
67
90
  "#{key_base}:similar"
68
91
  end
69
92
 
93
+ def resource_key_base
94
+ "#{key_base}:resources"
95
+ end
96
+
97
+ def resource_key(resource)
98
+ "#{resource_key_base}:#{resource}"
99
+ end
100
+
101
+ def group_key(group)
102
+ "#{key_base}:groups:#{group}"
103
+ end
104
+
70
105
  end
71
106
 
72
107
  end
data/lib/commendo/similarity.lua CHANGED
@@ -20,7 +20,7 @@ for _,key in ipairs(key_matches) do
20
20
  -- table.insert(similar, key)
21
21
  -- table.insert(similar, similarity)
22
22
  local resource = key:gsub('%' .. set_key_base .. ':', '')
23
- redis.call('HSET', sim_key, resource, similarity)
23
+ redis.call('ZADD', sim_key, similarity, resource)
24
24
  end
25
25
  end
26
26
  end
data/lib/commendo/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Commendo
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
@@ -0,0 +1,31 @@
1
+ module Commendo
2
+
3
+ class WeightedGroup
4
+
5
+ attr_accessor :content_sets, :redis, :key_base
6
+
7
+ def initialize(redis, key_base, *content_sets)
8
+ @content_sets, @redis, @key_base = content_sets, redis, key_base
9
+ end
10
+
11
+ def similar_to(resource)
12
+ keys = content_sets.map do |cs|
13
+ cs[:cs].similarity_key(resource)
14
+ end
15
+ weights = content_sets.map do |cs|
16
+ cs[:weight]
17
+ end
18
+ tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
19
+ redis.zunionstore(tmp_key, keys, weights: weights)
20
+ similar_resources = redis.zrevrange(tmp_key, 0, -1, with_scores: true)
21
+ redis.del(tmp_key)
22
+
23
+ similar_resources.map do |resource|
24
+ {resource: resource[0], similarity: resource[1].to_f}
25
+ end
26
+
27
+ end
28
+
29
+ end
30
+
31
+ end
data/test/content_set_test.rb CHANGED
@@ -9,6 +9,13 @@ module Commendo
9
9
 
10
10
  class ContentSetTest < Minitest::Test
11
11
 
12
+ def test_gives_similarity_key_for_resource
13
+ redis = Redis.new(db: 15)
14
+ key_base = 'CommendoTests'
15
+ cs = ContentSet.new(redis, key_base)
16
+ assert_equal 'CommendoTests:similar:resource-1', cs.similarity_key('resource-1')
17
+ end
18
+
12
19
  def test_stores_sets_by_resource
13
20
  redis = Redis.new(db: 15)
14
21
  redis.flushdb
@@ -18,15 +25,15 @@ module Commendo
18
25
  cs.add('resource-2', 'group-1')
19
26
  cs.add('resource-3', 'group-1', 'group-2')
20
27
  cs.add('resource-4', 'group-2')
21
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-1')
22
- assert redis.sismember("#{key_base}:sets:resource-2", 'group-1')
23
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-1')
24
- refute redis.sismember("#{key_base}:sets:resource-4", 'group-1')
25
-
26
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-2')
27
- refute redis.sismember("#{key_base}:sets:resource-2", 'group-2')
28
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-2')
29
- assert redis.sismember("#{key_base}:sets:resource-4", 'group-2')
28
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
29
+ assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
30
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
31
+ refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
32
+
33
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
34
+ refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
35
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
36
+ assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
30
37
  end
31
38
 
32
39
  def test_stores_sets_by_group
@@ -36,15 +43,15 @@ module Commendo
36
43
  cs = ContentSet.new(redis, key_base)
37
44
  cs.add_by_group('group-1', 'resource-1', 'resource-2', 'resource-3')
38
45
  cs.add_by_group('group-2', 'resource-1', 'resource-3', 'resource-4')
39
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-1')
40
- assert redis.sismember("#{key_base}:sets:resource-2", 'group-1')
41
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-1')
42
- refute redis.sismember("#{key_base}:sets:resource-4", 'group-1')
43
-
44
- assert redis.sismember("#{key_base}:sets:resource-1", 'group-2')
45
- refute redis.sismember("#{key_base}:sets:resource-2", 'group-2')
46
- assert redis.sismember("#{key_base}:sets:resource-3", 'group-2')
47
- assert redis.sismember("#{key_base}:sets:resource-4", 'group-2')
46
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-1')
47
+ assert redis.sismember("#{key_base}:resources:resource-2", 'group-1')
48
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-1')
49
+ refute redis.sismember("#{key_base}:resources:resource-4", 'group-1')
50
+
51
+ assert redis.sismember("#{key_base}:resources:resource-1", 'group-2')
52
+ refute redis.sismember("#{key_base}:resources:resource-2", 'group-2')
53
+ assert redis.sismember("#{key_base}:resources:resource-3", 'group-2')
54
+ assert redis.sismember("#{key_base}:resources:resource-4", 'group-2')
48
55
  end
49
56
 
50
57
  def test_calculates_similarity_scores
@@ -95,6 +102,60 @@ module Commendo
95
102
  skip
96
103
  end
97
104
 
105
+ def test_deletes_resource_from_everywhere
106
+ redis = Redis.new(db: 15)
107
+ redis.flushdb
108
+ key_base = 'CommendoTests'
109
+ cs = ContentSet.new(redis, key_base)
110
+ (3..23).each do |group|
111
+ (3..23).each do |res|
112
+ cs.add_by_group(group, res) if res % group == 0
113
+ end
114
+ end
115
+ cs.calculate_similarity
116
+ assert similar_to(cs, 18, 12)
117
+
118
+ cs.delete(12)
119
+ assert_equal [], cs.similar_to(12)
120
+ refute similar_to(cs, 18, 12)
121
+
122
+ cs.calculate_similarity
123
+ assert_equal [], cs.similar_to(12)
124
+ refute similar_to(cs, 18, 12)
125
+
126
+ end
127
+
128
+ def test_accepts_incremental_updates
129
+ redis = Redis.new(db: 15)
130
+ redis.flushdb
131
+ key_base = 'CommendoTests'
132
+ cs = ContentSet.new(redis, key_base)
133
+ (3..23).each do |group|
134
+ (3..23).each do |res|
135
+ cs.add(res, group) if res % group == 0
136
+ end
137
+ end
138
+ cs.calculate_similarity
139
+ assert similar_to(cs, 18, 12)
140
+ refute similar_to(cs, 10, 12)
141
+
142
+ cs.add_and_calculate(12, 'foo', true)
143
+ cs.add_and_calculate(10, 'foo', true)
144
+ assert similar_to(cs, 10, 12)
145
+ end
146
+
147
+ def test_accepts_tag_collection
148
+ skip
149
+ end
150
+
151
+ def test_filters_by_tag_collection
152
+ skip
153
+ end
154
+
155
+ def similar_to(cs, resource, similar)
156
+ cs.similar_to(resource).select { |sim| sim[:resource] == "#{similar}" }.length > 0
157
+ end
158
+
98
159
  end
99
160
 
100
161
  end
data/test/weighted_group_test.rb ADDED
@@ -0,0 +1,47 @@
1
+ gem 'minitest'
2
+ require 'minitest/autorun'
3
+ require 'minitest/pride'
4
+ require 'minitest/mock'
5
+ require 'mocha/setup'
6
+ require 'commendo'
7
+
8
+ module Commendo
9
+
10
+ class WeightedGroupTest < Minitest::Test
11
+
12
+ def test_calls_each_content_set
13
+ redis = Redis.new(db: 15)
14
+ redis.flushdb
15
+ cs1 = ContentSet.new(redis, 'CommendoTests:ContentSet1')
16
+ cs2 = ContentSet.new(redis, 'CommendoTests:ContentSet2')
17
+ cs3 = ContentSet.new(redis, 'CommendoTests:ContentSet3')
18
+ (3..23).each do |group|
19
+ (3..23).each do |res|
20
+ cs1.add_by_group(group, res) if (res % group == 0) && (res % 2 == 0)
21
+ cs2.add_by_group(group, res) if (res % group == 0) && (res % 3 == 0)
22
+ cs3.add_by_group(group, res) if (res % group == 0) && (res % 6 == 0)
23
+ end
24
+ end
25
+ [cs1, cs2, cs3].each { |cs| cs.calculate_similarity }
26
+ weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
27
+ expected = [
28
+ {resource: '6', similarity: 55.5},
29
+ {resource: '12', similarity: 36.99999999999963},
30
+ {resource: '9', similarity: 5.0},
31
+ {resource: '3', similarity: 2.5},
32
+ {resource: '21', similarity: 1.6666666666666998},
33
+ {resource: '15', similarity: 1.6666666666666998}
34
+ ]
35
+
36
+ assert_equal expected, weighted_group.similar_to(18)
37
+ end
38
+
39
+ def test_precalculates
40
+ skip
41
+ end
42
+
43
+
44
+
45
+ end
46
+
47
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commendo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-28 00:00:00.000000000 Z
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
@@ -112,7 +112,9 @@ files:
112
112
  - lib/commendo/content_set.rb
113
113
  - lib/commendo/similarity.lua
114
114
  - lib/commendo/version.rb
115
+ - lib/commendo/weighted_group.rb
115
116
  - test/content_set_test.rb
117
+ - test/weighted_group_test.rb
116
118
  homepage: ''
117
119
  licenses:
118
120
  - MIT
@@ -139,4 +141,5 @@ specification_version: 4
139
141
  summary: A Jaccard-similarity recommender using Redis sets
140
142
  test_files:
141
143
  - test/content_set_test.rb
144
+ - test/weighted_group_test.rb
142
145
  has_rdoc: