commendo 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6882c6ed2001ecae7be9eae066e3f534beeb1559
4
- data.tar.gz: 0a0982ccc0a17285537b1adfc12bfe854da6c10f
3
+ metadata.gz: 4fb3a3f98e3846e5f75d884630c3735ed57f62b1
4
+ data.tar.gz: 3944a25fc941e55a343b5201bf40ef77fbef08fb
5
5
  SHA512:
6
- metadata.gz: 834aeca013589496b5e6b95a0d5ec7e327fc6192ba57e7dab7c2d8507055013058a96b354b13bb2422c21caa7cce92c0fd5f0011c60d3cb5c202b723eb94dbf4
7
- data.tar.gz: d773e4c3776f66ba003cf7b7c1327a376e439f8f72480b903d1512db906c27aa55c9f01a052c11a24fbde4f4e90d7bb50b13d6494fbd963ea407d0d4807eb5fd
6
+ metadata.gz: fe3b6ea17ed10adc4ae2ffa12e7179d305970b4c8bf0d100671df8b6f800b7ffa042291c57a8d1c4d62550490654c245134f3f18c2f1733cfa0ca829018defcb
7
+ data.tar.gz: 7d01cc0c741ca514992e57b44c3defdc1a8d3de5db26f486eae3ef9bd1de62f8823974c6350178ae725bc0b61698f7cc9fdd4f1705f085a1b4799db3654c3276
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ # 0.0.4 / 2014-03-31
2
+ * [FEATURE] Similarity score pushed down into redis using two different approaches depending on size of set
3
+
1
4
  # 0.0.3 / 2014-03-31
2
5
  * [FEATURE] Filtering of recommendations based on tags
3
6
 
@@ -43,6 +43,7 @@ module Commendo
43
43
  similar.each do |other_resource|
44
44
  redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
45
45
  end
46
+ #TODO delete from groups?
46
47
  redis.del(similarity_key(resource))
47
48
  redis.del(resource_key(resource))
48
49
  end
@@ -50,18 +51,22 @@ module Commendo
50
51
  def calculate_similarity(threshold = 0)
51
52
  #TODO make this use scan for scaling
52
53
  keys = redis.keys("#{resource_key_base}:*")
53
- keys.each_with_index do |outer_key, i|
54
- outer_res = outer_key.gsub(/^#{resource_key_base}:/, '')
55
- calculate_similarity_in_redis(outer_key, similarity_key(outer_res), threshold)
56
- yield(outer_key, i, keys.length) if block_given?
54
+ keys.each_with_index do |key, i|
55
+ yield(key, i, keys.length) if block_given?
56
+ completed = redis.eval(similarity_lua, keys: [key], argv: [resource_key_base, similar_key_base, group_key_base, threshold])
57
+ if completed == 999
58
+ resource = key.gsub(/^#{resource_key_base}:/, '')
59
+ groups = redis.smembers(resource_key(resource))
60
+ group_keys = groups.map { |group| group_key(group) }
61
+ resources = redis.sunion(*group_keys)
62
+ resources.each do |to_compare|
63
+ next if resource == to_compare
64
+ redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [resource, to_compare, threshold])
65
+ end
66
+ end
57
67
  end
58
68
  end
59
69
 
60
- def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
61
- #TODO maybe consider using ary.combination to get finer grained operation in lua
62
- redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
63
- end
64
-
65
70
  def similar_to(resource)
66
71
  similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
67
72
 
@@ -98,6 +103,15 @@ module Commendo
98
103
  file.read
99
104
  end
100
105
 
106
+ def pair_comparison_lua
107
+ @pair_comparison_lua ||= load_pair_comparison_lua
108
+ end
109
+
110
+ def load_pair_comparison_lua
111
+ file = File.open(File.expand_path('../pair_comparison.lua', __FILE__), "r")
112
+ file.read
113
+ end
114
+
101
115
  def similar_key_base
102
116
  "#{key_base}:similar"
103
117
  end
@@ -110,8 +124,13 @@ module Commendo
110
124
  "#{resource_key_base}:#{resource}"
111
125
  end
112
126
 
127
+ def group_key_base
128
+ "#{key_base}:groups"
129
+ end
130
+
131
+
113
132
  def group_key(group)
114
- "#{key_base}:groups:#{group}"
133
+ "#{group_key_base}:#{group}"
115
134
  end
116
135
 
117
136
  end
@@ -0,0 +1,29 @@
1
+ local left_key = KEYS[1]
2
+ local right_key = KEYS[2]
3
+ local left_similarity_key = KEYS[3]
4
+ local right_similarity_key = KEYS[4]
5
+
6
+ local left = tonumber(ARGV[1])
7
+ local right = tonumber(ARGV[2])
8
+ local threshold = tonumber(ARGV[3])
9
+
10
+ local function round(num, idp)
11
+ local mult = 10^(idp or 0)
12
+ return math.floor(num * mult + 0.5) / mult
13
+ end
14
+
15
+ redis.log(redis.LOG_NOTICE, 'Running pair comparison for ' .. left_key .. ' ' .. right_key)
16
+
17
+ local intersect = table.getn(redis.call('SINTER', left_key, right_key))
18
+ if intersect > 0 then
19
+ local union = table.getn(redis.call('SUNION', left_key, right_key))
20
+ local similarity = round(intersect / union, 3)
21
+ if similarity > threshold then
22
+ redis.call('ZADD', left_similarity_key, similarity, right)
23
+ redis.call('ZADD', right_similarity_key, similarity, left)
24
+ end
25
+ end
26
+
27
+ redis.log(redis.LOG_NOTICE, 'Finished running pair comparison for ' .. left_key .. ' ' .. right_key)
28
+
29
+ return true
@@ -1,31 +1,56 @@
1
- local root_key = KEYS[1]
2
- local sim_key = KEYS[2]
3
- local set_key_base = ARGV[1]
4
- local threshold = tonumber(ARGV[2])
5
-
6
- redis.log(redis.LOG_NOTICE, 'Running similarity for ' .. root_key)
7
- local key_matches = redis.call('KEYS', set_key_base .. ':*')
8
-
9
- redis.call('DEL', sim_key)
10
- local count = 0
11
- -- local similar = {}
12
- for _,key in ipairs(key_matches) do
13
- if key ~= root_key then
14
- count = count + 1
15
- local intersect = table.getn(redis.call('SINTER', root_key, key))
16
- if intersect > 0 then
17
- local union = table.getn(redis.call('SUNION', root_key, key))
18
- local similarity = intersect / union
19
- if similarity > threshold then
20
- -- table.insert(similar, key)
21
- -- table.insert(similar, similarity)
22
- local resource = key:gsub('%' .. set_key_base .. ':', '')
23
- redis.call('ZADD', sim_key, similarity, resource)
1
+ local resource_key = KEYS[1]
2
+ local resource_key_base = ARGV[1]
3
+ local sim_key_base = ARGV[2]
4
+ local group_key_base = ARGV[3]
5
+ local threshold = tonumber(ARGV[4])
6
+
7
+ local function round(num, idp)
8
+ local mult = 10^(idp or 0)
9
+ return math.floor(num * mult + 0.5) / mult
10
+ end
11
+
12
+ redis.log(redis.LOG_NOTICE, 'Running complete similarity for ' .. resource_key)
13
+
14
+ local resource = resource_key:gsub('%' .. resource_key_base .. ':', '')
15
+ local groups = redis.call('smembers', resource_key)
16
+
17
+ if table.getn(groups) > 999 then
18
+ redis.log(redis.LOG_NOTICE, 'Complete similarity too large for ' .. resource_key .. ', ' .. table.getn(groups))
19
+ return 999
20
+ end
21
+
22
+ local group_keys = {}
23
+ for _,group in ipairs(groups) do
24
+ table.insert(group_keys, group_key_base .. ':' .. group)
25
+ end
26
+ --redis.log(redis.LOG_NOTICE, 'Found ' .. table.getn(group_keys) .. ' group keys')
27
+
28
+ local resources = redis.call('sunion', unpack(group_keys))
29
+
30
+ --local resources = redis.call('sunion', unpack(group_keys))
31
+ --redis.log(redis.LOG_NOTICE, 'Found ' .. table.getn(resources) .. ' resources')
32
+
33
+ local previous = 'foo'
34
+ for _,to_compare in ipairs(resources) do
35
+ -- redis.log(redis.LOG_NOTICE, 'Comparing ' .. resource .. ' and ' .. to_compare)
36
+ if to_compare ~= previous then
37
+ previous = to_compare
38
+ if resource > to_compare then
39
+ -- redis.log(redis.LOG_NOTICE, 'Calculating similarity for ' .. resource .. ' and ' .. to_compare)
40
+ local intersect = table.getn(redis.call('SINTER', resource_key, resource_key_base .. ':' .. to_compare))
41
+ if intersect > 0 then
42
+ local union = table.getn(redis.call('SUNION', resource_key, resource_key_base .. ':' .. to_compare))
43
+ local similarity = round(intersect / union, 3)
44
+ if similarity > threshold then
45
+ -- redis.log(redis.LOG_NOTICE, resource .. ' and ' .. to_compare .. ' scored ' .. similarity)
46
+ redis.call('ZADD', sim_key_base .. ':' .. resource, similarity, to_compare)
47
+ redis.call('ZADD', sim_key_base .. ':' .. to_compare, similarity, resource)
48
+ end
24
49
  end
25
50
  end
26
51
  end
27
52
  end
28
53
 
29
- -- redis.call('HMSET', sim_key, unpack(similar))
30
- redis.log(redis.LOG_NOTICE, 'Finished running similarity for ' .. root_key)
54
+ redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)
55
+
31
56
  return true
@@ -1,3 +1,3 @@
1
1
  module Commendo
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.4'
3
3
  end
@@ -21,7 +21,7 @@ module Commendo
21
21
  redis.del(tmp_key)
22
22
 
23
23
  similar_resources.map do |resource|
24
- {resource: resource[0], similarity: resource[1].to_f}
24
+ {resource: resource[0], similarity: resource[1].to_f.round(3)}
25
25
  end
26
26
 
27
27
  end
@@ -68,10 +68,10 @@ module Commendo
68
68
  expected = [
69
69
  {resource: '9', similarity: 0.5},
70
70
  {resource: '6', similarity: 0.5},
71
- {resource: '12', similarity: 0.33333333333333},
71
+ {resource: '12', similarity: 0.333},
72
72
  {resource: '3', similarity: 0.25},
73
- {resource: '21', similarity: 0.16666666666667},
74
- {resource: '15', similarity: 0.16666666666667}
73
+ {resource: '21', similarity: 0.167},
74
+ {resource: '15', similarity: 0.167}
75
75
  ]
76
76
  assert_equal expected, cs.similar_to(18)
77
77
  end
@@ -26,13 +26,12 @@ module Commendo
26
26
  weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
27
27
  expected = [
28
28
  {resource: '6', similarity: 55.5},
29
- {resource: '12', similarity: 36.99999999999963},
29
+ {resource: '12', similarity: 36.963},
30
30
  {resource: '9', similarity: 5.0},
31
31
  {resource: '3', similarity: 2.5},
32
- {resource: '21', similarity: 1.6666666666666998},
33
- {resource: '15', similarity: 1.6666666666666998}
32
+ {resource: '21', similarity: 1.67},
33
+ {resource: '15', similarity: 1.67}
34
34
  ]
35
-
36
35
  assert_equal expected, weighted_group.similar_to(18)
37
36
  end
38
37
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commendo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
@@ -110,6 +110,7 @@ files:
110
110
  - commendo.gemspec
111
111
  - lib/commendo.rb
112
112
  - lib/commendo/content_set.rb
113
+ - lib/commendo/pair_comparison.lua
113
114
  - lib/commendo/similarity.lua
114
115
  - lib/commendo/tag_set.rb
115
116
  - lib/commendo/version.rb