commendo 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6882c6ed2001ecae7be9eae066e3f534beeb1559
4
- data.tar.gz: 0a0982ccc0a17285537b1adfc12bfe854da6c10f
3
+ metadata.gz: 4fb3a3f98e3846e5f75d884630c3735ed57f62b1
4
+ data.tar.gz: 3944a25fc941e55a343b5201bf40ef77fbef08fb
5
5
  SHA512:
6
- metadata.gz: 834aeca013589496b5e6b95a0d5ec7e327fc6192ba57e7dab7c2d8507055013058a96b354b13bb2422c21caa7cce92c0fd5f0011c60d3cb5c202b723eb94dbf4
7
- data.tar.gz: d773e4c3776f66ba003cf7b7c1327a376e439f8f72480b903d1512db906c27aa55c9f01a052c11a24fbde4f4e90d7bb50b13d6494fbd963ea407d0d4807eb5fd
6
+ metadata.gz: fe3b6ea17ed10adc4ae2ffa12e7179d305970b4c8bf0d100671df8b6f800b7ffa042291c57a8d1c4d62550490654c245134f3f18c2f1733cfa0ca829018defcb
7
+ data.tar.gz: 7d01cc0c741ca514992e57b44c3defdc1a8d3de5db26f486eae3ef9bd1de62f8823974c6350178ae725bc0b61698f7cc9fdd4f1705f085a1b4799db3654c3276
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ # 0.0.4 / 2014-03-31
2
+ * [FEATURE] Similarity score pushed down into redis using two different approaches depending on size of set
3
+
1
4
  # 0.0.3 / 2014-03-31
2
5
  * [FEATURE] Filtering of recommendations based on tags
3
6
 
@@ -43,6 +43,7 @@ module Commendo
43
43
  similar.each do |other_resource|
44
44
  redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
45
45
  end
46
+ #TODO delete from groups?
46
47
  redis.del(similarity_key(resource))
47
48
  redis.del(resource_key(resource))
48
49
  end
@@ -50,18 +51,22 @@ module Commendo
50
51
  def calculate_similarity(threshold = 0)
51
52
  #TODO make this use scan for scaling
52
53
  keys = redis.keys("#{resource_key_base}:*")
53
- keys.each_with_index do |outer_key, i|
54
- outer_res = outer_key.gsub(/^#{resource_key_base}:/, '')
55
- calculate_similarity_in_redis(outer_key, similarity_key(outer_res), threshold)
56
- yield(outer_key, i, keys.length) if block_given?
54
+ keys.each_with_index do |key, i|
55
+ yield(key, i, keys.length) if block_given?
56
+ completed = redis.eval(similarity_lua, keys: [key], argv: [resource_key_base, similar_key_base, group_key_base, threshold])
57
+ if completed == 999
58
+ resource = key.gsub(/^#{resource_key_base}:/, '')
59
+ groups = redis.smembers(resource_key(resource))
60
+ group_keys = groups.map { |group| group_key(group) }
61
+ resources = redis.sunion(*group_keys)
62
+ resources.each do |to_compare|
63
+ next if resource == to_compare
64
+ redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [resource, to_compare, threshold])
65
+ end
66
+ end
57
67
  end
58
68
  end
59
69
 
60
- def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
61
- #TODO maybe consider using ary.combination to get finer grained operation in lua
62
- redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
63
- end
64
-
65
70
  def similar_to(resource)
66
71
  similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
67
72
 
@@ -98,6 +103,15 @@ module Commendo
98
103
  file.read
99
104
  end
100
105
 
106
+ def pair_comparison_lua
107
+ @pair_comparison_lua ||= load_pair_comparison_lua
108
+ end
109
+
110
+ def load_pair_comparison_lua
111
+ file = File.open(File.expand_path('../pair_comparison.lua', __FILE__), "r")
112
+ file.read
113
+ end
114
+
101
115
  def similar_key_base
102
116
  "#{key_base}:similar"
103
117
  end
@@ -110,8 +124,13 @@ module Commendo
110
124
  "#{resource_key_base}:#{resource}"
111
125
  end
112
126
 
127
+ def group_key_base
128
+ "#{key_base}:groups"
129
+ end
130
+
131
+
113
132
  def group_key(group)
114
- "#{key_base}:groups:#{group}"
133
+ "#{group_key_base}:#{group}"
115
134
  end
116
135
 
117
136
  end
@@ -0,0 +1,29 @@
1
+ local left_key = KEYS[1]
2
+ local right_key = KEYS[2]
3
+ local left_similarity_key = KEYS[3]
4
+ local right_similarity_key = KEYS[4]
5
+
6
+ local left = tonumber(ARGV[1])
7
+ local right = tonumber(ARGV[2])
8
+ local threshold = tonumber(ARGV[3])
9
+
10
+ local function round(num, idp)
11
+ local mult = 10^(idp or 0)
12
+ return math.floor(num * mult + 0.5) / mult
13
+ end
14
+
15
+ redis.log(redis.LOG_NOTICE, 'Running pair comparison for ' .. left_key .. ' ' .. right_key)
16
+
17
+ local intersect = table.getn(redis.call('SINTER', left_key, right_key))
18
+ if intersect > 0 then
19
+ local union = table.getn(redis.call('SUNION', left_key, right_key))
20
+ local similarity = round(intersect / union, 3)
21
+ if similarity > threshold then
22
+ redis.call('ZADD', left_similarity_key, similarity, right)
23
+ redis.call('ZADD', right_similarity_key, similarity, left)
24
+ end
25
+ end
26
+
27
+ redis.log(redis.LOG_NOTICE, 'Finished running pair comparison for ' .. left_key .. ' ' .. right_key)
28
+
29
+ return true
@@ -1,31 +1,56 @@
1
- local root_key = KEYS[1]
2
- local sim_key = KEYS[2]
3
- local set_key_base = ARGV[1]
4
- local threshold = tonumber(ARGV[2])
5
-
6
- redis.log(redis.LOG_NOTICE, 'Running similarity for ' .. root_key)
7
- local key_matches = redis.call('KEYS', set_key_base .. ':*')
8
-
9
- redis.call('DEL', sim_key)
10
- local count = 0
11
- -- local similar = {}
12
- for _,key in ipairs(key_matches) do
13
- if key ~= root_key then
14
- count = count + 1
15
- local intersect = table.getn(redis.call('SINTER', root_key, key))
16
- if intersect > 0 then
17
- local union = table.getn(redis.call('SUNION', root_key, key))
18
- local similarity = intersect / union
19
- if similarity > threshold then
20
- -- table.insert(similar, key)
21
- -- table.insert(similar, similarity)
22
- local resource = key:gsub('%' .. set_key_base .. ':', '')
23
- redis.call('ZADD', sim_key, similarity, resource)
1
+ local resource_key = KEYS[1]
2
+ local resource_key_base = ARGV[1]
3
+ local sim_key_base = ARGV[2]
4
+ local group_key_base = ARGV[3]
5
+ local threshold = tonumber(ARGV[4])
6
+
7
+ local function round(num, idp)
8
+ local mult = 10^(idp or 0)
9
+ return math.floor(num * mult + 0.5) / mult
10
+ end
11
+
12
+ redis.log(redis.LOG_NOTICE, 'Running complete similarity for ' .. resource_key)
13
+
14
+ local resource = resource_key:gsub('%' .. resource_key_base .. ':', '')
15
+ local groups = redis.call('smembers', resource_key)
16
+
17
+ if table.getn(groups) > 999 then
18
+ redis.log(redis.LOG_NOTICE, 'Complete similarity too large for ' .. resource_key .. ', ' .. table.getn(groups))
19
+ return 999
20
+ end
21
+
22
+ local group_keys = {}
23
+ for _,group in ipairs(groups) do
24
+ table.insert(group_keys, group_key_base .. ':' .. group)
25
+ end
26
+ --redis.log(redis.LOG_NOTICE, 'Found ' .. table.getn(group_keys) .. ' group keys')
27
+
28
+ local resources = redis.call('sunion', unpack(group_keys))
29
+
30
+ --local resources = redis.call('sunion', unpack(group_keys))
31
+ --redis.log(redis.LOG_NOTICE, 'Found ' .. table.getn(resources) .. ' resources')
32
+
33
+ local previous = 'foo'
34
+ for _,to_compare in ipairs(resources) do
35
+ -- redis.log(redis.LOG_NOTICE, 'Comparing ' .. resource .. ' and ' .. to_compare)
36
+ if to_compare ~= previous then
37
+ previous = to_compare
38
+ if resource > to_compare then
39
+ -- redis.log(redis.LOG_NOTICE, 'Calculating similarity for ' .. resource .. ' and ' .. to_compare)
40
+ local intersect = table.getn(redis.call('SINTER', resource_key, resource_key_base .. ':' .. to_compare))
41
+ if intersect > 0 then
42
+ local union = table.getn(redis.call('SUNION', resource_key, resource_key_base .. ':' .. to_compare))
43
+ local similarity = round(intersect / union, 3)
44
+ if similarity > threshold then
45
+ -- redis.log(redis.LOG_NOTICE, resource .. ' and ' .. to_compare .. ' scored ' .. similarity)
46
+ redis.call('ZADD', sim_key_base .. ':' .. resource, similarity, to_compare)
47
+ redis.call('ZADD', sim_key_base .. ':' .. to_compare, similarity, resource)
48
+ end
24
49
  end
25
50
  end
26
51
  end
27
52
  end
28
53
 
29
- -- redis.call('HMSET', sim_key, unpack(similar))
30
- redis.log(redis.LOG_NOTICE, 'Finished running similarity for ' .. root_key)
54
+ redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)
55
+
31
56
  return true
@@ -1,3 +1,3 @@
1
1
  module Commendo
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.4'
3
3
  end
@@ -21,7 +21,7 @@ module Commendo
21
21
  redis.del(tmp_key)
22
22
 
23
23
  similar_resources.map do |resource|
24
- {resource: resource[0], similarity: resource[1].to_f}
24
+ {resource: resource[0], similarity: resource[1].to_f.round(3)}
25
25
  end
26
26
 
27
27
  end
@@ -68,10 +68,10 @@ module Commendo
68
68
  expected = [
69
69
  {resource: '9', similarity: 0.5},
70
70
  {resource: '6', similarity: 0.5},
71
- {resource: '12', similarity: 0.33333333333333},
71
+ {resource: '12', similarity: 0.333},
72
72
  {resource: '3', similarity: 0.25},
73
- {resource: '21', similarity: 0.16666666666667},
74
- {resource: '15', similarity: 0.16666666666667}
73
+ {resource: '21', similarity: 0.167},
74
+ {resource: '15', similarity: 0.167}
75
75
  ]
76
76
  assert_equal expected, cs.similar_to(18)
77
77
  end
@@ -26,13 +26,12 @@ module Commendo
26
26
  weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
27
27
  expected = [
28
28
  {resource: '6', similarity: 55.5},
29
- {resource: '12', similarity: 36.99999999999963},
29
+ {resource: '12', similarity: 36.963},
30
30
  {resource: '9', similarity: 5.0},
31
31
  {resource: '3', similarity: 2.5},
32
- {resource: '21', similarity: 1.6666666666666998},
33
- {resource: '15', similarity: 1.6666666666666998}
32
+ {resource: '21', similarity: 1.67},
33
+ {resource: '15', similarity: 1.67}
34
34
  ]
35
-
36
35
  assert_equal expected, weighted_group.similar_to(18)
37
36
  end
38
37
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commendo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
@@ -110,6 +110,7 @@ files:
110
110
  - commendo.gemspec
111
111
  - lib/commendo.rb
112
112
  - lib/commendo/content_set.rb
113
+ - lib/commendo/pair_comparison.lua
113
114
  - lib/commendo/similarity.lua
114
115
  - lib/commendo/tag_set.rb
115
116
  - lib/commendo/version.rb