commendo 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/commendo/content_set.rb +29 -10
- data/lib/commendo/pair_comparison.lua +29 -0
- data/lib/commendo/similarity.lua +50 -25
- data/lib/commendo/version.rb +1 -1
- data/lib/commendo/weighted_group.rb +1 -1
- data/test/content_set_test.rb +3 -3
- data/test/weighted_group_test.rb +3 -4
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4fb3a3f98e3846e5f75d884630c3735ed57f62b1
|
4
|
+
data.tar.gz: 3944a25fc941e55a343b5201bf40ef77fbef08fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe3b6ea17ed10adc4ae2ffa12e7179d305970b4c8bf0d100671df8b6f800b7ffa042291c57a8d1c4d62550490654c245134f3f18c2f1733cfa0ca829018defcb
|
7
|
+
data.tar.gz: 7d01cc0c741ca514992e57b44c3defdc1a8d3de5db26f486eae3ef9bd1de62f8823974c6350178ae725bc0b61698f7cc9fdd4f1705f085a1b4799db3654c3276
|
data/CHANGELOG.md
CHANGED
data/lib/commendo/content_set.rb
CHANGED
@@ -43,6 +43,7 @@ module Commendo
|
|
43
43
|
similar.each do |other_resource|
|
44
44
|
redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
|
45
45
|
end
|
46
|
+
#TODO delete from groups?
|
46
47
|
redis.del(similarity_key(resource))
|
47
48
|
redis.del(resource_key(resource))
|
48
49
|
end
|
@@ -50,18 +51,22 @@ module Commendo
|
|
50
51
|
# Recalculates similarity scores for every resource stored under
# resource_key_base.
#
# Each resource key is handed to the similarity Lua script, which does the
# work inside Redis. When the script returns 999 it has declined to process
# the resource (it reports the resource as too large), so the comparison is
# performed from Ruby instead, one pair at a time, using the pair comparison
# script.
#
# threshold - scores must be strictly greater than this to be stored
#             (default: 0).
#
# Yields (key, index, total) before each resource is processed when a block
# is given, so callers can report progress.
#
# Returns the array of resource keys that were processed.
def calculate_similarity(threshold = 0)
  #TODO make this use scan for scaling
  keys = redis.keys("#{resource_key_base}:*")

  # The key base is literal text, not a pattern: escape it, and anchor at the
  # start of the string (\A) rather than at the start of any line (^).
  key_prefix = /\A#{Regexp.escape(resource_key_base)}:/

  keys.each_with_index do |key, i|
    yield(key, i, keys.length) if block_given?

    completed = redis.eval(similarity_lua, keys: [key], argv: [resource_key_base, similar_key_base, group_key_base, threshold])
    # 999 is the sentinel the similarity script returns when it skipped the
    # resource; anything else means the script handled it.
    next unless completed == 999

    resource = key.sub(key_prefix, '')
    groups = redis.smembers(resource_key(resource))
    group_keys = groups.map { |group| group_key(group) }
    resources = redis.sunion(*group_keys)
    resources.each do |to_compare|
      next if resource == to_compare
      redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [resource, to_compare, threshold])
    end
  end
end
|
59
69
|
|
60
|
-
def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
|
61
|
-
#TODO maybe consider using ary.combination to get finer grained operation in lua
|
62
|
-
redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
|
63
|
-
end
|
64
|
-
|
65
70
|
def similar_to(resource)
|
66
71
|
similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
|
67
72
|
|
@@ -98,6 +103,15 @@ module Commendo
|
|
98
103
|
file.read
|
99
104
|
end
|
100
105
|
|
106
|
+
# Source text of the pair comparison Lua script. The script is loaded on
# first use and the result is cached on the instance.
def pair_comparison_lua
  return @pair_comparison_lua if @pair_comparison_lua

  @pair_comparison_lua = load_pair_comparison_lua
end
|
109
|
+
|
110
|
+
# Reads pair_comparison.lua, located in the same directory as this file,
# and returns its contents as a String.
def load_pair_comparison_lua
  # File.read opens, reads and closes in one call; the previous
  # File.open(...).read left the file handle open until garbage collection.
  File.read(File.expand_path('../pair_comparison.lua', __FILE__))
end
|
114
|
+
|
101
115
|
# Key prefix for similarity data: key_base followed by ':similar'.
def similar_key_base
  format('%s:similar', key_base)
end
|
@@ -110,8 +124,13 @@ module Commendo
|
|
110
124
|
"#{resource_key_base}:#{resource}"
|
111
125
|
end
|
112
126
|
|
127
|
+
# Key prefix for group data: key_base followed by ':groups'.
def group_key_base
  format('%s:groups', key_base)
end
|
130
|
+
|
131
|
+
|
113
132
|
# Redis key for a single group: group_key_base, a colon, then the group
# identifier converted to a string.
def group_key(group)
  group_key_base.to_s + ':' + group.to_s
end
|
116
135
|
|
117
136
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
-- Pairwise similarity between two resources, run inside Redis via EVAL.
--
-- KEYS[1], KEYS[2]  sets to compare (left and right)
-- KEYS[3], KEYS[4]  sorted sets that receive the score for left and right
-- ARGV[1], ARGV[2]  member names written into those sorted sets (left, right)
-- ARGV[3]           threshold; only scores strictly above it are stored
--
-- The score is |intersection| / |union| of the two sets, rounded to three
-- decimal places. The script always returns true.
local left_key = KEYS[1]
local right_key = KEYS[2]
local left_similarity_key = KEYS[3]
local right_similarity_key = KEYS[4]

-- Identifiers stay as strings. tonumber() would turn a non-numeric identifier
-- into nil (which ZADD rejects) and would rewrite identifiers such as '007'.
local left = ARGV[1]
local right = ARGV[2]
-- Fall back to 0 so a missing or non-numeric threshold cannot raise in the
-- comparison below.
local threshold = tonumber(ARGV[3]) or 0

-- Rounds num to idp decimal places (0 when idp is omitted), halves rounding up.
local function round(num, idp)
  local mult = 10^(idp or 0)
  return math.floor(num * mult + 0.5) / mult
end

redis.log(redis.LOG_NOTICE, 'Running pair comparison for ' .. left_key .. ' ' .. right_key)

local intersect = #redis.call('SINTER', left_key, right_key)
-- An empty intersection means a score of 0, which is never stored, so the
-- union does not need to be computed in that case.
if intersect > 0 then
  local union = #redis.call('SUNION', left_key, right_key)
  local similarity = round(intersect / union, 3)
  if similarity > threshold then
    -- Store the score in both directions so either resource can look it up.
    redis.call('ZADD', left_similarity_key, similarity, right)
    redis.call('ZADD', right_similarity_key, similarity, left)
  end
end

redis.log(redis.LOG_NOTICE, 'Finished running pair comparison for ' .. left_key .. ' ' .. right_key)

return true
|
data/lib/commendo/similarity.lua
CHANGED
@@ -1,31 +1,56 @@
|
|
1
|
-
local
|
2
|
-
local
|
3
|
-
local
|
4
|
-
local
|
5
|
-
|
6
|
-
|
7
|
-
local
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
for
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
1
|
+
-- Complete similarity calculation for one resource, run inside Redis via EVAL.
--
-- KEYS[1]  key of the resource's set (its members are group identifiers)
-- ARGV[1]  resource key base; a resource's key is base .. ':' .. resource
-- ARGV[2]  similarity key base; scores go to the sorted set base .. ':' .. resource
-- ARGV[3]  group key base; a group's key is base .. ':' .. group
-- ARGV[4]  threshold; only scores strictly above it are stored
--
-- Returns 999 without doing any work when the resource has more than 999
-- groups, so the caller can fall back to another strategy; otherwise true.
local resource_key = KEYS[1]
local resource_key_base = ARGV[1]
local sim_key_base = ARGV[2]
local group_key_base = ARGV[3]
-- Fall back to 0 so a missing or non-numeric threshold cannot raise in the
-- comparison below.
local threshold = tonumber(ARGV[4]) or 0

-- Largest number of groups handled in a single script run.
local MAX_GROUPS = 999
-- Value returned to the caller when MAX_GROUPS is exceeded.
local TOO_LARGE = 999

-- Rounds num to idp decimal places (0 when idp is omitted), halves rounding up.
local function round(num, idp)
  local mult = 10^(idp or 0)
  return math.floor(num * mult + 0.5) / mult
end

redis.log(redis.LOG_NOTICE, 'Running complete similarity for ' .. resource_key)

-- Strip the literal "<base>:" prefix with plain string operations. Building a
-- Lua pattern from the base is unsafe: '%' followed by a letter is a character
-- class, and any magic characters in the base would be interpreted.
local resource_prefix = resource_key_base .. ':'
local resource = resource_key
if resource_key:sub(1, #resource_prefix) == resource_prefix then
  resource = resource_key:sub(#resource_prefix + 1)
end

local groups = redis.call('smembers', resource_key)

if #groups > MAX_GROUPS then
  redis.log(redis.LOG_NOTICE, 'Complete similarity too large for ' .. resource_key .. ', ' .. #groups)
  return TOO_LARGE
end

-- Nothing to compare against, and SUNION with no keys is an arity error.
if #groups == 0 then
  redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)
  return true
end

local group_keys = {}
for i, group in ipairs(groups) do
  group_keys[i] = group_key_base .. ':' .. group
end

-- Every resource sharing at least one group with this one. SUNION returns
-- each member once, so no de-duplication is needed.
local resources = redis.call('sunion', unpack(group_keys))

for _, to_compare in ipairs(resources) do
  -- Only score the pair when this resource is the (string-wise) greater of
  -- the two; this also skips comparing the resource with itself.
  if resource > to_compare then
    local to_compare_key = resource_prefix .. to_compare
    local intersect = #redis.call('SINTER', resource_key, to_compare_key)
    if intersect > 0 then
      local union = #redis.call('SUNION', resource_key, to_compare_key)
      local similarity = round(intersect / union, 3)
      if similarity > threshold then
        -- Store the score in both directions.
        redis.call('ZADD', sim_key_base .. ':' .. resource, similarity, to_compare)
        redis.call('ZADD', sim_key_base .. ':' .. to_compare, similarity, resource)
      end
    end
  end
end

redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)

return true
|
data/lib/commendo/version.rb
CHANGED
data/test/content_set_test.rb
CHANGED
@@ -68,10 +68,10 @@ module Commendo
|
|
68
68
|
expected = [
|
69
69
|
{resource: '9', similarity: 0.5},
|
70
70
|
{resource: '6', similarity: 0.5},
|
71
|
-
{resource: '12', similarity: 0.
|
71
|
+
{resource: '12', similarity: 0.333},
|
72
72
|
{resource: '3', similarity: 0.25},
|
73
|
-
{resource: '21', similarity: 0.
|
74
|
-
{resource: '15', similarity: 0.
|
73
|
+
{resource: '21', similarity: 0.167},
|
74
|
+
{resource: '15', similarity: 0.167}
|
75
75
|
]
|
76
76
|
assert_equal expected, cs.similar_to(18)
|
77
77
|
end
|
data/test/weighted_group_test.rb
CHANGED
@@ -26,13 +26,12 @@ module Commendo
|
|
26
26
|
weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
|
27
27
|
expected = [
|
28
28
|
{resource: '6', similarity: 55.5},
|
29
|
-
{resource: '12', similarity: 36.
|
29
|
+
{resource: '12', similarity: 36.963},
|
30
30
|
{resource: '9', similarity: 5.0},
|
31
31
|
{resource: '3', similarity: 2.5},
|
32
|
-
{resource: '21', similarity: 1.
|
33
|
-
{resource: '15', similarity: 1.
|
32
|
+
{resource: '21', similarity: 1.67},
|
33
|
+
{resource: '15', similarity: 1.67}
|
34
34
|
]
|
35
|
-
|
36
35
|
assert_equal expected, weighted_group.similar_to(18)
|
37
36
|
end
|
38
37
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commendo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
@@ -110,6 +110,7 @@ files:
|
|
110
110
|
- commendo.gemspec
|
111
111
|
- lib/commendo.rb
|
112
112
|
- lib/commendo/content_set.rb
|
113
|
+
- lib/commendo/pair_comparison.lua
|
113
114
|
- lib/commendo/similarity.lua
|
114
115
|
- lib/commendo/tag_set.rb
|
115
116
|
- lib/commendo/version.rb
|