commendo 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/commendo/content_set.rb +29 -10
- data/lib/commendo/pair_comparison.lua +29 -0
- data/lib/commendo/similarity.lua +50 -25
- data/lib/commendo/version.rb +1 -1
- data/lib/commendo/weighted_group.rb +1 -1
- data/test/content_set_test.rb +3 -3
- data/test/weighted_group_test.rb +3 -4
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4fb3a3f98e3846e5f75d884630c3735ed57f62b1
|
4
|
+
data.tar.gz: 3944a25fc941e55a343b5201bf40ef77fbef08fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe3b6ea17ed10adc4ae2ffa12e7179d305970b4c8bf0d100671df8b6f800b7ffa042291c57a8d1c4d62550490654c245134f3f18c2f1733cfa0ca829018defcb
|
7
|
+
data.tar.gz: 7d01cc0c741ca514992e57b44c3defdc1a8d3de5db26f486eae3ef9bd1de62f8823974c6350178ae725bc0b61698f7cc9fdd4f1705f085a1b4799db3654c3276
|
data/CHANGELOG.md
CHANGED
data/lib/commendo/content_set.rb
CHANGED
@@ -43,6 +43,7 @@ module Commendo
|
|
43
43
|
similar.each do |other_resource|
|
44
44
|
redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
|
45
45
|
end
|
46
|
+
#TODO delete from groups?
|
46
47
|
redis.del(similarity_key(resource))
|
47
48
|
redis.del(resource_key(resource))
|
48
49
|
end
|
@@ -50,18 +51,22 @@ module Commendo
|
|
50
51
|
# Recalculates similarity scores for every resource key under resource_key_base.
#
# Each key is first handed to the similarity Lua script. When that script
# answers 999 (it found more than 999 groups for the resource and did no
# work), the comparison is driven from Ruby instead: the resource is compared
# with every other resource sharing one of its groups, one pair per call to
# the pair-comparison Lua script.
#
# threshold - a score is stored only when it is strictly greater than this
#             value (default 0).
#
# Yields the current key, its index and the total number of keys before each
# key is processed, when a block is given.
#
# Returns the array of keys that was iterated.
def calculate_similarity(threshold = 0)
  #TODO make this use scan for scaling
  keys = redis.keys("#{resource_key_base}:*")
  # Strip the literal key prefix only: \A anchors at the start of the string
  # (^ would also match after any newline) and Regexp.escape stops characters
  # such as '.' or '+' in the key base from being read as regex syntax.
  resource_prefix = /\A#{Regexp.escape(resource_key_base)}:/
  keys.each_with_index do |key, i|
    yield(key, i, keys.length) if block_given?
    completed = redis.eval(
      similarity_lua,
      keys: [key],
      argv: [resource_key_base, similar_key_base, group_key_base, threshold]
    )
    # 999 is the script's "too many groups, not processed" answer.
    next unless completed == 999

    resource = key.sub(resource_prefix, '')
    groups = redis.smembers(resource_key(resource))
    group_keys = groups.map { |group| group_key(group) }
    resources = redis.sunion(*group_keys)
    resources.each do |to_compare|
      next if resource == to_compare
      redis.eval(
        pair_comparison_lua,
        keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)],
        argv: [resource, to_compare, threshold]
      )
    end
  end
end
|
58
68
|
end
|
59
69
|
|
60
|
-
def calculate_similarity_in_redis(set_key, similiarity_key, threshold)
|
61
|
-
#TODO maybe consider using ary.combination to get finer grained operation in lua
|
62
|
-
redis.eval(similarity_lua, [set_key, similiarity_key], [resource_key_base, threshold])
|
63
|
-
end
|
64
|
-
|
65
70
|
def similar_to(resource)
|
66
71
|
similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
|
67
72
|
|
@@ -98,6 +103,15 @@ module Commendo
|
|
98
103
|
file.read
|
99
104
|
end
|
100
105
|
|
106
|
+
# Source text of the pair-comparison Lua script, loaded on first use and
# cached in @pair_comparison_lua for later calls.
def pair_comparison_lua
  return @pair_comparison_lua if @pair_comparison_lua
  @pair_comparison_lua = load_pair_comparison_lua
end
|
109
|
+
|
110
|
+
# Reads pair_comparison.lua, located in the same directory as this file, and
# returns its contents as a String.
#
# File.read opens, reads and closes the file in one call; the previous
# File.open(...).read left the handle open until garbage collection.
def load_pair_comparison_lua
  File.read(File.expand_path('../pair_comparison.lua', __FILE__))
end
|
114
|
+
|
101
115
|
# Key prefix for similarity data: key_base followed by ':similar'.
def similar_key_base
  format('%s:similar', key_base)
end
|
@@ -110,8 +124,13 @@ module Commendo
|
|
110
124
|
"#{resource_key_base}:#{resource}"
|
111
125
|
end
|
112
126
|
|
127
|
+
# Key prefix for group data: key_base followed by ':groups'.
def group_key_base
  format('%s:groups', key_base)
end
|
130
|
+
|
131
|
+
|
113
132
|
# Key for a single group: group_key_base, a colon, then the group name.
def group_key(group)
  format('%s:%s', group_key_base, group)
end
|
116
135
|
|
117
136
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
-- Redis script: scores one pair of resources as |A intersect B| / |A union B|
-- over their sets and records the score for both sides.
--
-- KEYS[1], KEYS[2]  set keys of the left and right resource
-- KEYS[3], KEYS[4]  sorted-set keys that receive the score for the left and
--                   right resource respectively
-- ARGV[1], ARGV[2]  identifiers of the left and right resource
-- ARGV[3]           threshold; the score is stored only when strictly greater
--
-- Always returns true.
local left_key = KEYS[1]
local right_key = KEYS[2]
local left_similarity_key = KEYS[3]
local right_similarity_key = KEYS[4]

-- Identifiers are stored as sorted-set members, so keep them exactly as
-- passed. Running them through tonumber() turns a non-numeric identifier
-- into nil (which redis.call rejects) and rewrites numeric-looking ones
-- such as '007'.
local left = ARGV[1]
local right = ARGV[2]
local threshold = tonumber(ARGV[3])

-- Rounds num to idp decimal places (0 when idp is omitted), halves rounding up.
local function round(num, idp)
  local mult = 10^(idp or 0)
  return math.floor(num * mult + 0.5) / mult
end

redis.log(redis.LOG_NOTICE, 'Running pair comparison for ' .. left_key .. ' ' .. right_key)

local intersect = #redis.call('SINTER', left_key, right_key)
if intersect > 0 then
  -- |A union B| = |A| + |B| - |A intersect B|; avoids building the whole
  -- union just to count it.
  local union = redis.call('SCARD', left_key) + redis.call('SCARD', right_key) - intersect
  local similarity = round(intersect / union, 3)
  if similarity > threshold then
    redis.call('ZADD', left_similarity_key, similarity, right)
    redis.call('ZADD', right_similarity_key, similarity, left)
  end
end

redis.log(redis.LOG_NOTICE, 'Finished running pair comparison for ' .. left_key .. ' ' .. right_key)

return true
|
data/lib/commendo/similarity.lua
CHANGED
@@ -1,31 +1,56 @@
|
|
1
|
-
local
|
2
|
-
local
|
3
|
-
local
|
4
|
-
local
|
5
|
-
|
6
|
-
|
7
|
-
local
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
for
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
1
|
+
-- Redis script: scores one resource against every resource that shares at
-- least one group with it, as |A intersect B| / |A union B| over their sets.
--
-- KEYS[1]  set key of the resource; its members are group names
-- ARGV[1]  key prefix of resource sets   (<prefix>:<resource>)
-- ARGV[2]  key prefix of similarity sorted sets (<prefix>:<resource>)
-- ARGV[3]  key prefix of group sets      (<prefix>:<group>); members are
--          resource names
-- ARGV[4]  threshold; a score is stored only when strictly greater
--
-- Returns 999, having done no work, when the resource has more than 999
-- groups; otherwise returns true.
local resource_key = KEYS[1]
local resource_key_base = ARGV[1]
local sim_key_base = ARGV[2]
local group_key_base = ARGV[3]
local threshold = tonumber(ARGV[4])

-- Rounds num to idp decimal places (0 when idp is omitted), halves rounding up.
local function round(num, idp)
  local mult = 10^(idp or 0)
  return math.floor(num * mult + 0.5) / mult
end

redis.log(redis.LOG_NOTICE, 'Running complete similarity for ' .. resource_key)

-- Strip the key prefix with plain string operations. Feeding the prefix to
-- gsub as a pattern behind a leading '%' made '%' combine with the prefix's
-- first character (e.g. '%c', '%a') and left every other magic character in
-- the prefix active.
local resource_prefix = resource_key_base .. ':'
local resource = resource_key
if resource_key:sub(1, #resource_prefix) == resource_prefix then
  resource = resource_key:sub(#resource_prefix + 1)
end

local groups = redis.call('smembers', resource_key)
local group_count = #groups

if group_count > 999 then
  redis.log(redis.LOG_NOTICE, 'Complete similarity too large for ' .. resource_key .. ', ' .. group_count)
  return 999
end

if group_count == 0 then
  -- Nothing to compare against; SUNION with no keys would raise an error.
  redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)
  return true
end

local group_keys = {}
for i, group in ipairs(groups) do
  group_keys[i] = group_key_base .. ':' .. group
end

-- SUNION already returns each member once, so no de-duplication is needed
-- (the old 'previous' sentinel also skipped a resource literally named 'foo').
local resources = redis.call('sunion', unpack(group_keys))
local resource_sim_key = sim_key_base .. ':' .. resource

for _, to_compare in ipairs(resources) do
  -- Only score the pair from the side whose name sorts higher; this also
  -- skips comparing the resource with itself.
  if resource > to_compare then
    local other_key = resource_prefix .. to_compare
    local intersect = #redis.call('SINTER', resource_key, other_key)
    if intersect > 0 then
      -- |A union B| = |A| + |B| - |A intersect B|; |A| is group_count because
      -- groups holds every member of resource_key.
      local union = group_count + redis.call('SCARD', other_key) - intersect
      local similarity = round(intersect / union, 3)
      if similarity > threshold then
        redis.call('ZADD', resource_sim_key, similarity, to_compare)
        redis.call('ZADD', sim_key_base .. ':' .. to_compare, similarity, resource)
      end
    end
  end
end

redis.log(redis.LOG_NOTICE, 'Finished running complete similarity for ' .. resource_key)

return true
|
data/lib/commendo/version.rb
CHANGED
data/test/content_set_test.rb
CHANGED
@@ -68,10 +68,10 @@ module Commendo
|
|
68
68
|
expected = [
|
69
69
|
{resource: '9', similarity: 0.5},
|
70
70
|
{resource: '6', similarity: 0.5},
|
71
|
-
{resource: '12', similarity: 0.
|
71
|
+
{resource: '12', similarity: 0.333},
|
72
72
|
{resource: '3', similarity: 0.25},
|
73
|
-
{resource: '21', similarity: 0.
|
74
|
-
{resource: '15', similarity: 0.
|
73
|
+
{resource: '21', similarity: 0.167},
|
74
|
+
{resource: '15', similarity: 0.167}
|
75
75
|
]
|
76
76
|
assert_equal expected, cs.similar_to(18)
|
77
77
|
end
|
data/test/weighted_group_test.rb
CHANGED
@@ -26,13 +26,12 @@ module Commendo
|
|
26
26
|
weighted_group = WeightedGroup.new(redis, 'CommendoTests:WeightedGroup', { cs: cs1, weight: 1.0 }, { cs: cs2, weight: 10.0 }, { cs: cs3, weight: 100.0 } )
|
27
27
|
expected = [
|
28
28
|
{resource: '6', similarity: 55.5},
|
29
|
-
{resource: '12', similarity: 36.
|
29
|
+
{resource: '12', similarity: 36.963},
|
30
30
|
{resource: '9', similarity: 5.0},
|
31
31
|
{resource: '3', similarity: 2.5},
|
32
|
-
{resource: '21', similarity: 1.
|
33
|
-
{resource: '15', similarity: 1.
|
32
|
+
{resource: '21', similarity: 1.67},
|
33
|
+
{resource: '15', similarity: 1.67}
|
34
34
|
]
|
35
|
-
|
36
35
|
assert_equal expected, weighted_group.similar_to(18)
|
37
36
|
end
|
38
37
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commendo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
@@ -110,6 +110,7 @@ files:
|
|
110
110
|
- commendo.gemspec
|
111
111
|
- lib/commendo.rb
|
112
112
|
- lib/commendo/content_set.rb
|
113
|
+
- lib/commendo/pair_comparison.lua
|
113
114
|
- lib/commendo/similarity.lua
|
114
115
|
- lib/commendo/tag_set.rb
|
115
116
|
- lib/commendo/version.rb
|