commendo 0.0.9 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80baa49ab8364f70fd8dd4f2127f95d4f5ad0da2
4
- data.tar.gz: 355812df077e1df1cb4d0425fb36fac8334d19f6
3
+ metadata.gz: f4d4570da8f16342e587461c8731982f4a82c8ba
4
+ data.tar.gz: bdde9ae21c9bb35c7c9b6073a82e651086132617
5
5
  SHA512:
6
- metadata.gz: a1c2164296cfe3c47d0ef943594aeb6b4f20a007f421729ddd5b4328c3ffd2e90033f7315fac9451f98f39a088b504f0252cc3540a75560f6df9b4052bc4c9b6
7
- data.tar.gz: 5d31d9f878a2eb7ab19e97b13887adb95b3df416866993b5c9ff8008a21f24b97c7acaf685a1baeb17d9003fa6716a5b1d5041efe057de1f2dc70327c8eba4df
6
+ metadata.gz: 31406c7ee3846a6046e8e149ca8ada66eaed799977ee615e778a5fb3a2c353165e246deb87b422bef7d5a544a4dddbac3788a4ed879a77a04b13b0f3ed9d7dbc
7
+ data.tar.gz: 15e58e641bd237466b80713c64d893ebd66877aa4c4a28241b584875d5e476fc28c45d68194eacd9b74cfef5a4e144c011e892185bd1ee9d27f38fc346c11032
@@ -1,3 +1,6 @@
1
+ # 1.0.0 / 2014-04-22
2
+ * [FEATURE] Add limits to similarity requests and bump to production release 1.0.0 :)
3
+
1
4
  # 0.0.9 / 2014-04-09
2
5
  * [BUGFIX] Fix similarity calculation for resources in many sets
3
6
 
@@ -1,21 +1,30 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- redis_db = ARGV[0].to_i
4
- content_set_base_redis_key = ARGV[1]
5
- filename = ARGV[2]
6
-
7
3
  require 'redis'
8
4
  require 'commendo'
5
+ require 'json'
9
6
 
10
- redis = Redis.new(db: 2)
11
- cs = Commendo::ContentSet.new(redis, content_set_base_redis_key)
12
-
13
- File.open(filename) do |f|
7
+ filename = ARGV[0]
8
+ redis_db = ARGV[1].to_i
9
+ base_key = ARGV[2]
14
10
 
15
- current_resource = nil
16
- current_headings = []
11
+ redis = Redis.new(db: redis_db, timeout: 60)
12
+ cs = Commendo::ContentSet.new(redis, base_key)
17
13
 
18
- f.each_line.with_index do |tsv, i|
19
- next if i.zero?
14
+ puts "Loading."
15
+ File.open(filename) do |f|
16
+ f.each_line.with_index do |json, i|
17
+ print '.'
18
+ resource, scored_groups = JSON.parse(json)
19
+ cs.add(resource, *scored_groups)
20
+ end
21
+ end
22
+ puts "\nFinished loading"
20
23
 
21
- resource, _classified, root, score, heading = tsv.split(/\t/)
24
+ puts 'Calculating similarities'
25
+ pbar = nil
26
+ cs.calculate_similarity do |key, i, total|
27
+ pbar ||= ProgressBar.new('Calculating similarity', total)
28
+ pbar.inc
29
+ #puts "Calculating similarity for #{i}/#{total} — #{key}"
30
+ end
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_dependency 'redis'
22
+ spec.add_dependency 'progressbar'
22
23
 
23
24
  spec.add_development_dependency 'bundler', '~> 1.5'
24
25
  spec.add_development_dependency 'rake'
@@ -85,17 +85,18 @@ module Commendo
85
85
  end
86
86
  end
87
87
 
88
- def similar_to(resource)
88
+ def similar_to(resource, limit = 0)
89
+ finish = limit -1
89
90
  if resource.kind_of? Array
90
91
  keys = resource.map do |res|
91
92
  similarity_key(res)
92
93
  end
93
94
  tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
94
95
  redis.zunionstore(tmp_key, keys)
95
- similar_resources = redis.zrevrange(tmp_key, 0, -1, with_scores: true)
96
+ similar_resources = redis.zrevrange(tmp_key, 0, finish, with_scores: true)
96
97
  redis.del(tmp_key)
97
98
  else
98
- similar_resources = redis.zrevrange(similarity_key(resource), 0, -1, with_scores: true)
99
+ similar_resources = redis.zrevrange(similarity_key(resource), 0, finish, with_scores: true)
99
100
  end
100
101
  similar_resources.map do |resource|
101
102
  {resource: resource[0], similarity: resource[1].to_f}
@@ -103,11 +104,18 @@ module Commendo
103
104
  end
104
105
 
105
106
  def filtered_similar_to(resource, options = {})
106
- similar = similar_to(resource)
107
- return similar if @tag_set.nil? || options[:include].nil? && options[:exclude].nil?
108
- similar.delete_if { |s| !options[:exclude].nil? && @tag_set.matches(s[:resource], *options[:exclude]) }
109
- similar.delete_if { |s| !options[:include].nil? && !@tag_set.matches(s[:resource], *options[:include]) }
110
- similar
107
+ if @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?)
108
+ return similar_to(resource, options[:limit] || 0)
109
+ else
110
+ similar = similar_to(resource)
111
+ limit = options[:limit] || similar.length
112
+ filtered = []
113
+ similar.each do |s|
114
+ return filtered if filtered.length >= limit
115
+ filtered << s if @tag_set.matches(s[:resource], options[:include], options[:exclude])
116
+ end
117
+ return filtered
118
+ end
111
119
  end
112
120
 
113
121
  def similarity_key(resource)
@@ -21,9 +21,11 @@ module Commendo
21
21
  add(resource, *tags)
22
22
  end
23
23
 
24
- def matches(resource, *tags)
24
+ def matches(resource, include, exclude = [])
25
25
  resource_tags = get(resource)
26
- (resource_tags & tags).length > 0
26
+ can_include = include.nil? || include.empty? || (resource_tags & include).length > 0
27
+ should_exclude = !exclude.nil? && !exclude.empty? && (resource_tags & exclude).length > 0
28
+ return can_include && !should_exclude
27
29
  end
28
30
 
29
31
  def delete(resource)
@@ -1,3 +1,3 @@
1
1
  module Commendo
2
- VERSION = '0.0.9'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -8,7 +8,8 @@ module Commendo
8
8
  @content_sets, @redis, @key_base = content_sets, redis, key_base
9
9
  end
10
10
 
11
- def similar_to(resource)
11
+ def similar_to(resource, limit = 0)
12
+ finish = limit -1
12
13
  resources = resource.kind_of?(Array) ? resource : [resource]
13
14
  keys = []
14
15
  weights = []
@@ -20,7 +21,7 @@ module Commendo
20
21
  end
21
22
  tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
22
23
  redis.zunionstore(tmp_key, keys, weights: weights)
23
- similar_resources = redis.zrevrange(tmp_key, 0, -1, with_scores: true)
24
+ similar_resources = redis.zrevrange(tmp_key, 0, finish, with_scores: true)
24
25
  redis.del(tmp_key)
25
26
 
26
27
  similar_resources.map do |resource|
@@ -30,11 +31,18 @@ module Commendo
30
31
  end
31
32
 
32
33
  def filtered_similar_to(resource, options = {})
33
- similar = similar_to(resource)
34
- return similar if @tag_set.nil? || options[:include].nil? && options[:exclude].nil?
35
- similar.delete_if { |s| !options[:exclude].nil? && @tag_set.matches(s[:resource], *options[:exclude]) }
36
- similar.delete_if { |s| !options[:include].nil? && !@tag_set.matches(s[:resource], *options[:include]) }
37
- similar
34
+ if @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?)
35
+ return similar_to(resource, options[:limit] || 0)
36
+ else
37
+ similar = similar_to(resource)
38
+ limit = options[:limit] || similar.length
39
+ filtered = []
40
+ similar.each do |s|
41
+ return filtered if filtered.length >= limit
42
+ filtered << s if @tag_set.matches(s[:resource], options[:include], options[:exclude])
43
+ end
44
+ return filtered
45
+ end
38
46
  end
39
47
 
40
48
  end
@@ -34,6 +34,27 @@ module Commendo
34
34
  assert_equal expected, cs.similar_to('resource-1')
35
35
  end
36
36
 
37
+ def test_recommends_limited_by_number
38
+ redis = Redis.new(db: 15)
39
+ redis.flushdb
40
+ key_base = 'CommendoTests'
41
+ cs = ContentSet.new(redis, key_base)
42
+ cs.add('resource-1', 'group-1', 'group-2')
43
+ cs.add('resource-2', 'group-1')
44
+ cs.add('resource-3', 'group-1', 'group-2')
45
+ cs.add('resource-4', 'group-2')
46
+ cs.calculate_similarity
47
+ expected = [
48
+ {resource: 'resource-3', similarity: 1.0},
49
+ {resource: 'resource-4', similarity: 0.667},
50
+ {resource: 'resource-2', similarity: 0.667}
51
+ ]
52
+ assert_equal expected[0..0], cs.similar_to('resource-1', 1)
53
+ assert_equal expected[0..1], cs.similar_to('resource-1', 2)
54
+ assert_equal expected, cs.similar_to('resource-1', 3)
55
+ assert_equal expected, cs.similar_to('resource-1', 99)
56
+ end
57
+
37
58
  def test_recommends_when_added_with_scores
38
59
  redis = Redis.new(db: 15)
39
60
  redis.flushdb
@@ -254,6 +275,29 @@ module Commendo
254
275
 
255
276
  end
256
277
 
278
+ def test_filters_include_by_tag_collection_and_limit
279
+ redis = Redis.new(db: 15)
280
+ redis.flushdb
281
+ ts = TagSet.new(redis, 'CommendoTests:tags')
282
+ cs = ContentSet.new(redis, 'CommendoTests', ts)
283
+ (3..23).each do |group|
284
+ (3..23).each do |res|
285
+ cs.add(res, group) if res % group == 0
286
+ ts.add(res, 'mod3') if res.modulo(3).zero?
287
+ ts.add(res, 'mod4') if res.modulo(4).zero?
288
+ ts.add(res, 'mod5') if res.modulo(5).zero?
289
+ end
290
+ end
291
+ cs.calculate_similarity
292
+
293
+ actual = cs.filtered_similar_to(10, include: ['mod5'], limit: 2)
294
+ assert_equal 2, actual.length
295
+ assert contains_resource('5', actual)
296
+ #assert contains_resource('15', actual)
297
+ assert contains_resource('20', actual)
298
+
299
+ end
300
+
257
301
  def test_filters_exclude_by_tag_collection
258
302
  redis = Redis.new(db: 15)
259
303
  redis.flushdb
@@ -64,18 +64,63 @@ module Commendo
64
64
  ts.set(1, 'foo', 'bar', 'baz')
65
65
  ts.set(2, 'qux', 'qip')
66
66
 
67
- assert ts.matches(1, 'foo')
68
- assert ts.matches(1, 'bar', 'baz')
69
- assert ts.matches(1, 'bar', 'baz', 'foo')
70
- refute ts.matches(1, 'qux')
71
- refute ts.matches(1, 'qip')
72
-
73
- refute ts.matches(2, 'foo')
74
- refute ts.matches(2, 'bar', 'baz')
75
- refute ts.matches(2, 'bar', 'baz', 'foo')
76
- assert ts.matches(2, 'qux', 'qip')
77
- assert ts.matches(2, 'qux')
78
- assert ts.matches(2, 'qip')
67
+ assert ts.matches(1, ['foo'])
68
+ assert ts.matches(1, ['bar', 'baz'])
69
+ assert ts.matches(1, ['bar', 'baz', 'foo'])
70
+ refute ts.matches(1, ['qux'])
71
+ refute ts.matches(1, ['qip'])
72
+
73
+ refute ts.matches(2, ['foo'])
74
+ refute ts.matches(2, ['bar', 'baz'])
75
+ refute ts.matches(2, ['bar', 'baz', 'foo'])
76
+ assert ts.matches(2, ['qux', 'qip'])
77
+ assert ts.matches(2, ['qux'])
78
+ assert ts.matches(2, ['qip'])
79
+ end
80
+
81
+ def test_matches_exclude_tags
82
+ redis = Redis.new(db: 15)
83
+ redis.flushdb
84
+ ts = TagSet.new(redis, 'TagSetTest')
85
+ ts.set(1, 'foo', 'bar', 'baz')
86
+ ts.set(2, 'qux', 'qip')
87
+
88
+ refute ts.matches(1, nil, ['foo'])
89
+ refute ts.matches(1, [], ['foo'])
90
+ refute ts.matches(1, [], ['bar', 'baz'])
91
+ refute ts.matches(1, [], ['bar', 'baz', 'foo'])
92
+ assert ts.matches(1, [], ['qux'])
93
+ assert ts.matches(1, [], ['qip'])
94
+
95
+ assert ts.matches(2, nil, ['foo'])
96
+ assert ts.matches(2, [], ['foo'])
97
+ assert ts.matches(2, [], ['bar', 'baz'])
98
+ assert ts.matches(2, [], ['bar', 'baz', 'foo'])
99
+ refute ts.matches(2, [], ['qux', 'qip'])
100
+ refute ts.matches(2, [], ['qux'])
101
+ refute ts.matches(2, [], ['qip'])
102
+ end
103
+
104
+ def test_matches_include_and_exclude_tags
105
+ redis = Redis.new(db: 15)
106
+ redis.flushdb
107
+ ts = TagSet.new(redis, 'TagSetTest')
108
+ ts.set(1, 'foo', 'bar', 'baz')
109
+ ts.set(2, 'qux', 'qip')
110
+
111
+ refute ts.matches(1, ['foo'], ['bar'])
112
+ refute ts.matches(1, ['bar'], ['foo'])
113
+
114
+ assert ts.matches(1, ['foo'], [])
115
+ assert ts.matches(1, ['foo'], nil)
116
+ assert ts.matches(1, ['foo'], ['qux'])
117
+
118
+ assert ts.matches(2, ['qip'], ['foo'])
119
+ assert ts.matches(2, ['qux'], ['bar', 'baz'])
120
+ assert ts.matches(2, ['qip'], ['bar', 'baz', 'foo'])
121
+ refute ts.matches(2, ['qip'], ['qux', 'qip'])
122
+ refute ts.matches(2, ['qip'], ['qux'])
123
+ refute ts.matches(2, ['qux'], ['qip'])
79
124
  end
80
125
 
81
126
  end
@@ -50,6 +50,28 @@ module Commendo
50
50
  assert_equal expected, weighted_group.similar_to(18)
51
51
  end
52
52
 
53
+ def test_calls_each_content_set_with_limits
54
+ weighted_group = WeightedGroup.new(
55
+ @redis,
56
+ 'CommendoTests:WeightedGroup',
57
+ {cs: @cs1, weight: 1.0},
58
+ {cs: @cs2, weight: 10.0},
59
+ {cs: @cs3, weight: 100.0}
60
+ )
61
+ expected = [
62
+ {resource: '6', similarity: 74.037},
63
+ {resource: '12', similarity: 55.5},
64
+ {resource: '9', similarity: 6.67},
65
+ {resource: '3', similarity: 4.0},
66
+ {resource: '21', similarity: 2.86},
67
+ {resource: '15', similarity: 2.86}
68
+ ]
69
+ assert_equal expected[0..0], weighted_group.similar_to(18, 1)
70
+ assert_equal expected[0..2], weighted_group.similar_to(18, 3)
71
+ assert_equal expected, weighted_group.similar_to(18, 6)
72
+ assert_equal expected, weighted_group.similar_to(18, 99)
73
+ end
74
+
53
75
  def test_filters_include_recommendations
54
76
  weighted_group = WeightedGroup.new(
55
77
  @redis,
@@ -98,6 +120,26 @@ module Commendo
98
120
  assert_equal expected, weighted_group.filtered_similar_to(8, include: ['mod4'], exclude: ['mod5'])
99
121
  end
100
122
 
123
+ def test_filters_include_and_exclude_recommendations_and_limits
124
+ weighted_group = WeightedGroup.new(
125
+ @redis,
126
+ 'CommendoTests:WeightedGroup',
127
+ {cs: @cs1, weight: 100.0},
128
+ {cs: @cs2, weight: 10.0},
129
+ {cs: @cs3, weight: 1.0}
130
+ )
131
+ expected = [
132
+ {resource: '16', similarity: 80.0},
133
+ {resource: '4', similarity: 66.7},
134
+ {resource: '12', similarity: 33.3}
135
+ ]
136
+ weighted_group.tag_set = @tag_set
137
+ assert_equal expected[0..0], weighted_group.filtered_similar_to(8, include: ['mod4'], exclude: ['mod5'], limit: 1)
138
+ assert_equal expected[0..1], weighted_group.filtered_similar_to(8, include: ['mod4'], exclude: ['mod5'], limit: 2)
139
+ assert_equal expected, weighted_group.filtered_similar_to(8, include: ['mod4'], exclude: ['mod5'], limit: 3)
140
+ assert_equal expected, weighted_group.filtered_similar_to(8, include: ['mod4'], exclude: ['mod5'], limit: 99)
141
+ end
142
+
101
143
  def test_similar_to_mutliple_items
102
144
  weighted_group = WeightedGroup.new(
103
145
  @redis,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commendo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-11 00:00:00.000000000 Z
11
+ date: 2014-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: progressbar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement