commendo 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 04c833f06518b70ff1370c1685df37294c8fd2b9
4
- data.tar.gz: dd99d2a4bcca958d2c216d3084b88298c59f462f
3
+ metadata.gz: 469912023220e7c8d711e40f37225461644e24db
4
+ data.tar.gz: f4e79eccf3eee391ff0829720914d82d535d283a
5
5
  SHA512:
6
- metadata.gz: 9936b22c4e2ffc54e63b36cc13c8dce2d1559115956092c3061ca65e3f5db76302128d5fe46aefb30685394bc1f508a6ca05455bc3e0f2e1ae2ddd6fe9121e3b
7
- data.tar.gz: 952208db4416f43325d581f36363a124b0614d656c3b90d38d8b6576350cb8f561aa14022c6084b50b47cc51825a405350682bb6cbb80974590a3110412e0c11
6
+ metadata.gz: 58a603eb4e13bd0a9f09c3e8e5fb34fd6b86a2a0944c86da3389792b76a505b808a03ec9ab35aa39e0b6152c3ba20dbfb69d83c2dced898b1ae09a152792aa07
7
+ data.tar.gz: c9af132db981e73748ed917fe42b3aa10f43135bc55f7340570e955c5dbb3c14da0f4d6aa295ae8003f86e923368d9404894fdc545ed8e9e38895718ef636ab1
@@ -1,3 +1,6 @@
1
+ # 2.1.0 / 2016-01-04
2
+ * [FEATURE] Commendo now includes a Ruby-backed in-memory implementation (experimental)
3
+
1
4
  # 2.0.0 / 2015-12-11
2
5
  * [FEATURE] Commendo can now use MySQL as a backend if you prefer it to Redis
3
6
 
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'redis'
4
+ require 'commendo'
5
+ require 'progressbar'
6
+
7
# Runs the given block, reports how long it took on $stderr and returns the
# block's value.
#
# The duration is measured with the monotonic clock so that system clock
# adjustments during the run cannot skew (or negate) the reported time.
#
# @param msg [String] label printed in front of the elapsed time
# @yield the work to be timed
# @return [Object] whatever the block returned
def timer(msg)
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  value = yield
  finish = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  $stderr.puts "#{msg} took #{finish - start}"
  value
end
14
+
15
# Usage: commendo-time-ruby.rb INFILE KEY_BASE LIMIT
#
# INFILE holds one resource per line: the resource name followed by the sets
# it belongs to, separated by tabs. LIMIT is the number of randomly chosen
# resources that are queried with similar_to once the similarity is calculated.
infile = ARGV[0]
key_base = ARGV[1]
limit = ARGV[2].to_i

Commendo.config do |config|
  config.backend = :ruby
end
cs = Commendo::ContentSet.new(key_base: key_base)

resource_to_sets = nil
timer('Loading') do
  # File.readlines closes the file once it has been read, unlike
  # File.open(...).readlines which leaves the handle open.
  resource_to_sets = File.readlines(infile).map do |line|
    resource, *sets = line.strip.split("\t")
    [resource, sets]
  end
  resource_to_sets.each do |resource, sets|
    cs.add(resource, *sets)
  end
end

timer('calculate_similarity') do
  cs.calculate_similarity
end

names_to_query = timer("Select #{limit} random names to use") do
  resource_to_sets.map(&:first).shuffle.first(limit)
end

# pbar = ProgressBar.new('Querying similar_to', names_to_query.length)
names_to_query.each do |name|
  timer('Similarity') { cs.similar_to(name) }
  # pbar.inc
end
# pbar.finish
47
+
@@ -17,6 +17,10 @@ require 'commendo/mysql-backed/content_set'
17
17
  require 'commendo/mysql-backed/tag_set'
18
18
  require 'commendo/mysql-backed/weighted_group'
19
19
 
20
+ require 'commendo/ruby-backed/content_set'
21
+ require 'commendo/ruby-backed/tag_set'
22
+ require 'commendo/ruby-backed/weighted_group'
23
+
20
24
  module Commendo
21
25
 
22
26
  def self.config
@@ -14,6 +14,7 @@ module Commendo
14
14
  def initialize(args)
15
15
  @backend = RedisBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :redis
16
16
  @backend = MySqlBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :mysql
17
+ @backend = RubyBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :ruby
17
18
  raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
18
19
  end
19
20
 
@@ -0,0 +1,133 @@
1
module Commendo
  module RubyBacked

    # Experimental in-memory content set held in plain Ruby hashes.
    #
    # Resources are scored against the groups they belong to. The similar_to
    # queries read two derived structures (per-resource score totals and a
    # group => resource index) that are rebuilt by #calculate_similarity, so
    # query results reflect the data as of the last calculation. Resources
    # added since then are treated as having no similar items.
    class ContentSet

      attr_accessor :key_base, :tag_set

      DEFAULT_LIMIT = 1000

      # @param key_base [Object] stored and exposed through the accessor; not otherwise used here
      # @param tag_set [#empty?, #matches, nil] optional tag set used by #filtered_similar_to
      def initialize(key_base, tag_set = nil)
        # resource => { group => accumulated score }
        @resource_group_score = Hash.new { |h, k| h[k] = Hash.new(0) }
        # Derived data; initialised empty so #delete and the similar_to queries
        # are safe to call before the first #calculate_similarity.
        @group_resource_scores = {}
        @resource_totals = {}
        @key_base = key_base
        @tag_set = tag_set
        @threshold = nil
      end

      # Adds several resources to one group. Each resource is either a bare
      # identifier (score 1) or a [resource, score] pair.
      def add_by_group(group, *resources)
        pairs = resources.map { |r| r.is_a?(Array) ? r : [r, 1] } # default score of 1
        pairs.each { |(resource, score)| add_single(resource, group, score) }
      end

      # Adds one resource to several groups. Each group is either a bare
      # identifier (score 1) or a [group, score] pair. Scores accumulate.
      def add(resource, *groups)
        resource = resource.to_s
        pairs = groups.map { |g| g.is_a?(Array) ? g : [g, 1] } # default score of 1
        pairs.each do |(group, score)|
          @resource_group_score[resource][group.to_s] += score
        end
      end

      def add_single(resource, group, score)
        add(resource, [group, score])
      end

      def add_and_calculate(resource, *groups)
        add(resource, *groups)
        calculate_similarity
      end

      # @return [Array<String>] the groups the resource belongs to ([] when unknown)
      def groups(resource)
        # fetch, not [], so that asking about an unknown resource does not create it
        @resource_group_score.fetch(resource.to_s, {}).keys
      end

      # Removes the resource from the raw scores and from the derived index.
      #
      # @return [Numeric, nil] the resource's total from the last calculation, if it had one
      def delete(resource)
        resource = resource.to_s
        removed = @resource_group_score.delete(resource) || {}
        removed.each_key do |group|
          members = @group_resource_scores[group]
          members.delete(resource) if members
        end
        @resource_totals.delete(resource)
      end

      # Recalculates the whole set; the in-memory backend keeps no
      # per-resource similarity data, so `resource` is not used.
      def calculate_similarity_for_resource(resource, threshold = 0)
        calculate_similarity(threshold)
      end

      # Rebuilds the per-resource totals and the group => resource index and
      # stores the threshold applied by subsequent similar_to queries.
      #
      # @param threshold [Numeric, nil] results must score strictly above this; nil disables filtering
      def calculate_similarity(threshold = nil)
        totals = {}
        index = {}
        @resource_group_score.each do |resource, groups|
          totals[resource] = groups.values.inject(0, :+)
          groups.each { |group, score| (index[group] ||= {})[resource] = score }
        end
        @resource_totals = totals
        @group_resource_scores = index

        @threshold = threshold
      end

      # @param resource [Object, Array] a single resource or an array of resources
      # @return [Array<Hash>] hashes with :resource and :similarity, best match first
      def similar_to(resource, limit = DEFAULT_LIMIT)
        if resource.is_a? Array
          similar_to_array(resource, limit)
        else
          similar_to_single(resource, limit)
        end
      end

      # Sums the per-resource results of #similar_to_single over all the given
      # resources, then rounds, filters by threshold, sorts and limits.
      def similar_to_array(resources, limit)
        summed = Hash.new(0)
        resources.each do |r|
          similar_to_single(r, limit).each { |sim| summed[sim[:resource]] += sim[:similarity] }
        end
        similar = summed.map { |resource, similarity| {resource: resource, similarity: similarity.round(3)} }
        similar = similar.select { |sim| above_threshold?(sim) }
        similar.sort_by { |h| [h[:similarity], h[:resource]] }.reverse.first(limit)
      end

      # For every group shared with another resource, adds
      # (my score + other score) / (my total + other total) to that resource's
      # similarity, using the totals and index from the last calculation.
      def similar_to_single(resource, limit)
        resource = resource.to_s
        my_total = @resource_totals[resource]
        # Unknown, or added since the last calculation: nothing is indexed for it.
        return [] if my_total.nil?

        my_groups = @resource_group_score.fetch(resource, {})
        similar = Hash.new(0)

        my_groups.each do |group, my_score|
          @group_resource_scores.fetch(group, {}).each do |other_resource, score|
            next if other_resource == resource
            other_total = @resource_totals[other_resource]
            # Stale index entry for a resource that no longer has a total.
            next if other_total.nil?
            similar[other_resource] += (my_score + score).to_f / (my_total + other_total).to_f
          end
        end

        ranked = similar.map { |other, similarity| {resource: other, similarity: similarity.round(3)} }
        ranked = ranked.sort_by { |h| [h[:similarity], h[:resource]] }.reverse.first(limit)
        ranked.select { |sim| above_threshold?(sim) }
      end

      # Like #similar_to, but keeps only resources the tag set accepts for
      # options[:include] / options[:exclude]. Falls back to the unfiltered
      # query when there is no tag set, no filter, or the tag set is empty.
      #
      # @param options [Hash] :include, :exclude and :limit are read
      def filtered_similar_to(resource, options = {})
        if @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?) || @tag_set.empty?
          return similar_to(resource, options[:limit] || DEFAULT_LIMIT)
        end

        similar = similar_to(resource)
        limit = options[:limit] || similar.length
        filtered = []
        similar.each do |s|
          # Stop as soon as enough matches are found to avoid needless tag lookups.
          break if filtered.length >= limit
          filtered << s if @tag_set.matches(s[:resource], options[:include], options[:exclude])
        end
        filtered
      end

      # Removes the resource from the given groups in the raw scores only; the
      # derived data is refreshed by the next calculation.
      def remove_from_groups(resource, *groups)
        scores = @resource_group_score.fetch(resource.to_s, nil)
        groups.each { |g| scores.delete(g.to_s) } if scores
        groups
      end

      def remove_from_groups_and_calculate(resource, *groups)
        remove_from_groups(resource, *groups)
        calculate_similarity_for_resource(resource)
      end

      private

      # True when no threshold is set or the similarity is strictly above it.
      def above_threshold?(sim)
        @threshold.nil? || sim[:similarity] > @threshold
      end

    end

  end
end
@@ -0,0 +1,43 @@
1
module Commendo
  module RubyBacked
    # Experimental in-memory tag set: maps resource ids (as strings) to arrays
    # of tags. Lookups never create entries, so #empty? only reflects what was
    # stored through #add and #set.
    class TagSet

      attr_accessor :key_base

      # @param key_base [Object] stored and exposed through the accessor; not otherwise used here
      def initialize(key_base)
        @key_base = key_base
        # A plain hash (no default block): reading an unknown resource must
        # not insert an empty entry, otherwise #empty? turns false after a query.
        @resource_to_tags = {}
      end

      # @return [Boolean] true when no resource has an entry
      def empty?
        @resource_to_tags.empty?
      end

      # @return [Array] the resource's tags, or [] when it has none
      def get(resource)
        @resource_to_tags.fetch(resource.to_s, [])
      end

      # Appends the tags to the resource's existing tags.
      def add(resource, *tags)
        key = resource.to_s
        @resource_to_tags[key] = get(key) + tags
      end

      # Replaces the resource's tags.
      def set(resource, *tags)
        @resource_to_tags[resource.to_s] = tags
      end

      # @param include [Array, nil] when non-empty, the resource needs at least one of these tags
      # @param exclude [Array, nil] when non-empty, the resource must have none of these tags
      # @return [Boolean]
      def matches(resource, include, exclude = [])
        resource_tags = get(resource)
        can_include = include.nil? || include.empty? || !(resource_tags & include).empty?
        should_exclude = !exclude.nil? && !exclude.empty? && !(resource_tags & exclude).empty?
        can_include && !should_exclude
      end

      # With no tags, forgets the resource entirely; otherwise removes just
      # the given tags from it.
      #
      # @return [Array, nil] the remaining tags, or nil when the whole resource was removed
      def delete(resource, *tags)
        key = resource.to_s
        if tags.empty?
          @resource_to_tags.delete(key)
          return nil
        end
        # Nothing to remove for an unknown resource; do not create an entry for it.
        return [] unless @resource_to_tags.key?(key)
        @resource_to_tags[key] -= tags
      end

    end
  end
end
43
+
@@ -0,0 +1,38 @@
1
module Commendo
  module RubyBacked

    # Combines the similar_to results of several content sets, multiplying
    # each set's similarities by that set's weight before summing them per
    # resource.
    class WeightedGroup

      attr_accessor :content_sets, :key_base, :tag_set

      # @param key_base [Object] stored and exposed through the accessor; not otherwise used here
      # @param content_sets [Array<Hash>] entries read as { cs: <content set>, weight: <number> }
      def initialize(key_base, *content_sets)
        @key_base = key_base
        @content_sets = content_sets
      end

      # @param limit [Integer] maximum number of results; 0 (the default) means no limit
      # @return [Array<Hash>] hashes with :resource and :similarity, best match first
      def similar_to(resource, limit = 0)
        totals = Hash.new(0.0)
        @content_sets.each do |entry|
          entry[:cs].similar_to(resource).each do |match|
            # Each weighted contribution is rounded before it is added to the total.
            totals[match[:resource]] += (match[:similarity] * entry[:weight]).round(3)
          end
        end
        ranked = totals.map { |name, similarity| {resource: name, similarity: similarity} }
        ranked = ranked.sort_by { |h| [h[:similarity], h[:resource]] }.reverse
        limit > 0 ? ranked.first(limit) : ranked
      end

      # Like #similar_to, but keeps only resources accepted by the tag set for
      # options[:include] / options[:exclude]. Falls back to the unfiltered
      # query when no tag set has been assigned or no filter is given.
      #
      # @param options [Hash] :include, :exclude and :limit are read
      def filtered_similar_to(resource, options = {})
        if @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?)
          return similar_to(resource, options[:limit] || 0)
        end

        similar = similar_to(resource)
        limit = options[:limit] || similar.length
        filtered = []
        similar.each do |s|
          # Stop as soon as enough matches are found to avoid needless tag lookups.
          break if filtered.length >= limit
          filtered << s if @tag_set.matches(s[:resource], options[:include], options[:exclude])
        end
        filtered
      end

    end

  end
end
@@ -7,6 +7,7 @@ module Commendo
7
7
  def initialize(args)
8
8
  @backend = RedisBacked::TagSet.new(args[:key_base]) if Commendo.config.backend == :redis
9
9
  @backend = MySqlBacked::TagSet.new(args[:key_base]) if Commendo.config.backend == :mysql
10
+ @backend = RubyBacked::TagSet.new(args[:key_base]) if Commendo.config.backend == :ruby
10
11
  raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
11
12
  end
12
13
 
@@ -1,3 +1,3 @@
1
1
  module Commendo
2
- VERSION = '2.0.0'
2
+ VERSION = '2.1.0'
3
3
  end
@@ -6,6 +6,7 @@ module Commendo
6
6
  def initialize(args)
7
7
  @backend = RedisBacked::WeightedGroup.new(args[:key_base], *args[:content_sets]) if Commendo.config.backend == :redis
8
8
  @backend = MySqlBacked::WeightedGroup.new(args[:key_base], *args[:content_sets]) if Commendo.config.backend == :mysql
9
+ @backend = RubyBacked::WeightedGroup.new(args[:key_base], *args[:content_sets]) if Commendo.config.backend == :ruby
9
10
  raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
10
11
  end
11
12
 
Binary file
@@ -0,0 +1,33 @@
1
+ require_relative 'tests_for_content_sets.rb'
2
+ gem 'minitest'
3
+ require 'minitest/autorun'
4
+ require 'minitest/pride'
5
+ require 'minitest/mock'
6
+ require 'mocha/setup'
7
+ require 'commendo'
8
+
9
module Commendo

  # Runs the tests mixed in from TestsForContentSets with the backend
  # configured as :ruby.
  class RubyContentSetTest < Minitest::Test

    include TestsForContentSets

    # Selects the :ruby backend and builds the content set under test.
    def setup
      Commendo.config { |config| config.backend = :ruby }
      @key_base = 'CommendoTests'
      @cs = ContentSet.new(key_base: @key_base)
    end

    # Factory for a tag set with the given key base.
    def create_tag_set(kb)
      Commendo::TagSet.new(key_base: kb)
    end

    # Factory for a content set, optionally bound to a tag set.
    def create_content_set(key_base, ts = nil)
      Commendo::ContentSet.new(key_base: key_base, tag_set: ts)
    end

  end

end
@@ -0,0 +1,27 @@
1
+ require_relative 'tests_for_tag_sets'
2
+ gem 'minitest'
3
+ require 'minitest/autorun'
4
+ require 'minitest/pride'
5
+ require 'minitest/mock'
6
+ require 'mocha/setup'
7
+ require 'commendo'
8
+
9
module Commendo

  # Runs the tests mixed in from TestsForTagSets with the backend configured
  # as :ruby.
  class RubyTagSetTest < Minitest::Test

    include TestsForTagSets

    # Selects the :ruby backend and builds the tag set under test.
    def setup
      Commendo.config { |config| config.backend = :ruby }
      @ts = TagSet.new(key_base: 'TagSetTest')
    end

    # Factory for a tag set with the given key base.
    def create_tag_set(kb)
      Commendo::TagSet.new(key_base: kb)
    end

  end
end
@@ -0,0 +1,43 @@
1
+ require_relative 'tests_for_weighted_groups'
2
+ gem 'minitest'
3
+ require 'minitest/autorun'
4
+ require 'minitest/pride'
5
+ require 'minitest/mock'
6
+ require 'mocha/setup'
7
+ require 'commendo'
8
+
9
module Commendo

  # Runs the tests mixed in from TestsForWeightedGroups with the backend
  # configured as :ruby.
  class RubyWeightedGroupTest < Minitest::Test

    # Builds three content sets over resources and groups 3..23 (a resource
    # joins a group when the group divides it and the resource is a multiple
    # of 2, 3 or 6 respectively), tags the resources by divisibility, and
    # wraps the sets in a weighted group with weights 1, 10 and 100.
    def setup
      # Select the Ruby backend explicitly, as the other Ruby backend tests
      # do; otherwise these tests run against whichever backend happens to be
      # configured already.
      Commendo.config do |config|
        config.backend = :ruby
      end
      @tag_set = TagSet.new(key_base: 'CommendoTests:Tags')
      @cs1 = ContentSet.new(key_base: 'CommendoTests:ContentSet1', tag_set: @tag_set)
      @cs2 = ContentSet.new(key_base: 'CommendoTests:ContentSet2', tag_set: @tag_set)
      @cs3 = ContentSet.new(key_base: 'CommendoTests:ContentSet3', tag_set: @tag_set)
      (3..23).each do |group|
        (3..23).each do |res|
          @cs1.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(2).zero?
          @cs2.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(3).zero?
          @cs3.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(6).zero?
          @tag_set.add(res, 'mod3') if res.modulo(3).zero?
          @tag_set.add(res, 'mod4') if res.modulo(4).zero?
          @tag_set.add(res, 'mod5') if res.modulo(5).zero?
          @tag_set.add(res, 'mod7') if res.modulo(7).zero?
        end
      end
      [@cs1, @cs2, @cs3].each { |cs| cs.calculate_similarity }
      @weighted_group = Commendo::WeightedGroup.new(key_base: 'CommendoTests:WeightedGroup',
                                                    content_sets: [{cs: @cs1, weight: 1.0},
                                                                   {cs: @cs2, weight: 10.0},
                                                                   {cs: @cs3, weight: 100.0}]
      )
    end

    include TestsForWeightedGroups

  end

end
42
+
43
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commendo
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-18 00:00:00.000000000 Z
11
+ date: 2016-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
@@ -150,6 +150,7 @@ executables:
150
150
  - commendo-recommendations-distribution
151
151
  - commendo-similarity-distribution
152
152
  - commendo-time-mysql.rb
153
+ - commendo-time-ruby.rb
153
154
  extensions: []
154
155
  extra_rdoc_files: []
155
156
  files:
@@ -169,6 +170,7 @@ files:
169
170
  - bin/commendo-recommendations-distribution
170
171
  - bin/commendo-similarity-distribution
171
172
  - bin/commendo-time-mysql.rb
173
+ - bin/commendo-time-ruby.rb
172
174
  - commendo.gemspec
173
175
  - lib/commendo.rb
174
176
  - lib/commendo/configuration.rb
@@ -181,6 +183,9 @@ files:
181
183
  - lib/commendo/redis-backed/similarity.lua
182
184
  - lib/commendo/redis-backed/tag_set.rb
183
185
  - lib/commendo/redis-backed/weighted_group.rb
186
+ - lib/commendo/ruby-backed/content_set.rb
187
+ - lib/commendo/ruby-backed/tag_set.rb
188
+ - lib/commendo/ruby-backed/weighted_group.rb
184
189
  - lib/commendo/tag_set.rb
185
190
  - lib/commendo/version.rb
186
191
  - lib/commendo/weighted_group.rb
@@ -194,6 +199,9 @@ files:
194
199
  - test/redis_content_set_test.rb
195
200
  - test/redis_tag_set_test.rb
196
201
  - test/redis_weighted_group_test.rb
202
+ - test/ruby_content_set_test.rb
203
+ - test/ruby_tag_set_test.rb
204
+ - test/ruby_weighted_group_test.rb
197
205
  - test/tests_for_content_sets.rb
198
206
  - test/tests_for_tag_sets.rb
199
207
  - test/tests_for_weighted_groups.rb
@@ -229,6 +237,9 @@ test_files:
229
237
  - test/redis_content_set_test.rb
230
238
  - test/redis_tag_set_test.rb
231
239
  - test/redis_weighted_group_test.rb
240
+ - test/ruby_content_set_test.rb
241
+ - test/ruby_tag_set_test.rb
242
+ - test/ruby_weighted_group_test.rb
232
243
  - test/tests_for_content_sets.rb
233
244
  - test/tests_for_tag_sets.rb
234
245
  - test/tests_for_weighted_groups.rb