commendo 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 04c833f06518b70ff1370c1685df37294c8fd2b9
4
- data.tar.gz: dd99d2a4bcca958d2c216d3084b88298c59f462f
3
+ metadata.gz: 469912023220e7c8d711e40f37225461644e24db
4
+ data.tar.gz: f4e79eccf3eee391ff0829720914d82d535d283a
5
5
  SHA512:
6
- metadata.gz: 9936b22c4e2ffc54e63b36cc13c8dce2d1559115956092c3061ca65e3f5db76302128d5fe46aefb30685394bc1f508a6ca05455bc3e0f2e1ae2ddd6fe9121e3b
7
- data.tar.gz: 952208db4416f43325d581f36363a124b0614d656c3b90d38d8b6576350cb8f561aa14022c6084b50b47cc51825a405350682bb6cbb80974590a3110412e0c11
6
+ metadata.gz: 58a603eb4e13bd0a9f09c3e8e5fb34fd6b86a2a0944c86da3389792b76a505b808a03ec9ab35aa39e0b6152c3ba20dbfb69d83c2dced898b1ae09a152792aa07
7
+ data.tar.gz: c9af132db981e73748ed917fe42b3aa10f43135bc55f7340570e955c5dbb3c14da0f4d6aa295ae8003f86e923368d9404894fdc545ed8e9e38895718ef636ab1
@@ -1,3 +1,6 @@
1
+ # 2.1.0 / 2016-01-04
2
+ * [FEATURE] Commendo now includes a Ruby-backed in-memory implementation (experimental)
3
+
1
4
  # 2.0.0 / 2015-12-11
2
5
  * [FEATURE] Commendo can now use MySQL as a backend if you prefer it to Redis
3
6
 
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'redis'
4
+ require 'commendo'
5
+ require 'progressbar'
6
+
7
# Runs the given block, prints how long it took to stderr and hands back the
# block's result.
#
# @param msg [String] label printed in front of the elapsed time
# @yield the work to be timed
# @return [Object] whatever the block returned
def timer(msg)
  # The monotonic clock is unaffected by wall-clock adjustments, so the
  # difference below is always a true elapsed duration in seconds.
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  value = yield
  finish = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  $stderr.puts "#{msg} took #{finish - start}"
  value
end
14
+
15
# Usage: commendo-time-ruby.rb INFILE KEY_BASE LIMIT
#
# INFILE is read as tab-separated lines: the first field is the resource and
# the remaining fields are the sets it is added to. LIMIT is the number of
# randomly chosen resources that are queried with similar_to afterwards.
infile = ARGV[0]
key_base = ARGV[1]
limit = ARGV[2].to_i

Commendo.config do |config|
  config.backend = :ruby
end
cs = Commendo::ContentSet.new(key_base: key_base)

resource_to_sets = nil
timer('Loading') do
  # File.readlines opens and closes the file itself, so no handle is leaked.
  resource_to_sets = File.readlines(infile).map do |line|
    resource, *sets = line.strip.split("\t")
    [resource, sets]
  end
  resource_to_sets.each do |resource, sets|
    cs.add(resource, *sets)
  end
end

timer('calculate_similarity') do
  cs.calculate_similarity
end

names_to_query = timer("Select #{limit} random names to use") do
  resource_to_sets.map(&:first).sort_by { rand }.first(limit)
end

# pbar = ProgressBar.new('Querying similar_to', names_to_query.length)
names_to_query.each do |name|
  timer('Similarity') { cs.similar_to(name) }
  # pbar.inc
end
# pbar.finish
47
+
@@ -17,6 +17,10 @@ require 'commendo/mysql-backed/content_set'
17
17
  require 'commendo/mysql-backed/tag_set'
18
18
  require 'commendo/mysql-backed/weighted_group'
19
19
 
20
+ require 'commendo/ruby-backed/content_set'
21
+ require 'commendo/ruby-backed/tag_set'
22
+ require 'commendo/ruby-backed/weighted_group'
23
+
20
24
  module Commendo
21
25
 
22
26
  def self.config
@@ -14,6 +14,7 @@ module Commendo
14
14
  def initialize(args)
15
15
  @backend = RedisBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :redis
16
16
  @backend = MySqlBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :mysql
17
+ @backend = RubyBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :ruby
17
18
  raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
18
19
  end
19
20
 
@@ -0,0 +1,133 @@
1
module Commendo
  module RubyBacked

    # In-memory, pure-Ruby content set.
    #
    # Scores are recorded per resource and group by #add. #calculate_similarity
    # snapshots them into lookup tables, and the #similar_to family reads those
    # tables. Resources added after the last #calculate_similarity are not part
    # of the snapshot and therefore have no similar resources yet.
    class ContentSet

      attr_accessor :key_base, :tag_set

      DEFAULT_LIMIT = 1000

      # @param key_base [Object] stored and exposed through #key_base
      # @param tag_set [#matches, #empty?, nil] used by #filtered_similar_to
      def initialize(key_base, tag_set = nil)
        # resource => { group => accumulated score }
        @resource_group_score = Hash.new { |scores, resource| scores[resource] = Hash.new(0) }
        # Snapshot tables rebuilt by #calculate_similarity. They start out empty
        # rather than nil so #delete and #similar_to can be called first.
        @group_resource_scores = {} # group => { resource => score }
        @resource_totals = {}       # resource => sum of its scores
        @key_base = key_base
        @tag_set = tag_set
        @threshold = nil
      end

      # Adds several resources to one group. Each resource is either a bare
      # name (score 1) or a [name, score] pair.
      def add_by_group(group, *resources)
        resources.map! { |r| r.is_a?(Array) ? r : [r, 1] } # default score of 1
        resources.each { |(resource, score)| add_single(resource, group, score) }
      end

      # Adds one resource to several groups. Each group is either a bare name
      # (score 1) or a [name, score] pair. Scores accumulate across calls.
      def add(resource, *groups)
        resource = resource.to_s
        groups.map! { |g| g.is_a?(Array) ? g : [g, 1] } # default score of 1
        groups.each do |(group, score)|
          @resource_group_score[resource][group.to_s] += score
        end
      end

      def add_single(resource, group, score)
        add(resource, [group, score])
      end

      def add_and_calculate(resource, *groups)
        add(resource, *groups)
        calculate_similarity
      end

      # @return [Array<String>] the groups recorded for the resource; empty for
      #   an unknown resource (asking does not create an entry)
      def groups(resource)
        @resource_group_score.fetch(resource.to_s, {}).keys
      end

      # Removes the resource from the recorded scores and from the snapshot
      # tables. Unknown resources are ignored.
      def delete(resource)
        resource = resource.to_s
        removed = @resource_group_score.delete(resource) || {}
        removed.each_key do |group|
          members = @group_resource_scores[group]
          members.delete(resource) if members
        end
        @resource_totals.delete(resource)
      end

      # The whole snapshot is rebuilt; the resource argument is not used.
      def calculate_similarity_for_resource(resource, threshold = 0)
        calculate_similarity(threshold)
      end

      # Rebuilds the snapshot tables from the recorded scores and stores the
      # threshold applied by later similarity queries (nil disables it).
      def calculate_similarity(threshold = nil)
        totals = {}
        by_group = {}
        @resource_group_score.each do |resource, groups|
          totals[resource] = groups.values.inject(0, :+)
          groups.each { |group, score| (by_group[group] ||= {})[resource] = score }
        end
        @resource_totals = totals
        @group_resource_scores = by_group

        @threshold = threshold
      end

      # @param resource [Object, Array] one resource or an array of resources
      # @return [Array<Hash>] {resource:, similarity:} hashes, most similar first
      def similar_to(resource, limit = DEFAULT_LIMIT)
        return similar_to_array(resource, limit) if resource.is_a?(Array)

        similar_to_single(resource, limit)
      end

      # Sums the per-resource results of #similar_to_single over all given
      # resources and ranks the totals.
      def similar_to_array(resources, limit)
        combined = Hash.new(0)
        resources.each do |resource|
          similar_to_single(resource, limit).each { |sim| combined[sim[:resource]] += sim[:similarity] }
        end
        ranked(combined, limit)
      end

      # For every group the resource is in, each other member of that group
      # contributes (my_score + its_score) / (my_total + its_total).
      def similar_to_single(resource, limit)
        resource = resource.to_s
        my_total = @resource_totals[resource]
        # Not in the snapshot: unknown, or added since the last calculation.
        return [] if my_total.nil?

        similar = Hash.new(0)
        @resource_group_score.fetch(resource, {}).each do |group, my_score|
          @group_resource_scores.fetch(group, {}).each do |other_resource, score|
            next if other_resource == resource
            combined_total = my_total + @resource_totals[other_resource]
            next if combined_total.zero? # avoids NaN/Infinity from a zero denominator
            similar[other_resource] += (my_score + score).to_f / combined_total
          end
        end
        ranked(similar, limit)
      end

      # Like #similar_to, but keeps only results accepted by the tag set when
      # options[:include] or options[:exclude] is given.
      #
      # @param options [Hash] reads :include, :exclude and :limit
      def filtered_similar_to(resource, options = {})
        unfiltered = @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?) || @tag_set.empty?
        return similar_to(resource, options[:limit] || DEFAULT_LIMIT) if unfiltered

        similar = similar_to(resource)
        limit = options[:limit] || similar.length
        filtered = []
        similar.each do |sim|
          break if filtered.length >= limit # stop asking the tag set once full
          filtered << sim if @tag_set.matches(sim[:resource], options[:include], options[:exclude])
        end
        filtered
      end

      # Forgets the resource's scores for the given groups. The snapshot is not
      # touched until the next calculation.
      def remove_from_groups(resource, *groups)
        scores = @resource_group_score.fetch(resource.to_s, {})
        groups.each { |g| scores.delete(g.to_s) }
      end

      def remove_from_groups_and_calculate(resource, *groups)
        remove_from_groups(resource, *groups)
        calculate_similarity_for_resource(resource)
      end

      private

      # Turns a resource => similarity hash into the public result format:
      # rounded to 3 places, filtered by the stored threshold, ordered by
      # similarity then resource (both descending) and cut to limit entries.
      def ranked(similarities, limit)
        results = similarities.map { |resource, similarity| {resource: resource, similarity: similarity.round(3)} }
        results = results.select { |sim| sim[:similarity] > @threshold } unless @threshold.nil?
        results.sort_by { |sim| [sim[:similarity], sim[:resource]] }.reverse.first(limit)
      end

    end

  end
end
@@ -0,0 +1,43 @@
1
module Commendo
  module RubyBacked
    # In-memory tag store mapping a resource (keyed by its to_s) to an array of
    # tags. Only resources that currently have at least one tag are stored, so
    # #empty? is not affected by lookups or by removing a resource's last tag.
    class TagSet

      attr_accessor :key_base

      def initialize(key_base)
        @key_base = key_base
        @resource_to_tags = {}
      end

      # @return [Boolean] true when no resource has any tag
      def empty?
        @resource_to_tags.empty?
      end

      # @return [Array] the tags of the resource; empty for an unknown resource
      def get(resource)
        # fetch keeps a lookup from inserting an entry for unknown resources
        @resource_to_tags.fetch(resource.to_s, [])
      end

      # Appends the tags to those already stored for the resource.
      #
      # @return [Array] the resource's tags after the addition
      def add(resource, *tags)
        key = resource.to_s
        updated = get(key) + tags
        @resource_to_tags[key] = updated unless updated.empty?
        updated
      end

      # Replaces the resource's tags with exactly the given ones.
      #
      # @return [Array] the given tags
      def set(resource, *tags)
        key = resource.to_s
        if tags.empty?
          @resource_to_tags.delete(key)
        else
          @resource_to_tags[key] = tags
        end
        tags
      end

      # A resource matches when it carries at least one of the include tags
      # (a nil or empty include list accepts everything) and none of the
      # exclude tags.
      def matches(resource, include, exclude = [])
        resource_tags = get(resource)
        can_include = include.nil? || include.empty? || !(resource_tags & include).empty?
        should_exclude = !exclude.nil? && !exclude.empty? && !(resource_tags & exclude).empty?
        can_include && !should_exclude
      end

      # Without tags, forgets the resource entirely and returns nil. With tags,
      # removes just those and returns the tags that remain.
      def delete(resource, *tags)
        key = resource.to_s
        if tags.empty?
          @resource_to_tags.delete(key)
          return nil
        end

        remaining = get(key) - tags
        if remaining.empty?
          @resource_to_tags.delete(key)
        else
          @resource_to_tags[key] = remaining
        end
        remaining
      end

    end
  end
end
43
+
@@ -0,0 +1,38 @@
1
module Commendo
  module RubyBacked

    # Combines the similarity results of several content sets, scaling each
    # set's similarities by its weight before adding them up per resource.
    class WeightedGroup

      attr_accessor :content_sets, :key_base, :tag_set

      # @param key_base [Object] stored and exposed through #key_base
      # @param content_sets [Array<Hash>] entries are read as
      #   { cs: <responds to similar_to>, weight: <Numeric> }
      def initialize(key_base, *content_sets)
        @key_base = key_base
        @content_sets = content_sets
        # Assigned later through the tag_set writer; nil means "no filtering".
        @tag_set = nil
      end

      # @param limit [Integer] maximum number of results; 0 or less means all
      # @return [Array<Hash>] {resource:, similarity:} hashes ordered by
      #   similarity, then resource, both descending
      def similar_to(resource, limit = 0)
        totals = Hash.new(0.0)
        @content_sets.each do |entry|
          entry[:cs].similar_to(resource).each do |sim|
            # each weighted contribution is rounded before it is summed
            totals[sim[:resource]] += (sim[:similarity] * entry[:weight]).round(3)
          end
        end
        ranked = totals.map { |name, similarity| {resource: name, similarity: similarity} }
        ranked = ranked.sort_by { |sim| [sim[:similarity], sim[:resource]] }.reverse
        limit > 0 ? ranked.first(limit) : ranked
      end

      # Like #similar_to, but keeps only results accepted by the tag set when
      # options[:include] or options[:exclude] is given.
      #
      # @param options [Hash] reads :include, :exclude and :limit
      def filtered_similar_to(resource, options = {})
        if @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?)
          return similar_to(resource, options[:limit] || 0)
        end

        candidates = similar_to(resource)
        limit = options[:limit] || candidates.length
        filtered = []
        candidates.each do |sim|
          break if filtered.length >= limit # stop asking the tag set once full
          filtered << sim if @tag_set.matches(sim[:resource], options[:include], options[:exclude])
        end
        filtered
      end

    end

  end
end
@@ -7,6 +7,7 @@ module Commendo
7
7
  def initialize(args)
8
8
  @backend = RedisBacked::TagSet.new(args[:key_base]) if Commendo.config.backend == :redis
9
9
  @backend = MySqlBacked::TagSet.new(args[:key_base]) if Commendo.config.backend == :mysql
10
+ @backend = RubyBacked::TagSet.new(args[:key_base]) if Commendo.config.backend == :ruby
10
11
  raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
11
12
  end
12
13
 
@@ -1,3 +1,3 @@
1
1
  module Commendo
2
- VERSION = '2.0.0'
2
+ VERSION = '2.1.0'
3
3
  end
@@ -6,6 +6,7 @@ module Commendo
6
6
  def initialize(args)
7
7
  @backend = RedisBacked::WeightedGroup.new(args[:key_base], *args[:content_sets]) if Commendo.config.backend == :redis
8
8
  @backend = MySqlBacked::WeightedGroup.new(args[:key_base], *args[:content_sets]) if Commendo.config.backend == :mysql
9
+ @backend = RubyBacked::WeightedGroup.new(args[:key_base], *args[:content_sets]) if Commendo.config.backend == :ruby
9
10
  raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
10
11
  end
11
12
 
Binary file
@@ -0,0 +1,33 @@
1
+ require_relative 'tests_for_content_sets.rb'
2
+ gem 'minitest'
3
+ require 'minitest/autorun'
4
+ require 'minitest/pride'
5
+ require 'minitest/mock'
6
+ require 'mocha/setup'
7
+ require 'commendo'
8
+
9
module Commendo

  # Runs the shared content-set tests with the :ruby backend selected.
  class RubyContentSetTest < Minitest::Test

    include TestsForContentSets

    # Selects the :ruby backend and builds the content set under test.
    def setup
      Commendo.config { |config| config.backend = :ruby }
      @key_base = 'CommendoTests'
      @cs = ContentSet.new(key_base: @key_base)
    end

    # Factory hook: a tag set for the given key base.
    def create_tag_set(kb)
      TagSet.new(key_base: kb)
    end

    # Factory hook: a content set for the given key base and optional tag set.
    def create_content_set(key_base, ts = nil)
      ContentSet.new(key_base: key_base, tag_set: ts)
    end

  end

end
@@ -0,0 +1,27 @@
1
+ require_relative 'tests_for_tag_sets'
2
+ gem 'minitest'
3
+ require 'minitest/autorun'
4
+ require 'minitest/pride'
5
+ require 'minitest/mock'
6
+ require 'mocha/setup'
7
+ require 'commendo'
8
+
9
module Commendo

  # Runs the shared tag-set tests with the :ruby backend selected.
  class RubyTagSetTest < Minitest::Test

    include TestsForTagSets

    # Selects the :ruby backend and builds the tag set under test.
    def setup
      Commendo.config { |config| config.backend = :ruby }
      @ts = create_tag_set('TagSetTest')
    end

    # Factory hook: a tag set for the given key base.
    def create_tag_set(kb)
      TagSet.new(key_base: kb)
    end

  end
end
@@ -0,0 +1,43 @@
1
+ require_relative 'tests_for_weighted_groups'
2
+ gem 'minitest'
3
+ require 'minitest/autorun'
4
+ require 'minitest/pride'
5
+ require 'minitest/mock'
6
+ require 'mocha/setup'
7
+ require 'commendo'
8
+
9
module Commendo

  # Runs the shared weighted-group tests with the :ruby backend selected.
  class RubyWeightedGroupTest < Minitest::Test

    # Builds three content sets over the numbers 3..23 and a weighted group
    # that combines them with weights 1, 10 and 100.
    def setup
      # Select the backend explicitly, as the other Ruby-backend test classes
      # do; otherwise these tests run against whichever backend was configured
      # last in the process.
      Commendo.config do |config|
        config.backend = :ruby
      end
      @tag_set = TagSet.new(key_base: 'CommendoTests:Tags')
      @cs1 = ContentSet.new(key_base: 'CommendoTests:ContentSet1', tag_set: @tag_set)
      @cs2 = ContentSet.new(key_base: 'CommendoTests:ContentSet2', tag_set: @tag_set)
      @cs3 = ContentSet.new(key_base: 'CommendoTests:ContentSet3', tag_set: @tag_set)
      (3..23).each do |group|
        (3..23).each do |res|
          # A resource joins a group when the group divides it; each content
          # set additionally only takes multiples of 2, 3 or 6 respectively.
          @cs1.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(2).zero?
          @cs2.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(3).zero?
          @cs3.add_by_group(group, res) if res.modulo(group).zero? && res.modulo(6).zero?
          @tag_set.add(res, 'mod3') if res.modulo(3).zero?
          @tag_set.add(res, 'mod4') if res.modulo(4).zero?
          @tag_set.add(res, 'mod5') if res.modulo(5).zero?
          @tag_set.add(res, 'mod7') if res.modulo(7).zero?
        end
      end
      [@cs1, @cs2, @cs3].each { |cs| cs.calculate_similarity }
      @weighted_group = Commendo::WeightedGroup.new(key_base: 'CommendoTests:WeightedGroup',
                                                    content_sets: [{cs: @cs1, weight: 1.0},
                                                                   {cs: @cs2, weight: 10.0},
                                                                   {cs: @cs3, weight: 100.0}]
      )
    end

    include TestsForWeightedGroups

  end

end
42
+
43
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commendo
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-18 00:00:00.000000000 Z
11
+ date: 2016-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
@@ -150,6 +150,7 @@ executables:
150
150
  - commendo-recommendations-distribution
151
151
  - commendo-similarity-distribution
152
152
  - commendo-time-mysql.rb
153
+ - commendo-time-ruby.rb
153
154
  extensions: []
154
155
  extra_rdoc_files: []
155
156
  files:
@@ -169,6 +170,7 @@ files:
169
170
  - bin/commendo-recommendations-distribution
170
171
  - bin/commendo-similarity-distribution
171
172
  - bin/commendo-time-mysql.rb
173
+ - bin/commendo-time-ruby.rb
172
174
  - commendo.gemspec
173
175
  - lib/commendo.rb
174
176
  - lib/commendo/configuration.rb
@@ -181,6 +183,9 @@ files:
181
183
  - lib/commendo/redis-backed/similarity.lua
182
184
  - lib/commendo/redis-backed/tag_set.rb
183
185
  - lib/commendo/redis-backed/weighted_group.rb
186
+ - lib/commendo/ruby-backed/content_set.rb
187
+ - lib/commendo/ruby-backed/tag_set.rb
188
+ - lib/commendo/ruby-backed/weighted_group.rb
184
189
  - lib/commendo/tag_set.rb
185
190
  - lib/commendo/version.rb
186
191
  - lib/commendo/weighted_group.rb
@@ -194,6 +199,9 @@ files:
194
199
  - test/redis_content_set_test.rb
195
200
  - test/redis_tag_set_test.rb
196
201
  - test/redis_weighted_group_test.rb
202
+ - test/ruby_content_set_test.rb
203
+ - test/ruby_tag_set_test.rb
204
+ - test/ruby_weighted_group_test.rb
197
205
  - test/tests_for_content_sets.rb
198
206
  - test/tests_for_tag_sets.rb
199
207
  - test/tests_for_weighted_groups.rb
@@ -229,6 +237,9 @@ test_files:
229
237
  - test/redis_content_set_test.rb
230
238
  - test/redis_tag_set_test.rb
231
239
  - test/redis_weighted_group_test.rb
240
+ - test/ruby_content_set_test.rb
241
+ - test/ruby_tag_set_test.rb
242
+ - test/ruby_weighted_group_test.rb
232
243
  - test/tests_for_content_sets.rb
233
244
  - test/tests_for_tag_sets.rb
234
245
  - test/tests_for_weighted_groups.rb