commendo 1.2.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +3 -0
  3. data/bin/commendo-create-mysql-db +3 -0
  4. data/bin/commendo-create.sql +99 -0
  5. data/bin/commendo-load-tsv +11 -5
  6. data/bin/commendo-load-tsv-mysql.rb +43 -0
  7. data/bin/commendo-time-mysql.rb +31 -0
  8. data/commendo.gemspec +4 -2
  9. data/lib/commendo.rb +24 -0
  10. data/lib/commendo/configuration.rb +25 -0
  11. data/lib/commendo/content_set.rb +13 -182
  12. data/lib/commendo/mysql-backed/content_set.rb +152 -0
  13. data/lib/commendo/mysql-backed/tag_set.rb +81 -0
  14. data/lib/commendo/mysql-backed/weighted_group.rb +40 -0
  15. data/lib/commendo/redis-backed/content_set.rb +194 -0
  16. data/lib/commendo/{pair_comparison.lua → redis-backed/pair_comparison.lua} +0 -0
  17. data/lib/commendo/{similarity.lua → redis-backed/similarity.lua} +0 -0
  18. data/lib/commendo/redis-backed/tag_set.rb +54 -0
  19. data/lib/commendo/redis-backed/weighted_group.rb +54 -0
  20. data/lib/commendo/tag_set.rb +6 -42
  21. data/lib/commendo/version.rb +1 -1
  22. data/lib/commendo/weighted_group.rb +7 -41
  23. data/lib/mysql2/client.rb +17 -0
  24. data/model 2.mwb +0 -0
  25. data/sql_model.mwb +0 -0
  26. data/test/configuration_test.rb +71 -0
  27. data/test/mysql_content_set_test.rb +40 -0
  28. data/test/mysql_tag_set_test.rb +34 -0
  29. data/test/mysql_weighted_group_test.rb +54 -0
  30. data/test/redis_content_set_test.rb +57 -0
  31. data/test/redis_tag_set_test.rb +31 -0
  32. data/test/redis_weighted_group_test.rb +49 -0
  33. data/test/tests_for_content_sets.rb +379 -0
  34. data/test/tests_for_tag_sets.rb +130 -0
  35. data/test/tests_for_weighted_groups.rb +106 -0
  36. metadata +72 -12
  37. data/test/content_set_test.rb +0 -408
  38. data/test/tag_set_test.rb +0 -128
  39. data/test/weighted_group_test.rb +0 -191
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f4346b4b053e4b910efdd819476fae6e18db6b45
4
- data.tar.gz: faeee86f961b6e576888accd047b6adfe4dcd54e
3
+ metadata.gz: 04c833f06518b70ff1370c1685df37294c8fd2b9
4
+ data.tar.gz: dd99d2a4bcca958d2c216d3084b88298c59f462f
5
5
  SHA512:
6
- metadata.gz: ad4356a6a14f35713ab2eee837b69e78e6ed7995c4131968ce8356df45630fc290817a670a2aa29135f02ff9a9a18928437b3a5263c551065912c5689ebb280d
7
- data.tar.gz: b4db90f586213d1174810081d1e18335572f97c51ca351ffd11f3ea395562b9674781375e2c62d8cc16c0926932dc8479a273e556f38b669acf4f2866250cf9a
6
+ metadata.gz: 9936b22c4e2ffc54e63b36cc13c8dce2d1559115956092c3061ca65e3f5db76302128d5fe46aefb30685394bc1f508a6ca05455bc3e0f2e1ae2ddd6fe9121e3b
7
+ data.tar.gz: 952208db4416f43325d581f36363a124b0614d656c3b90d38d8b6576350cb8f561aa14022c6084b50b47cc51825a405350682bb6cbb80974590a3110412e0c11
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ # 2.0.0 / 2015-12-11
2
+ * [FEATURE] Commendo can now use MySQL as a backend if you prefer it to Redis
3
+
1
4
  # 1.2.4 / 2015-12-09
2
5
  * [BUGFIX] Fixed bug with TagSet#empty? reporting empty when only one key present
3
6
 
data/bin/commendo-create-mysql-db ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env bash
2
+ mysql -h $1 -u $2 -p$3 -e "CREATE SCHEMA IF NOT EXISTS $4 DEFAULT CHARACTER SET utf8;"
3
+ mysql -h $1 -u $2 -p$3 $4 < $(dirname $0)/commendo-create.sql
data/bin/commendo-create.sql ADDED
@@ -0,0 +1,99 @@
1
+ SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
2
+ SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
3
+ SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='TRADITIONAL,ALLOW_INVALID_DATES';
4
+
5
+ -- -----------------------------------------------------
6
+ -- Schema commendo
7
+ -- -----------------------------------------------------
8
+ # CREATE SCHEMA IF NOT EXISTS commendo_created DEFAULT CHARACTER SET utf8 ;
9
+ # USE commendo_created ;
10
+
11
+ -- -----------------------------------------------------
12
+ -- Table `Resources`
13
+ -- -----------------------------------------------------
14
+ CREATE TABLE IF NOT EXISTS `Resources` (
15
+ `id` INT NOT NULL AUTO_INCREMENT,
16
+ `keybase` VARCHAR(64) NOT NULL,
17
+ `name` VARCHAR(128) NOT NULL,
18
+ `groupname` VARCHAR(128) NOT NULL,
19
+ `score` FLOAT NOT NULL,
20
+ `union_score` FLOAT NULL,
21
+ PRIMARY KEY (`id`),
22
+ INDEX `name` (`name` ASC),
23
+ INDEX `groupname` (`groupname` ASC),
24
+ UNIQUE INDEX `keybase-name-groupname` (`keybase` ASC, `name` ASC, `groupname` ASC),
25
+ INDEX `keybase` (`keybase` ASC),
26
+ INDEX `keybase-name-score` (`keybase` ASC, `name` ASC, `score` ASC),
27
+ INDEX `keybase-groupname` (`keybase` ASC, `groupname` ASC))
28
+ ENGINE = InnoDB;
29
+
30
+
31
+ -- -----------------------------------------------------
32
+ -- Table `Tags`
33
+ -- -----------------------------------------------------
34
+ CREATE TABLE IF NOT EXISTS `Tags` (
35
+ `id` INT NOT NULL AUTO_INCREMENT,
36
+ `keybase` VARCHAR(64) NOT NULL,
37
+ `name` VARCHAR(128) NOT NULL,
38
+ `tag` VARCHAR(64) NOT NULL,
39
+ PRIMARY KEY (`id`),
40
+ INDEX `tag` (`tag` ASC),
41
+ UNIQUE INDEX `keybase-name-tag` (`keybase` ASC, `name` ASC, `tag` ASC),
42
+ INDEX `keybase` (`keybase` ASC),
43
+ INDEX `name` (`name` ASC))
44
+ ENGINE = InnoDB;
45
+
46
+
47
+ -- -----------------------------------------------------
48
+ -- Table `UnionScores`
49
+ -- -----------------------------------------------------
50
+ CREATE TABLE IF NOT EXISTS `UnionScores` (
51
+ `id` INT NOT NULL AUTO_INCREMENT,
52
+ `keybase` VARCHAR(64) NOT NULL,
53
+ `name` VARCHAR(128) NOT NULL,
54
+ `union_score` FLOAT NOT NULL,
55
+ PRIMARY KEY (`id`),
56
+ INDEX `keybase` (`keybase` ASC),
57
+ INDEX `name` (`name` ASC),
58
+ UNIQUE INDEX `keybase-name` (`keybase` ASC, `name` ASC))
59
+ ENGINE = InnoDB;
60
+
61
+ # USE @schema_name;
62
+
63
+ DELIMITER $$
64
+ # USE @schema_name$$
65
+ CREATE DEFINER = CURRENT_USER
66
+ TRIGGER `Resources_AFTER_INSERT`
67
+ AFTER INSERT ON `Resources` FOR EACH ROW
68
+ BEGIN
69
+ SET @union_score = (
70
+ SELECT SUM(score)
71
+ FROM Resources
72
+ WHERE keybase = new.keybase
73
+ AND name = new.name
74
+ );
75
+ INSERT INTO UnionScores (keybase, name, union_score)
76
+ VALUES (new.keybase, new.name, @union_score)
77
+ ON DUPLICATE KEY UPDATE union_score = @union_score;
78
+ END$$
79
+
80
+ # USE @schema_name$$
81
+ CREATE DEFINER = CURRENT_USER TRIGGER `Resources_AFTER_UPDATE` AFTER UPDATE ON `Resources` FOR EACH ROW
82
+ BEGIN
83
+ SET @union_score = (
84
+ SELECT SUM(score)
85
+ FROM Resources
86
+ WHERE keybase = new.keybase
87
+ AND name = new.name
88
+ );
89
+ UPDATE UnionScores SET union_score = @union_score
90
+ WHERE keybase = new.keybase
91
+ AND name = new.name;
92
+ END$$
93
+
94
+
95
+ DELIMITER ;
96
+
97
+ SET SQL_MODE=@OLD_SQL_MODE;
98
+ SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
99
+ SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
data/bin/commendo-load-tsv CHANGED
@@ -5,13 +5,17 @@ require 'commendo'
5
5
  require 'progressbar'
6
6
 
7
7
  filename = ARGV[0]
8
- redis_db = ARGV[1].to_i
9
- base_key = ARGV[2]
10
8
 
11
- redis = Redis.new(db: redis_db, timeout: 60)
12
- cs = Commendo::ContentSet.new(redis, base_key)
9
+ Commendo.config do |config|
10
+ config.backend = :redis
11
+ config.host = 'localhost'
12
+ config.port = 6379
13
+ config.database = 15
14
+ end
15
+ Redis.new(host: Commendo.config.host, port: Commendo.config.port, db: Commendo.config.database).flushdb
16
+ cs = Commendo::ContentSet.new(key_base: 'MeducationViews')
13
17
 
14
- puts "Loading."
18
+ puts 'Loading.'
15
19
  file_length = `wc -l #{filename}`.to_i
16
20
  pbar = ProgressBar.new('Loading TSV file', file_length)
17
21
  File.open(filename) do |f|
@@ -22,6 +26,7 @@ File.open(filename) do |f|
22
26
  cs.add(resource, *ids)
23
27
  end
24
28
  end
29
+ pbar.finish
25
30
  puts "\nFinished loading"
26
31
 
27
32
  puts 'Calculating similarities'
@@ -30,3 +35,4 @@ cs.calculate_similarity do |key, i, total|
30
35
  pbar ||= ProgressBar.new('Calculating similarity', total)
31
36
  pbar.inc
32
37
  end
38
+ pbar.finish
data/bin/commendo-load-tsv-mysql.rb ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'redis'
4
+ require 'commendo'
5
+ require 'progressbar'
6
+
7
+ filename = ARGV[0]
8
+ key_base = ARGV[1]
9
+
10
+ Commendo.config do |config|
11
+ config.backend = :mysql
12
+ config.host = 'localhost'
13
+ config.port = 3306
14
+ config.database = 'commendo_test'
15
+ config.username = 'commendo'
16
+ config.password = 'commendo123'
17
+ end
18
+ client = Mysql2::Client.new(Commendo.config.to_hash)
19
+ client.query("DELETE FROM Resources WHERE keybase='#{key_base}';")
20
+ cs = Commendo::ContentSet.new(key_base: key_base)
21
+
22
+ puts 'Loading.'
23
+ file_length = `wc -l #{filename}`.to_i
24
+ pbar = ProgressBar.new('Loading TSV file', file_length)
25
+ File.open(filename) do |f|
26
+ f.each_line do |line|
27
+ pbar.inc
28
+ ids = line.strip.split("\t")
29
+ resource = ids.shift
30
+ cs.add(resource, *ids)
31
+ end
32
+ end
33
+ pbar.finish
34
+ puts "\nFinished loading"
35
+
36
+ # puts 'Calculating similarities'
37
+ # # pbar = nil
38
+ # cs.calculate_similarity do |key, i, total|
39
+ # pbar ||= ProgressBar.new('Calculating similarity', total)
40
+ # # pbar.inc
41
+ # $stderr.puts key
42
+ # end
43
+ # pbar.finish
data/bin/commendo-time-mysql.rb ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'redis'
4
+ require 'commendo'
5
+ require 'progressbar'
6
+
7
+ key_base = ARGV[0]
8
+ limit = ARGV[1].to_i
9
+
10
+ Commendo.config do |config|
11
+ config.backend = :mysql
12
+ config.host = 'localhost'
13
+ config.port = 3306
14
+ config.database = 'commendo_test'
15
+ config.username = 'commendo'
16
+ config.password = 'commendo123'
17
+ end
18
+ cs = Commendo::ContentSet.new(key_base: key_base)
19
+
20
+ $stderr.puts "Selecting #{limit} random names to use"
21
+ client = Mysql2::Client.new(Commendo.config.to_hash)
22
+ names_to_query = client.query("SELECT DISTINCT name FROM Resources WHERE keybase = '#{key_base}' ORDER BY RAND() LIMIT #{limit}")
23
+ names_to_query = names_to_query.map { |r| r['name'] }
24
+
25
+ pbar = ProgressBar.new('Querying similar_to', names_to_query.length)
26
+ names_to_query.each do |name|
27
+ cs.similar_to(name)
28
+ pbar.inc
29
+ end
30
+ pbar.finish
31
+
data/commendo.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Commendo::VERSION
9
9
  spec.authors = ['Rob Styles']
10
10
  spec.email = ['rob.styles@dynamicorange.com']
11
- spec.summary = 'A Jaccard-similarity recommender using Redis sets'
12
- spec.description = 'A Jaccard-similarity recommender using Redis sets'
11
+ spec.summary = 'A Jaccard-similarity recommender using Redis sets or MySQL'
12
+ spec.description = 'A Jaccard-similarity recommender using Redis sets or MySQL'
13
13
  spec.homepage = ''
14
14
  spec.license = 'MIT'
15
15
 
@@ -19,7 +19,9 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_dependency 'redis'
22
+ spec.add_dependency 'mysql2'
22
23
  spec.add_dependency 'progressbar'
24
+ spec.add_dependency 'slop'
23
25
 
24
26
  spec.add_development_dependency 'bundler', '~> 1.5'
25
27
  spec.add_development_dependency 'rake'
data/lib/commendo.rb CHANGED
@@ -1,9 +1,33 @@
1
+ require 'forwardable'
2
+ require 'mysql2'
3
+ require_relative 'mysql2/client'
1
4
  require 'redis'
5
+
6
+ require 'commendo/configuration'
2
7
  require 'commendo/version'
3
8
  require 'commendo/content_set'
4
9
  require 'commendo/tag_set'
5
10
  require 'commendo/weighted_group'
6
11
 
12
+ require 'commendo/redis-backed/content_set'
13
+ require 'commendo/redis-backed/tag_set'
14
+ require 'commendo/redis-backed/weighted_group'
15
+
16
+ require 'commendo/mysql-backed/content_set'
17
+ require 'commendo/mysql-backed/tag_set'
18
+ require 'commendo/mysql-backed/weighted_group'
19
+
7
20
  module Commendo
8
21
 
22
+ def self.config
23
+ config = @@config ||= Configuration.new
24
+ yield(config) if block_given?
25
+ config
26
+ end
27
+
28
+ def self.config=(config)
29
+ raise 'Configuration must be either a Commendo::Configuration object or nil to reset' unless config.nil? || config.is_a?(Configuration)
30
+ @@config = config
31
+ end
32
+
9
33
  end
data/lib/commendo/configuration.rb ADDED
@@ -0,0 +1,25 @@
1
+ module Commendo
2
+ class Configuration
3
+
4
+ attr_accessor :backend, :host, :port, :database, :username, :password
5
+
6
+ def initialize
7
+ @backend = :redis
8
+ @host = 'localhost'
9
+ @port = 6379
10
+ @database = 15
11
+ end
12
+
13
+ def to_hash
14
+ {
15
+ backend: backend,
16
+ host: host,
17
+ port: port,
18
+ database: database,
19
+ username: username,
20
+ password: password
21
+ }
22
+ end
23
+
24
+ end
25
+ end
data/lib/commendo/content_set.rb CHANGED
@@ -1,191 +1,22 @@
1
1
  module Commendo
2
2
 
3
3
  class ContentSet
4
+ extend Forwardable
4
5
 
5
- attr_accessor :redis, :key_base, :tag_set
6
+ def_delegators :@backend,
7
+ :add_by_group, :add, :add_single, :add_and_calculate,
8
+ :groups, :delete,
9
+ :calculate_similarity, :calculate_similarity_for_resource, :calculate_similarity_for_key_resource,
10
+ :similar_to, :filtered_similar_to,
11
+ :similarity_key,
12
+ :remove_from_groups, :remove_from_groups_and_calculate
6
13
 
7
- def initialize(redis, key_base, tag_set = nil)
8
- @redis, @key_base, @tag_set = redis, key_base, tag_set
9
- end
10
-
11
- def add_by_group(group, *resources)
12
- resources.each do |resource|
13
- if resource.kind_of?(Array)
14
- add_single(resource[0], group, resource[1])
15
- else
16
- add_single(resource, group, 1)
17
- end
18
- end
19
- end
20
-
21
- def add(resource, *groups)
22
- groups.each do |group|
23
- if group.kind_of?(Array)
24
- add_single(resource, group[0], group[1])
25
- else
26
- add_single(resource, group, 1)
27
- end
28
- end
29
- end
30
-
31
- def add_single(resource, group, score)
32
- redis.zincrby(group_key(group), score, resource)
33
- redis.zincrby(resource_key(resource), score, group)
34
- end
35
-
36
- def add_and_calculate(resource, *groups)
37
- add(resource, *groups)
38
- calculate_similarity_for_resource(resource, 0)
39
- end
40
-
41
- def groups(resource)
42
- redis.zrange(resource_key(resource), 0, -1)
43
- end
44
-
45
- def delete(resource)
46
- similar = similar_to(resource)
47
- similar.each do |other_resource|
48
- redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
49
- end
50
- #TODO delete from groups?
51
- redis.del(similarity_key(resource))
52
- redis.del(resource_key(resource))
53
- end
54
-
55
- SET_TOO_LARGE_FOR_LUA = 999
56
-
57
- def calculate_similarity(threshold = 0)
58
- #TODO make this use scan for scaling
59
- keys = redis.keys("#{resource_key_base}:*")
60
- keys.each_with_index do |key, i|
61
- resource = key.gsub(/^#{resource_key_base}:/, '')
62
- similarity_key = similarity_key(resource)
63
- redis.del(similarity_key)
64
- yield(key, i, keys.length) if block_given?
65
- completed = redis.eval(similarity_lua, keys: [key], argv: [tmp_key_base, resource_key_base, similar_key_base, group_key_base, threshold])
66
- if completed == SET_TOO_LARGE_FOR_LUA
67
- calculate_similarity_for_key_resource(key, resource, threshold)
68
- end
69
- end
70
- end
71
-
72
-
73
- def calculate_similarity_for_resource(resource, threshold)
74
- key = resource_key(resource)
75
- calculate_similarity_for_key_resource(key, resource, threshold)
76
- end
77
-
78
- def calculate_similarity_for_key_resource(key, resource, threshold)
79
- groups = groups(resource)
80
- return if groups.empty?
81
- group_keys = groups.map { |group| group_key(group) }
82
- tmp_key = "#{tmp_key_base}:#{SecureRandom.uuid}"
83
- redis.zunionstore(tmp_key, group_keys)
84
- resources = redis.zrange(tmp_key, 0, -1)
85
- redis.del(tmp_key)
86
- similarity_key = similarity_key(resource)
87
- redis.del(similarity_key)
88
- resources.each do |to_compare|
89
- next if resource == to_compare
90
- redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [tmp_key_base, resource, to_compare, threshold])
91
- end
92
- end
93
-
94
- def similar_to(resource, limit = 0)
95
- finish = limit -1
96
- if resource.kind_of? Array
97
- keys = resource.map do |res|
98
- similarity_key(res)
99
- end
100
- tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
101
- redis.zunionstore(tmp_key, keys)
102
- similar_resources = redis.zrevrange(tmp_key, 0, finish, with_scores: true)
103
- redis.del(tmp_key)
104
- else
105
- similar_resources = redis.zrevrange(similarity_key(resource), 0, finish, with_scores: true)
106
- end
107
- similar_resources.map do |resource|
108
- {resource: resource[0], similarity: resource[1].to_f}
109
- end
110
- end
111
-
112
- def filtered_similar_to(resource, options = {})
113
- if @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?) || @tag_set.empty?
114
- return similar_to(resource, options[:limit] || 0)
115
- else
116
- similar = similar_to(resource)
117
- limit = options[:limit] || similar.length
118
- filtered = []
119
- similar.each do |s|
120
- return filtered if filtered.length >= limit
121
- filtered << s if @tag_set.matches(s[:resource], options[:include], options[:exclude])
122
- end
123
- return filtered
124
- end
125
- end
126
-
127
- def similarity_key(resource)
128
- "#{similar_key_base}:#{resource}"
129
- end
130
-
131
- def remove_from_groups(resource, *groups)
132
- resource_key = resource_key(resource)
133
- redis.zrem(resource_key, groups)
134
- groups.each do |group|
135
- group_key = group_key(group)
136
- redis.zrem(group_key, resource)
137
- end
138
- end
139
-
140
- def remove_from_groups_and_calculate(resource, *groups)
141
- remove_from_groups(resource, *groups)
142
- calculate_similarity_for_resource(resource, 0)
143
- end
144
-
145
- private
146
-
147
- def similarity_lua
148
- @similarity_lua ||= load_similarity_lua
149
- end
150
-
151
- def load_similarity_lua
152
- file = File.open(File.expand_path('../similarity.lua', __FILE__), "r")
153
- file.read
154
- end
155
-
156
- def pair_comparison_lua
157
- @pair_comparison_lua ||= load_pair_comparison_lua
158
- end
159
-
160
- def load_pair_comparison_lua
161
- file = File.open(File.expand_path('../pair_comparison.lua', __FILE__), "r")
162
- file.read
163
- end
164
-
165
- def tmp_key_base
166
- "#{key_base}:tmp"
167
- end
168
-
169
- def similar_key_base
170
- "#{key_base}:similar"
171
- end
172
-
173
- def resource_key_base
174
- "#{key_base}:resources"
175
- end
176
-
177
- def resource_key(resource)
178
- "#{resource_key_base}:#{resource}"
179
- end
180
-
181
- def group_key_base
182
- "#{key_base}:groups"
183
- end
184
-
185
- def group_key(group)
186
- "#{group_key_base}:#{group}"
14
+ def initialize(args)
15
+ @backend = RedisBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :redis
16
+ @backend = MySqlBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :mysql
17
+ raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
187
18
  end
188
19
 
189
20
  end
190
21
 
191
- end
22
+ end