commendo 1.2.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +3 -0
  3. data/bin/commendo-create-mysql-db +3 -0
  4. data/bin/commendo-create.sql +99 -0
  5. data/bin/commendo-load-tsv +11 -5
  6. data/bin/commendo-load-tsv-mysql.rb +43 -0
  7. data/bin/commendo-time-mysql.rb +31 -0
  8. data/commendo.gemspec +4 -2
  9. data/lib/commendo.rb +24 -0
  10. data/lib/commendo/configuration.rb +25 -0
  11. data/lib/commendo/content_set.rb +13 -182
  12. data/lib/commendo/mysql-backed/content_set.rb +152 -0
  13. data/lib/commendo/mysql-backed/tag_set.rb +81 -0
  14. data/lib/commendo/mysql-backed/weighted_group.rb +40 -0
  15. data/lib/commendo/redis-backed/content_set.rb +194 -0
  16. data/lib/commendo/{pair_comparison.lua → redis-backed/pair_comparison.lua} +0 -0
  17. data/lib/commendo/{similarity.lua → redis-backed/similarity.lua} +0 -0
  18. data/lib/commendo/redis-backed/tag_set.rb +54 -0
  19. data/lib/commendo/redis-backed/weighted_group.rb +54 -0
  20. data/lib/commendo/tag_set.rb +6 -42
  21. data/lib/commendo/version.rb +1 -1
  22. data/lib/commendo/weighted_group.rb +7 -41
  23. data/lib/mysql2/client.rb +17 -0
  24. data/model 2.mwb +0 -0
  25. data/sql_model.mwb +0 -0
  26. data/test/configuration_test.rb +71 -0
  27. data/test/mysql_content_set_test.rb +40 -0
  28. data/test/mysql_tag_set_test.rb +34 -0
  29. data/test/mysql_weighted_group_test.rb +54 -0
  30. data/test/redis_content_set_test.rb +57 -0
  31. data/test/redis_tag_set_test.rb +31 -0
  32. data/test/redis_weighted_group_test.rb +49 -0
  33. data/test/tests_for_content_sets.rb +379 -0
  34. data/test/tests_for_tag_sets.rb +130 -0
  35. data/test/tests_for_weighted_groups.rb +106 -0
  36. metadata +72 -12
  37. data/test/content_set_test.rb +0 -408
  38. data/test/tag_set_test.rb +0 -128
  39. data/test/weighted_group_test.rb +0 -191
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f4346b4b053e4b910efdd819476fae6e18db6b45
4
- data.tar.gz: faeee86f961b6e576888accd047b6adfe4dcd54e
3
+ metadata.gz: 04c833f06518b70ff1370c1685df37294c8fd2b9
4
+ data.tar.gz: dd99d2a4bcca958d2c216d3084b88298c59f462f
5
5
  SHA512:
6
- metadata.gz: ad4356a6a14f35713ab2eee837b69e78e6ed7995c4131968ce8356df45630fc290817a670a2aa29135f02ff9a9a18928437b3a5263c551065912c5689ebb280d
7
- data.tar.gz: b4db90f586213d1174810081d1e18335572f97c51ca351ffd11f3ea395562b9674781375e2c62d8cc16c0926932dc8479a273e556f38b669acf4f2866250cf9a
6
+ metadata.gz: 9936b22c4e2ffc54e63b36cc13c8dce2d1559115956092c3061ca65e3f5db76302128d5fe46aefb30685394bc1f508a6ca05455bc3e0f2e1ae2ddd6fe9121e3b
7
+ data.tar.gz: 952208db4416f43325d581f36363a124b0614d656c3b90d38d8b6576350cb8f561aa14022c6084b50b47cc51825a405350682bb6cbb80974590a3110412e0c11
@@ -1,3 +1,6 @@
1
+ # 2.0.0 / 2015-12-11
2
+ * [FEATURE] Commendo can now use MySQL as a backend if you prefer it to Redis
3
+
1
4
  # 1.2.4 / 2015-12-09
2
5
  * [BUGFIX] Fixed bug with TagSet#empty? reporting empty when only one key present
3
6
 
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env bash
2
+ mysql -h $1 -u $2 -p$3 -e "CREATE SCHEMA IF NOT EXISTS $4 DEFAULT CHARACTER SET utf8;"
3
+ mysql -h $1 -u $2 -p$3 $4 < $(dirname $0)/commendo-create.sql
@@ -0,0 +1,99 @@
1
+ SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
2
+ SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
3
+ SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='TRADITIONAL,ALLOW_INVALID_DATES';
4
+
5
+ -- -----------------------------------------------------
6
+ -- Schema commendo
7
+ -- -----------------------------------------------------
8
+ # CREATE SCHEMA IF NOT EXISTS commendo_created DEFAULT CHARACTER SET utf8 ;
9
+ # USE commendo_created ;
10
+
11
+ -- -----------------------------------------------------
12
+ -- Table `Resources`
13
+ -- -----------------------------------------------------
14
+ CREATE TABLE IF NOT EXISTS `Resources` (
15
+ `id` INT NOT NULL AUTO_INCREMENT,
16
+ `keybase` VARCHAR(64) NOT NULL,
17
+ `name` VARCHAR(128) NOT NULL,
18
+ `groupname` VARCHAR(128) NOT NULL,
19
+ `score` FLOAT NOT NULL,
20
+ `union_score` FLOAT NULL,
21
+ PRIMARY KEY (`id`),
22
+ INDEX `name` (`name` ASC),
23
+ INDEX `groupname` (`groupname` ASC),
24
+ UNIQUE INDEX `keybase-name-groupname` (`keybase` ASC, `name` ASC, `groupname` ASC),
25
+ INDEX `keybase` (`keybase` ASC),
26
+ INDEX `keybase-name-score` (`keybase` ASC, `name` ASC, `score` ASC),
27
+ INDEX `keybase-groupname` (`keybase` ASC, `groupname` ASC))
28
+ ENGINE = InnoDB;
29
+
30
+
31
+ -- -----------------------------------------------------
32
+ -- Table `Tags`
33
+ -- -----------------------------------------------------
34
+ CREATE TABLE IF NOT EXISTS `Tags` (
35
+ `id` INT NOT NULL AUTO_INCREMENT,
36
+ `keybase` VARCHAR(64) NOT NULL,
37
+ `name` VARCHAR(128) NOT NULL,
38
+ `tag` VARCHAR(64) NOT NULL,
39
+ PRIMARY KEY (`id`),
40
+ INDEX `tag` (`tag` ASC),
41
+ UNIQUE INDEX `keybase-name-tag` (`keybase` ASC, `name` ASC, `tag` ASC),
42
+ INDEX `keybase` (`keybase` ASC),
43
+ INDEX `name` (`name` ASC))
44
+ ENGINE = InnoDB;
45
+
46
+
47
+ -- -----------------------------------------------------
48
+ -- Table `UnionScores`
49
+ -- -----------------------------------------------------
50
+ CREATE TABLE IF NOT EXISTS `UnionScores` (
51
+ `id` INT NOT NULL AUTO_INCREMENT,
52
+ `keybase` VARCHAR(64) NOT NULL,
53
+ `name` VARCHAR(128) NOT NULL,
54
+ `union_score` FLOAT NOT NULL,
55
+ PRIMARY KEY (`id`),
56
+ INDEX `keybase` (`keybase` ASC),
57
+ INDEX `name` (`name` ASC),
58
+ UNIQUE INDEX `keybase-name` (`keybase` ASC, `name` ASC))
59
+ ENGINE = InnoDB;
60
+
61
+ # USE @schema_name;
62
+
63
+ DELIMITER $$
64
+ # USE @schema_name$$
65
+ CREATE DEFINER = CURRENT_USER
66
+ TRIGGER `Resources_AFTER_INSERT`
67
+ AFTER INSERT ON `Resources` FOR EACH ROW
68
+ BEGIN
69
+ SET @union_score = (
70
+ SELECT SUM(score)
71
+ FROM Resources
72
+ WHERE keybase = new.keybase
73
+ AND name = new.name
74
+ );
75
+ INSERT INTO UnionScores (keybase, name, union_score)
76
+ VALUES (new.keybase, new.name, @union_score)
77
+ ON DUPLICATE KEY UPDATE union_score = @union_score;
78
+ END$$
79
+
80
+ # USE @schema_name$$
81
+ CREATE DEFINER = CURRENT_USER TRIGGER `Resources_AFTER_UPDATE` AFTER UPDATE ON `Resources` FOR EACH ROW
82
+ BEGIN
83
+ SET @union_score = (
84
+ SELECT SUM(score)
85
+ FROM Resources
86
+ WHERE keybase = new.keybase
87
+ AND name = new.name
88
+ );
89
+ UPDATE UnionScores SET union_score = @union_score
90
+ WHERE keybase = new.keybase
91
+ AND name = new.name;
92
+ END$$
93
+
94
+
95
+ DELIMITER ;
96
+
97
+ SET SQL_MODE=@OLD_SQL_MODE;
98
+ SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
99
+ SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
@@ -5,13 +5,17 @@ require 'commendo'
5
5
  require 'progressbar'
6
6
 
7
7
  filename = ARGV[0]
8
- redis_db = ARGV[1].to_i
9
- base_key = ARGV[2]
10
8
 
11
- redis = Redis.new(db: redis_db, timeout: 60)
12
- cs = Commendo::ContentSet.new(redis, base_key)
9
+ Commendo.config do |config|
10
+ config.backend = :redis
11
+ config.host = 'localhost'
12
+ config.port = 6379
13
+ config.database = 15
14
+ end
15
+ Redis.new(host: Commendo.config.host, port: Commendo.config.port, db: Commendo.config.database).flushdb
16
+ cs = Commendo::ContentSet.new(key_base: 'MeducationViews')
13
17
 
14
- puts "Loading."
18
+ puts 'Loading.'
15
19
  file_length = `wc -l #{filename}`.to_i
16
20
  pbar = ProgressBar.new('Loading TSV file', file_length)
17
21
  File.open(filename) do |f|
@@ -22,6 +26,7 @@ File.open(filename) do |f|
22
26
  cs.add(resource, *ids)
23
27
  end
24
28
  end
29
+ pbar.finish
25
30
  puts "\nFinished loading"
26
31
 
27
32
  puts 'Calculating similarities'
@@ -30,3 +35,4 @@ cs.calculate_similarity do |key, i, total|
30
35
  pbar ||= ProgressBar.new('Calculating similarity', total)
31
36
  pbar.inc
32
37
  end
38
+ pbar.finish
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'redis'
4
+ require 'commendo'
5
+ require 'progressbar'
6
+
7
+ filename = ARGV[0]
8
+ key_base = ARGV[1]
9
+
10
+ Commendo.config do |config|
11
+ config.backend = :mysql
12
+ config.host = 'localhost'
13
+ config.port = 3306
14
+ config.database = 'commendo_test'
15
+ config.username = 'commendo'
16
+ config.password = 'commendo123'
17
+ end
18
+ client = Mysql2::Client.new(Commendo.config.to_hash)
19
+ client.query("DELETE FROM Resources WHERE keybase='#{key_base}';")
20
+ cs = Commendo::ContentSet.new(key_base: key_base)
21
+
22
+ puts 'Loading.'
23
+ file_length = `wc -l #{filename}`.to_i
24
+ pbar = ProgressBar.new('Loading TSV file', file_length)
25
+ File.open(filename) do |f|
26
+ f.each_line do |line|
27
+ pbar.inc
28
+ ids = line.strip.split("\t")
29
+ resource = ids.shift
30
+ cs.add(resource, *ids)
31
+ end
32
+ end
33
+ pbar.finish
34
+ puts "\nFinished loading"
35
+
36
+ # puts 'Calculating similarities'
37
+ # # pbar = nil
38
+ # cs.calculate_similarity do |key, i, total|
39
+ # pbar ||= ProgressBar.new('Calculating similarity', total)
40
+ # # pbar.inc
41
+ # $stderr.puts key
42
+ # end
43
+ # pbar.finish
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'redis'
4
+ require 'commendo'
5
+ require 'progressbar'
6
+
7
+ key_base = ARGV[0]
8
+ limit = ARGV[1].to_i
9
+
10
+ Commendo.config do |config|
11
+ config.backend = :mysql
12
+ config.host = 'localhost'
13
+ config.port = 3306
14
+ config.database = 'commendo_test'
15
+ config.username = 'commendo'
16
+ config.password = 'commendo123'
17
+ end
18
+ cs = Commendo::ContentSet.new(key_base: key_base)
19
+
20
+ $stderr.puts "Selecting #{limit} random names to use"
21
+ client = Mysql2::Client.new(Commendo.config.to_hash)
22
+ names_to_query = client.query("SELECT DISTINCT name FROM Resources WHERE keybase = '#{key_base}' ORDER BY RAND() LIMIT #{limit}")
23
+ names_to_query = names_to_query.map { |r| r['name'] }
24
+
25
+ pbar = ProgressBar.new('Querying similar_to', names_to_query.length)
26
+ names_to_query.each do |name|
27
+ cs.similar_to(name)
28
+ pbar.inc
29
+ end
30
+ pbar.finish
31
+
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Commendo::VERSION
9
9
  spec.authors = ['Rob Styles']
10
10
  spec.email = ['rob.styles@dynamicorange.com']
11
- spec.summary = 'A Jaccard-similarity recommender using Redis sets'
12
- spec.description = 'A Jaccard-similarity recommender using Redis sets'
11
+ spec.summary = 'A Jaccard-similarity recommender using Redis sets or MySQL'
12
+ spec.description = 'A Jaccard-similarity recommender using Redis sets or MySQL'
13
13
  spec.homepage = ''
14
14
  spec.license = 'MIT'
15
15
 
@@ -19,7 +19,9 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_dependency 'redis'
22
+ spec.add_dependency 'mysql2'
22
23
  spec.add_dependency 'progressbar'
24
+ spec.add_dependency 'slop'
23
25
 
24
26
  spec.add_development_dependency 'bundler', '~> 1.5'
25
27
  spec.add_development_dependency 'rake'
@@ -1,9 +1,33 @@
1
+ require 'forwardable'
2
+ require 'mysql2'
3
+ require_relative 'mysql2/client'
1
4
  require 'redis'
5
+
6
+ require 'commendo/configuration'
2
7
  require 'commendo/version'
3
8
  require 'commendo/content_set'
4
9
  require 'commendo/tag_set'
5
10
  require 'commendo/weighted_group'
6
11
 
12
+ require 'commendo/redis-backed/content_set'
13
+ require 'commendo/redis-backed/tag_set'
14
+ require 'commendo/redis-backed/weighted_group'
15
+
16
+ require 'commendo/mysql-backed/content_set'
17
+ require 'commendo/mysql-backed/tag_set'
18
+ require 'commendo/mysql-backed/weighted_group'
19
+
7
20
  module Commendo
8
21
 
22
+ def self.config
23
+ config = @@config ||= Configuration.new
24
+ yield(config) if block_given?
25
+ config
26
+ end
27
+
28
+ def self.config=(config)
29
+ raise 'Configuration must be either a Commendo::Configuration object or nil to reset' unless config.nil? || config.is_a?(Configuration)
30
+ @@config = config
31
+ end
32
+
9
33
  end
@@ -0,0 +1,25 @@
1
+ module Commendo
2
+ class Configuration
3
+
4
+ attr_accessor :backend, :host, :port, :database, :username, :password
5
+
6
+ def initialize
7
+ @backend = :redis
8
+ @host = 'localhost'
9
+ @port = 6379
10
+ @database = 15
11
+ end
12
+
13
+ def to_hash
14
+ {
15
+ backend: backend,
16
+ host: host,
17
+ port: port,
18
+ database: database,
19
+ username: username,
20
+ password: password
21
+ }
22
+ end
23
+
24
+ end
25
+ end
@@ -1,191 +1,22 @@
1
1
  module Commendo
2
2
 
3
3
  class ContentSet
4
+ extend Forwardable
4
5
 
5
- attr_accessor :redis, :key_base, :tag_set
6
+ def_delegators :@backend,
7
+ :add_by_group, :add, :add_single, :add_and_calculate,
8
+ :groups, :delete,
9
+ :calculate_similarity, :calculate_similarity_for_resource, :calculate_similarity_for_key_resource,
10
+ :similar_to, :filtered_similar_to,
11
+ :similarity_key,
12
+ :remove_from_groups, :remove_from_groups_and_calculate
6
13
 
7
- def initialize(redis, key_base, tag_set = nil)
8
- @redis, @key_base, @tag_set = redis, key_base, tag_set
9
- end
10
-
11
- def add_by_group(group, *resources)
12
- resources.each do |resource|
13
- if resource.kind_of?(Array)
14
- add_single(resource[0], group, resource[1])
15
- else
16
- add_single(resource, group, 1)
17
- end
18
- end
19
- end
20
-
21
- def add(resource, *groups)
22
- groups.each do |group|
23
- if group.kind_of?(Array)
24
- add_single(resource, group[0], group[1])
25
- else
26
- add_single(resource, group, 1)
27
- end
28
- end
29
- end
30
-
31
- def add_single(resource, group, score)
32
- redis.zincrby(group_key(group), score, resource)
33
- redis.zincrby(resource_key(resource), score, group)
34
- end
35
-
36
- def add_and_calculate(resource, *groups)
37
- add(resource, *groups)
38
- calculate_similarity_for_resource(resource, 0)
39
- end
40
-
41
- def groups(resource)
42
- redis.zrange(resource_key(resource), 0, -1)
43
- end
44
-
45
- def delete(resource)
46
- similar = similar_to(resource)
47
- similar.each do |other_resource|
48
- redis.zrem(similarity_key(other_resource[:resource]), "#{resource}")
49
- end
50
- #TODO delete from groups?
51
- redis.del(similarity_key(resource))
52
- redis.del(resource_key(resource))
53
- end
54
-
55
- SET_TOO_LARGE_FOR_LUA = 999
56
-
57
- def calculate_similarity(threshold = 0)
58
- #TODO make this use scan for scaling
59
- keys = redis.keys("#{resource_key_base}:*")
60
- keys.each_with_index do |key, i|
61
- resource = key.gsub(/^#{resource_key_base}:/, '')
62
- similarity_key = similarity_key(resource)
63
- redis.del(similarity_key)
64
- yield(key, i, keys.length) if block_given?
65
- completed = redis.eval(similarity_lua, keys: [key], argv: [tmp_key_base, resource_key_base, similar_key_base, group_key_base, threshold])
66
- if completed == SET_TOO_LARGE_FOR_LUA
67
- calculate_similarity_for_key_resource(key, resource, threshold)
68
- end
69
- end
70
- end
71
-
72
-
73
- def calculate_similarity_for_resource(resource, threshold)
74
- key = resource_key(resource)
75
- calculate_similarity_for_key_resource(key, resource, threshold)
76
- end
77
-
78
- def calculate_similarity_for_key_resource(key, resource, threshold)
79
- groups = groups(resource)
80
- return if groups.empty?
81
- group_keys = groups.map { |group| group_key(group) }
82
- tmp_key = "#{tmp_key_base}:#{SecureRandom.uuid}"
83
- redis.zunionstore(tmp_key, group_keys)
84
- resources = redis.zrange(tmp_key, 0, -1)
85
- redis.del(tmp_key)
86
- similarity_key = similarity_key(resource)
87
- redis.del(similarity_key)
88
- resources.each do |to_compare|
89
- next if resource == to_compare
90
- redis.eval(pair_comparison_lua, keys: [key, resource_key(to_compare), similarity_key(resource), similarity_key(to_compare)], argv: [tmp_key_base, resource, to_compare, threshold])
91
- end
92
- end
93
-
94
- def similar_to(resource, limit = 0)
95
- finish = limit -1
96
- if resource.kind_of? Array
97
- keys = resource.map do |res|
98
- similarity_key(res)
99
- end
100
- tmp_key = "#{key_base}:tmp:#{SecureRandom.uuid}"
101
- redis.zunionstore(tmp_key, keys)
102
- similar_resources = redis.zrevrange(tmp_key, 0, finish, with_scores: true)
103
- redis.del(tmp_key)
104
- else
105
- similar_resources = redis.zrevrange(similarity_key(resource), 0, finish, with_scores: true)
106
- end
107
- similar_resources.map do |resource|
108
- {resource: resource[0], similarity: resource[1].to_f}
109
- end
110
- end
111
-
112
- def filtered_similar_to(resource, options = {})
113
- if @tag_set.nil? || (options[:include].nil? && options[:exclude].nil?) || @tag_set.empty?
114
- return similar_to(resource, options[:limit] || 0)
115
- else
116
- similar = similar_to(resource)
117
- limit = options[:limit] || similar.length
118
- filtered = []
119
- similar.each do |s|
120
- return filtered if filtered.length >= limit
121
- filtered << s if @tag_set.matches(s[:resource], options[:include], options[:exclude])
122
- end
123
- return filtered
124
- end
125
- end
126
-
127
- def similarity_key(resource)
128
- "#{similar_key_base}:#{resource}"
129
- end
130
-
131
- def remove_from_groups(resource, *groups)
132
- resource_key = resource_key(resource)
133
- redis.zrem(resource_key, groups)
134
- groups.each do |group|
135
- group_key = group_key(group)
136
- redis.zrem(group_key, resource)
137
- end
138
- end
139
-
140
- def remove_from_groups_and_calculate(resource, *groups)
141
- remove_from_groups(resource, *groups)
142
- calculate_similarity_for_resource(resource, 0)
143
- end
144
-
145
- private
146
-
147
- def similarity_lua
148
- @similarity_lua ||= load_similarity_lua
149
- end
150
-
151
- def load_similarity_lua
152
- file = File.open(File.expand_path('../similarity.lua', __FILE__), "r")
153
- file.read
154
- end
155
-
156
- def pair_comparison_lua
157
- @pair_comparison_lua ||= load_pair_comparison_lua
158
- end
159
-
160
- def load_pair_comparison_lua
161
- file = File.open(File.expand_path('../pair_comparison.lua', __FILE__), "r")
162
- file.read
163
- end
164
-
165
- def tmp_key_base
166
- "#{key_base}:tmp"
167
- end
168
-
169
- def similar_key_base
170
- "#{key_base}:similar"
171
- end
172
-
173
- def resource_key_base
174
- "#{key_base}:resources"
175
- end
176
-
177
- def resource_key(resource)
178
- "#{resource_key_base}:#{resource}"
179
- end
180
-
181
- def group_key_base
182
- "#{key_base}:groups"
183
- end
184
-
185
- def group_key(group)
186
- "#{group_key_base}:#{group}"
14
+ def initialize(args)
15
+ @backend = RedisBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :redis
16
+ @backend = MySqlBacked::ContentSet.new(args[:key_base], args[:tag_set]) if Commendo.config.backend == :mysql
17
+ raise 'Unrecognised backend type, try :redis or :mysql' if @backend.nil?
187
18
  end
188
19
 
189
20
  end
190
21
 
191
- end
22
+ end