disco 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +17 -3
- data/lib/disco/recommender.rb +56 -26
- data/lib/disco/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4a978d2eec39ca280142c49fb4ef4be2e1ad4f35dfa4d977941f46d5d34b466
|
4
|
+
data.tar.gz: 8a29a54bba5ac8b715294e2fce4e34fa1b11442b1800c388807c60b9520ced23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99376dd48cce340a4fdcb0d76c93b03af494d88167e2caaca0d186fcf5d2303f2524884e0c712c2f8e3d7be79a92b029a8d5fa726bb94826315f283afea0f74b
|
7
|
+
data.tar.gz: eeb8c480098616f93d6c7e39a1bb57e2feefa6af3696c407791ff6f052450eb035f1d1659ded70d7b5fbbbe8cff9f7309118828a454b1d4f9d459321b90035cf
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -46,13 +46,13 @@ recommender.fit([
|
|
46
46
|
|
47
47
|
> Use `value` instead of rating for implicit feedback
|
48
48
|
|
49
|
-
Get user-based
|
49
|
+
Get user-based recommendations - “users like you also liked”
|
50
50
|
|
51
51
|
```ruby
|
52
52
|
recommender.user_recs(user_id)
|
53
53
|
```
|
54
54
|
|
55
|
-
Get item-based
|
55
|
+
Get item-based recommendations - “users who liked this item also liked”
|
56
56
|
|
57
57
|
```ruby
|
58
58
|
recommender.item_recs(item_id)
|
@@ -283,19 +283,33 @@ This should be called after fitting or loading the model.
|
|
283
283
|
|
284
284
|
## Reference
|
285
285
|
|
286
|
+
Get ids
|
287
|
+
|
288
|
+
```ruby
|
289
|
+
recommender.user_ids
|
290
|
+
recommender.item_ids
|
291
|
+
```
|
292
|
+
|
286
293
|
Get the global mean
|
287
294
|
|
288
295
|
```ruby
|
289
296
|
recommender.global_mean
|
290
297
|
```
|
291
298
|
|
292
|
-
Get
|
299
|
+
Get factors
|
293
300
|
|
294
301
|
```ruby
|
295
302
|
recommender.user_factors
|
296
303
|
recommender.item_factors
|
297
304
|
```
|
298
305
|
|
306
|
+
Get factors for specific users and items
|
307
|
+
|
308
|
+
```ruby
|
309
|
+
recommender.user_factors(user_id)
|
310
|
+
recommender.item_factors(item_id)
|
311
|
+
```
|
312
|
+
|
299
313
|
## Credits
|
300
314
|
|
301
315
|
Thanks to:
|
data/lib/disco/recommender.rb
CHANGED
@@ -1,32 +1,32 @@
|
|
1
1
|
module Disco
|
2
2
|
class Recommender
|
3
|
-
attr_reader :global_mean
|
3
|
+
attr_reader :global_mean
|
4
4
|
|
5
5
|
def initialize(factors: 8, epochs: 20, verbose: nil)
|
6
6
|
@factors = factors
|
7
7
|
@epochs = epochs
|
8
8
|
@verbose = verbose
|
9
|
+
@user_map = {}
|
10
|
+
@item_map = {}
|
9
11
|
end
|
10
12
|
|
11
13
|
def fit(train_set, validation_set: nil)
|
12
14
|
train_set = to_dataset(train_set)
|
13
15
|
validation_set = to_dataset(validation_set) if validation_set
|
14
16
|
|
15
|
-
|
17
|
+
check_training_set(train_set)
|
16
18
|
|
19
|
+
@implicit = !train_set.any? { |v| v[:rating] }
|
17
20
|
unless @implicit
|
18
|
-
|
19
|
-
|
20
|
-
@min_rating = ratings.min
|
21
|
-
@max_rating = ratings.max
|
21
|
+
check_ratings(train_set)
|
22
|
+
@min_rating, @max_rating = train_set.minmax_by { |o| o[:rating] }.map { |o| o[:rating] }
|
22
23
|
|
23
24
|
if validation_set
|
24
|
-
check_ratings(validation_set
|
25
|
+
check_ratings(validation_set)
|
25
26
|
end
|
26
27
|
end
|
27
28
|
|
28
|
-
|
29
|
-
create_maps(train_set)
|
29
|
+
update_maps(train_set)
|
30
30
|
|
31
31
|
@rated = Hash.new { |hash, key| hash[key] = {} }
|
32
32
|
input = []
|
@@ -143,13 +143,39 @@ module Disco
|
|
143
143
|
|
144
144
|
def similar_items(item_id, count: 5)
|
145
145
|
check_fit
|
146
|
-
similar(item_id, @item_map, @item_factors, item_norms, count, @item_index)
|
146
|
+
similar(item_id, @item_map, @item_factors, @item_index ? nil : item_norms, count, @item_index)
|
147
147
|
end
|
148
148
|
alias_method :item_recs, :similar_items
|
149
149
|
|
150
150
|
def similar_users(user_id, count: 5)
|
151
151
|
check_fit
|
152
|
-
similar(user_id, @user_map, @user_factors, user_norms, count, @user_index)
|
152
|
+
similar(user_id, @user_map, @user_factors, @user_index ? nil : user_norms, count, @user_index)
|
153
|
+
end
|
154
|
+
|
155
|
+
def user_ids
|
156
|
+
@user_map.keys
|
157
|
+
end
|
158
|
+
|
159
|
+
def item_ids
|
160
|
+
@item_map.keys
|
161
|
+
end
|
162
|
+
|
163
|
+
def user_factors(user_id = nil)
|
164
|
+
if user_id
|
165
|
+
u = @user_map[user_id]
|
166
|
+
@user_factors[u, true] if u
|
167
|
+
else
|
168
|
+
@user_factors
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def item_factors(item_id = nil)
|
173
|
+
if item_id
|
174
|
+
i = @item_map[item_id]
|
175
|
+
@item_factors[i, true] if i
|
176
|
+
else
|
177
|
+
@item_factors
|
178
|
+
end
|
153
179
|
end
|
154
180
|
|
155
181
|
private
|
@@ -157,8 +183,11 @@ module Disco
|
|
157
183
|
def create_index(factors)
|
158
184
|
require "ngt"
|
159
185
|
|
186
|
+
# could speed up search with normalized cosine
|
187
|
+
# https://github.com/yahoojapan/NGT/issues/36
|
160
188
|
index = Ngt::Index.new(factors.shape[1], distance_type: "Cosine")
|
161
|
-
index.batch_insert(factors)
|
189
|
+
ids = index.batch_insert(factors)
|
190
|
+
raise "Unexpected ids. Please report a bug." if ids.first != 1 || ids.last != factors.shape[0]
|
162
191
|
index
|
163
192
|
end
|
164
193
|
|
@@ -191,19 +220,21 @@ module Disco
|
|
191
220
|
}
|
192
221
|
end
|
193
222
|
else
|
194
|
-
|
223
|
+
# cosine similarity without norms[i]
|
224
|
+
# otherwise, denominator would be (norms[i] * norms)
|
225
|
+
predictions = factors.inner(factors[i, true]) / norms
|
195
226
|
|
196
227
|
predictions =
|
197
228
|
map.keys.zip(predictions).map do |item_id, pred|
|
198
229
|
{item_id: item_id, score: pred}
|
199
230
|
end
|
200
231
|
|
201
|
-
|
232
|
+
predictions.delete_at(i)
|
202
233
|
predictions.sort_by! { |pred| -pred[:score] } # already sorted by id
|
203
234
|
predictions = predictions.first(count) if count
|
204
|
-
# divide by
|
235
|
+
# divide by norms[i] to get cosine similarity
|
205
236
|
# only need to do for returned records
|
206
|
-
predictions.each { |pred| pred[:score] /=
|
237
|
+
predictions.each { |pred| pred[:score] /= norms[i] }
|
207
238
|
predictions
|
208
239
|
end
|
209
240
|
else
|
@@ -211,22 +242,21 @@ module Disco
|
|
211
242
|
end
|
212
243
|
end
|
213
244
|
|
214
|
-
def
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
|
219
|
-
raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
|
245
|
+
def update_maps(train_set)
|
246
|
+
raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
|
247
|
+
raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
|
220
248
|
|
221
|
-
|
222
|
-
|
249
|
+
train_set.each do |v|
|
250
|
+
@user_map[v[:user_id]] ||= @user_map.size
|
251
|
+
@item_map[v[:item_id]] ||= @item_map.size
|
252
|
+
end
|
223
253
|
end
|
224
254
|
|
225
255
|
def check_ratings(ratings)
|
226
|
-
unless ratings.all? { |r| !r.nil? }
|
256
|
+
unless ratings.all? { |r| !r[:rating].nil? }
|
227
257
|
raise ArgumentError, "Missing ratings"
|
228
258
|
end
|
229
|
-
unless ratings.all? { |r| r.is_a?(Numeric) }
|
259
|
+
unless ratings.all? { |r| r[:rating].is_a?(Numeric) }
|
230
260
|
raise ArgumentError, "Ratings must be numeric"
|
231
261
|
end
|
232
262
|
end
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
description:
|
42
|
-
email: andrew@
|
42
|
+
email: andrew@ankane.org
|
43
43
|
executables: []
|
44
44
|
extensions: []
|
45
45
|
extra_rdoc_files: []
|
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
75
|
- !ruby/object:Gem::Version
|
76
76
|
version: '0'
|
77
77
|
requirements: []
|
78
|
-
rubygems_version: 3.
|
78
|
+
rubygems_version: 3.2.3
|
79
79
|
signing_key:
|
80
80
|
specification_version: 4
|
81
81
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|