disco 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +17 -3
- data/lib/disco/recommender.rb +56 -26
- data/lib/disco/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4a978d2eec39ca280142c49fb4ef4be2e1ad4f35dfa4d977941f46d5d34b466
|
4
|
+
data.tar.gz: 8a29a54bba5ac8b715294e2fce4e34fa1b11442b1800c388807c60b9520ced23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99376dd48cce340a4fdcb0d76c93b03af494d88167e2caaca0d186fcf5d2303f2524884e0c712c2f8e3d7be79a92b029a8d5fa726bb94826315f283afea0f74b
|
7
|
+
data.tar.gz: eeb8c480098616f93d6c7e39a1bb57e2feefa6af3696c407791ff6f052450eb035f1d1659ded70d7b5fbbbe8cff9f7309118828a454b1d4f9d459321b90035cf
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -46,13 +46,13 @@ recommender.fit([
|
|
46
46
|
|
47
47
|
> Use `value` instead of rating for implicit feedback
|
48
48
|
|
49
|
-
Get user-based
|
49
|
+
Get user-based recommendations - “users like you also liked”
|
50
50
|
|
51
51
|
```ruby
|
52
52
|
recommender.user_recs(user_id)
|
53
53
|
```
|
54
54
|
|
55
|
-
Get item-based
|
55
|
+
Get item-based recommendations - “users who liked this item also liked”
|
56
56
|
|
57
57
|
```ruby
|
58
58
|
recommender.item_recs(item_id)
|
@@ -283,19 +283,33 @@ This should be called after fitting or loading the model.
|
|
283
283
|
|
284
284
|
## Reference
|
285
285
|
|
286
|
+
Get ids
|
287
|
+
|
288
|
+
```ruby
|
289
|
+
recommender.user_ids
|
290
|
+
recommender.item_ids
|
291
|
+
```
|
292
|
+
|
286
293
|
Get the global mean
|
287
294
|
|
288
295
|
```ruby
|
289
296
|
recommender.global_mean
|
290
297
|
```
|
291
298
|
|
292
|
-
Get
|
299
|
+
Get factors
|
293
300
|
|
294
301
|
```ruby
|
295
302
|
recommender.user_factors
|
296
303
|
recommender.item_factors
|
297
304
|
```
|
298
305
|
|
306
|
+
Get factors for specific users and items
|
307
|
+
|
308
|
+
```ruby
|
309
|
+
recommender.user_factors(user_id)
|
310
|
+
recommender.item_factors(item_id)
|
311
|
+
```
|
312
|
+
|
299
313
|
## Credits
|
300
314
|
|
301
315
|
Thanks to:
|
data/lib/disco/recommender.rb
CHANGED
@@ -1,32 +1,32 @@
|
|
1
1
|
module Disco
|
2
2
|
class Recommender
|
3
|
-
attr_reader :global_mean
|
3
|
+
attr_reader :global_mean
|
4
4
|
|
5
5
|
def initialize(factors: 8, epochs: 20, verbose: nil)
|
6
6
|
@factors = factors
|
7
7
|
@epochs = epochs
|
8
8
|
@verbose = verbose
|
9
|
+
@user_map = {}
|
10
|
+
@item_map = {}
|
9
11
|
end
|
10
12
|
|
11
13
|
def fit(train_set, validation_set: nil)
|
12
14
|
train_set = to_dataset(train_set)
|
13
15
|
validation_set = to_dataset(validation_set) if validation_set
|
14
16
|
|
15
|
-
|
17
|
+
check_training_set(train_set)
|
16
18
|
|
19
|
+
@implicit = !train_set.any? { |v| v[:rating] }
|
17
20
|
unless @implicit
|
18
|
-
|
19
|
-
|
20
|
-
@min_rating = ratings.min
|
21
|
-
@max_rating = ratings.max
|
21
|
+
check_ratings(train_set)
|
22
|
+
@min_rating, @max_rating = train_set.minmax_by { |o| o[:rating] }.map { |o| o[:rating] }
|
22
23
|
|
23
24
|
if validation_set
|
24
|
-
check_ratings(validation_set
|
25
|
+
check_ratings(validation_set)
|
25
26
|
end
|
26
27
|
end
|
27
28
|
|
28
|
-
|
29
|
-
create_maps(train_set)
|
29
|
+
update_maps(train_set)
|
30
30
|
|
31
31
|
@rated = Hash.new { |hash, key| hash[key] = {} }
|
32
32
|
input = []
|
@@ -143,13 +143,39 @@ module Disco
|
|
143
143
|
|
144
144
|
def similar_items(item_id, count: 5)
|
145
145
|
check_fit
|
146
|
-
similar(item_id, @item_map, @item_factors, item_norms, count, @item_index)
|
146
|
+
similar(item_id, @item_map, @item_factors, @item_index ? nil : item_norms, count, @item_index)
|
147
147
|
end
|
148
148
|
alias_method :item_recs, :similar_items
|
149
149
|
|
150
150
|
def similar_users(user_id, count: 5)
|
151
151
|
check_fit
|
152
|
-
similar(user_id, @user_map, @user_factors, user_norms, count, @user_index)
|
152
|
+
similar(user_id, @user_map, @user_factors, @user_index ? nil : user_norms, count, @user_index)
|
153
|
+
end
|
154
|
+
|
155
|
+
def user_ids
|
156
|
+
@user_map.keys
|
157
|
+
end
|
158
|
+
|
159
|
+
def item_ids
|
160
|
+
@item_map.keys
|
161
|
+
end
|
162
|
+
|
163
|
+
def user_factors(user_id = nil)
|
164
|
+
if user_id
|
165
|
+
u = @user_map[user_id]
|
166
|
+
@user_factors[u, true] if u
|
167
|
+
else
|
168
|
+
@user_factors
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def item_factors(item_id = nil)
|
173
|
+
if item_id
|
174
|
+
i = @item_map[item_id]
|
175
|
+
@item_factors[i, true] if i
|
176
|
+
else
|
177
|
+
@item_factors
|
178
|
+
end
|
153
179
|
end
|
154
180
|
|
155
181
|
private
|
@@ -157,8 +183,11 @@ module Disco
|
|
157
183
|
def create_index(factors)
|
158
184
|
require "ngt"
|
159
185
|
|
186
|
+
# could speed up search with normalized cosine
|
187
|
+
# https://github.com/yahoojapan/NGT/issues/36
|
160
188
|
index = Ngt::Index.new(factors.shape[1], distance_type: "Cosine")
|
161
|
-
index.batch_insert(factors)
|
189
|
+
ids = index.batch_insert(factors)
|
190
|
+
raise "Unexpected ids. Please report a bug." if ids.first != 1 || ids.last != factors.shape[0]
|
162
191
|
index
|
163
192
|
end
|
164
193
|
|
@@ -191,19 +220,21 @@ module Disco
|
|
191
220
|
}
|
192
221
|
end
|
193
222
|
else
|
194
|
-
|
223
|
+
# cosine similarity without norms[i]
|
224
|
+
# otherwise, denominator would be (norms[i] * norms)
|
225
|
+
predictions = factors.inner(factors[i, true]) / norms
|
195
226
|
|
196
227
|
predictions =
|
197
228
|
map.keys.zip(predictions).map do |item_id, pred|
|
198
229
|
{item_id: item_id, score: pred}
|
199
230
|
end
|
200
231
|
|
201
|
-
|
232
|
+
predictions.delete_at(i)
|
202
233
|
predictions.sort_by! { |pred| -pred[:score] } # already sorted by id
|
203
234
|
predictions = predictions.first(count) if count
|
204
|
-
# divide by
|
235
|
+
# divide by norms[i] to get cosine similarity
|
205
236
|
# only need to do for returned records
|
206
|
-
predictions.each { |pred| pred[:score] /=
|
237
|
+
predictions.each { |pred| pred[:score] /= norms[i] }
|
207
238
|
predictions
|
208
239
|
end
|
209
240
|
else
|
@@ -211,22 +242,21 @@ module Disco
|
|
211
242
|
end
|
212
243
|
end
|
213
244
|
|
214
|
-
def
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
|
219
|
-
raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
|
245
|
+
def update_maps(train_set)
|
246
|
+
raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
|
247
|
+
raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
|
220
248
|
|
221
|
-
|
222
|
-
|
249
|
+
train_set.each do |v|
|
250
|
+
@user_map[v[:user_id]] ||= @user_map.size
|
251
|
+
@item_map[v[:item_id]] ||= @item_map.size
|
252
|
+
end
|
223
253
|
end
|
224
254
|
|
225
255
|
def check_ratings(ratings)
|
226
|
-
unless ratings.all? { |r| !r.nil? }
|
256
|
+
unless ratings.all? { |r| !r[:rating].nil? }
|
227
257
|
raise ArgumentError, "Missing ratings"
|
228
258
|
end
|
229
|
-
unless ratings.all? { |r| r.is_a?(Numeric) }
|
259
|
+
unless ratings.all? { |r| r[:rating].is_a?(Numeric) }
|
230
260
|
raise ArgumentError, "Ratings must be numeric"
|
231
261
|
end
|
232
262
|
end
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
description:
|
42
|
-
email: andrew@
|
42
|
+
email: andrew@ankane.org
|
43
43
|
executables: []
|
44
44
|
extensions: []
|
45
45
|
extra_rdoc_files: []
|
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
75
|
- !ruby/object:Gem::Version
|
76
76
|
version: '0'
|
77
77
|
requirements: []
|
78
|
-
rubygems_version: 3.
|
78
|
+
rubygems_version: 3.2.3
|
79
79
|
signing_key:
|
80
80
|
specification_version: 4
|
81
81
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|