disco 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +32 -13
- data/lib/disco/recommender.rb +30 -14
- data/lib/disco/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f400f07839587b574ddcfa4c88335bfe20fcd876164b943e8094a35c3c1cfef
|
4
|
+
data.tar.gz: e2426b283146837d14be154ff0e67eb2505fd6587958b39212bf2dfe3bfccd80
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2be9f24184036ec5b093de55640aebb60887ac59c566f37698fcba7a18daa15cf586566708def0060f80fc0747a50447538cf42fdf36024ae19ddac0de8b415c
|
7
|
+
data.tar.gz: 4682a5524a8cad4a247ec53f99c78e317d56ee55433bb2ad7806af4f2a9854bc016fd23564003f009dc69d0fdcf81949dc88c64d3cbe824a8e76fc5cae8abc7d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -35,17 +35,15 @@ recommender.fit([
|
|
35
35
|
|
36
36
|
> IDs can be integers, strings, or any other data type
|
37
37
|
|
38
|
-
If users don’t rate items directly (for instance, they’re purchasing items or reading posts), this is known as implicit feedback. Leave out the rating
|
38
|
+
If users don’t rate items directly (for instance, they’re purchasing items or reading posts), this is known as implicit feedback. Leave out the rating.
|
39
39
|
|
40
40
|
```ruby
|
41
41
|
recommender.fit([
|
42
|
-
{user_id: 1, item_id: 1
|
43
|
-
{user_id: 2, item_id: 1
|
42
|
+
{user_id: 1, item_id: 1},
|
43
|
+
{user_id: 2, item_id: 1}
|
44
44
|
])
|
45
45
|
```
|
46
46
|
|
47
|
-
> Use `value` instead of `rating` for implicit feedback
|
48
|
-
|
49
47
|
Get user-based recommendations - “users like you also liked”
|
50
48
|
|
51
49
|
```ruby
|
@@ -106,11 +104,10 @@ views = Ahoy::Event.
|
|
106
104
|
count
|
107
105
|
|
108
106
|
data =
|
109
|
-
views.map do |(user_id, post_id),
|
107
|
+
views.map do |(user_id, post_id), _|
|
110
108
|
{
|
111
109
|
user_id: user_id,
|
112
|
-
item_id: post_id
|
113
|
-
value: count
|
110
|
+
item_id: post_id
|
114
111
|
}
|
115
112
|
end
|
116
113
|
```
|
@@ -201,7 +198,7 @@ bin = File.binread("recommender.bin")
|
|
201
198
|
recommender = Marshal.load(bin)
|
202
199
|
```
|
203
200
|
|
204
|
-
Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor)
|
201
|
+
Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples).
|
205
202
|
|
206
203
|
## Algorithms
|
207
204
|
|
@@ -282,22 +279,22 @@ gem 'faiss'
|
|
282
279
|
Speed up the `user_recs` method with:
|
283
280
|
|
284
281
|
```ruby
|
285
|
-
|
282
|
+
recommender.optimize_user_recs
|
286
283
|
```
|
287
284
|
|
288
285
|
Speed up the `item_recs` method with:
|
289
286
|
|
290
287
|
```ruby
|
291
|
-
|
288
|
+
recommender.optimize_item_recs
|
292
289
|
```
|
293
290
|
|
294
291
|
Speed up the `similar_users` method with:
|
295
292
|
|
296
293
|
```ruby
|
297
|
-
|
294
|
+
recommender.optimize_similar_users
|
298
295
|
```
|
299
296
|
|
300
|
-
This should be called after fitting or loading the
|
297
|
+
This should be called after fitting or loading the recommender.
|
301
298
|
|
302
299
|
## Reference
|
303
300
|
|
@@ -336,6 +333,28 @@ Thanks to:
|
|
336
333
|
- [Implicit](https://github.com/benfred/implicit/) for serving as an initial reference for user and item similarity
|
337
334
|
- [@dasch](https://github.com/dasch) for the gem name
|
338
335
|
|
336
|
+
## Upgrading
|
337
|
+
|
338
|
+
### 0.2.7
|
339
|
+
|
340
|
+
There’s now a warning when passing `:value` with implicit feedback, as this has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used.
|
341
|
+
|
342
|
+
```ruby
|
343
|
+
recommender.fit([
|
344
|
+
{user_id: 1, item_id: 1, value: 1},
|
345
|
+
{user_id: 2, item_id: 1, value: 3}
|
346
|
+
])
|
347
|
+
```
|
348
|
+
|
349
|
+
to:
|
350
|
+
|
351
|
+
```ruby
|
352
|
+
recommender.fit([
|
353
|
+
{user_id: 1, item_id: 1},
|
354
|
+
{user_id: 2, item_id: 1}
|
355
|
+
])
|
356
|
+
```
|
357
|
+
|
339
358
|
## History
|
340
359
|
|
341
360
|
View the [changelog](https://github.com/ankane/disco/blob/master/CHANGELOG.md)
|
data/lib/disco/recommender.rb
CHANGED
@@ -22,6 +22,10 @@ module Disco
|
|
22
22
|
# but may be confusing if they are all missing and later ones aren't
|
23
23
|
@implicit = !train_set.any? { |v| v[:rating] }
|
24
24
|
|
25
|
+
if @implicit && train_set.any? { |v| v[:value] }
|
26
|
+
warn "[disco] WARNING: Passing `:value` with implicit feedback has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used."
|
27
|
+
end
|
28
|
+
|
25
29
|
# TODO improve performance
|
26
30
|
# (catch exception instead of checking ahead of time)
|
27
31
|
unless @implicit
|
@@ -34,7 +38,6 @@ module Disco
|
|
34
38
|
|
35
39
|
@rated = Hash.new { |hash, key| hash[key] = {} }
|
36
40
|
input = []
|
37
|
-
value_key = @implicit ? :value : :rating
|
38
41
|
train_set.each do |v|
|
39
42
|
# update maps and build matrix in single pass
|
40
43
|
u = (@user_map[v[:user_id]] ||= @user_map.size)
|
@@ -42,7 +45,7 @@ module Disco
|
|
42
45
|
@rated[u][i] = true
|
43
46
|
|
44
47
|
# explicit will always have a value due to check_ratings
|
45
|
-
input << [u, i,
|
48
|
+
input << [u, i, @implicit ? 1 : v[:rating]]
|
46
49
|
end
|
47
50
|
@rated.default = nil
|
48
51
|
|
@@ -61,7 +64,7 @@ module Disco
|
|
61
64
|
train_set.each do |v|
|
62
65
|
i = @item_map[v[:item_id]]
|
63
66
|
@item_count[i] += 1
|
64
|
-
@item_sum[i] += (v[
|
67
|
+
@item_sum[i] += (@implicit ? 1 : v[:rating])
|
65
68
|
end
|
66
69
|
end
|
67
70
|
|
@@ -76,7 +79,7 @@ module Disco
|
|
76
79
|
u ||= -1
|
77
80
|
i ||= -1
|
78
81
|
|
79
|
-
eval_set << [u, i,
|
82
|
+
eval_set << [u, i, @implicit ? 1 : v[:rating]]
|
80
83
|
end
|
81
84
|
end
|
82
85
|
|
@@ -138,8 +141,7 @@ module Disco
|
|
138
141
|
predictions, ids = @user_recs_index.search(@user_factors[u, true].expand_dims(0), count + rated.size).map { |v| v[0, true] }
|
139
142
|
else
|
140
143
|
predictions = @item_factors.inner(@user_factors[u, true])
|
141
|
-
|
142
|
-
indexes = predictions.sort_index.reverse
|
144
|
+
indexes = predictions.sort_index.reverse # reverse just creates view
|
143
145
|
indexes = indexes[0...[count + rated.size, indexes.size].min] if count
|
144
146
|
predictions = predictions[indexes]
|
145
147
|
ids = indexes
|
@@ -179,19 +181,32 @@ module Disco
|
|
179
181
|
raise "top_items not computed" unless @top_items
|
180
182
|
|
181
183
|
if @implicit
|
182
|
-
scores = @item_count
|
184
|
+
scores = Numo::UInt64.cast(@item_count)
|
183
185
|
else
|
184
186
|
require "wilson_score"
|
185
187
|
|
186
188
|
range = @min_rating..@max_rating
|
187
|
-
scores = @item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) }
|
189
|
+
scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) })
|
190
|
+
|
191
|
+
# TODO uncomment in 0.3.0
|
192
|
+
# wilson score with continuity correction
|
193
|
+
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
|
194
|
+
# z = 1.96 # 95% confidence
|
195
|
+
# range = @max_rating - @min_rating
|
196
|
+
# n = Numo::DFloat.cast(@item_count)
|
197
|
+
# phat = (Numo::DFloat.cast(@item_sum) - (@min_rating * n)) / range / n
|
198
|
+
# phat = (phat - (1 / (2 * n))).clip(0, 100) # continuity correction: subtract 1/(2n); `1 / 2 * n` would evaluate to 0 via integer division
|
199
|
+
# scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
|
200
|
+
# scores = scores * range + @min_rating
|
188
201
|
end
|
189
202
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
203
|
+
indexes = scores.sort_index.reverse
|
204
|
+
indexes = indexes[0...[count, indexes.size].min] if count
|
205
|
+
scores = scores[indexes]
|
206
|
+
|
207
|
+
keys = @item_map.keys
|
208
|
+
indexes.size.times.map do |i|
|
209
|
+
{item_id: keys[indexes[i]], score: scores[i]}
|
195
210
|
end
|
196
211
|
end
|
197
212
|
|
@@ -255,8 +270,9 @@ module Disco
|
|
255
270
|
# inner product is cosine similarity with normalized vectors
|
256
271
|
# https://github.com/facebookresearch/faiss/issues/95
|
257
272
|
#
|
258
|
-
# TODO use non-exact index
|
273
|
+
# TODO use non-exact index in 0.3.0
|
259
274
|
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
|
275
|
+
# index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
|
260
276
|
index = Faiss::IndexFlatIP.new(factors.shape[1])
|
261
277
|
|
262
278
|
# ids are from 0...total
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -76,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.2.
|
79
|
+
rubygems_version: 3.2.22
|
80
80
|
signing_key:
|
81
81
|
specification_version: 4
|
82
82
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|