disco 0.2.9 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/disco/model.rb +1 -2
- data/lib/disco/recommender.rb +16 -27
- data/lib/disco/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815bc7de802959be7093d9e0478d83a0cf49a522e72a2df928de86223799d83d
|
4
|
+
data.tar.gz: cbfacf86f1e0507abe4df07b45f20bc3d06d682617c482419a05935186a61c15
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0f3285b53cb8fe7e7d5ef30a970c632c52112c6b0503b8c81155f6cdb37583f036107b052c37019671355d0838512f904a61aeff5b69b7f6f8a2c1f4fabe785
|
7
|
+
data.tar.gz: f1b9c5759d77c1f497a0ac09ccf455beda29417024c4cd8ba6c0f8fcbac3347ab233c9e8c558a75382ef3b41495b5b693495ab0533c3a084a416c1f75a38313b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.3.0 (2022-03-22)
|
2
|
+
|
3
|
+
- Changed `item_id` to `user_id` for `similar_users`
|
4
|
+
- Changed warning to an error when `value` passed to `fit`
|
5
|
+
- Changed to use Faiss over NGT for `optimize_item_recs` and `optimize_similar_users` when both are installed
|
6
|
+
- Removed dependency on `wilson_score` gem for `top_items`
|
7
|
+
- Dropped support for Ruby < 2.6
|
8
|
+
|
1
9
|
## 0.2.9 (2022-03-22)
|
2
10
|
|
3
11
|
- Fixed error with `load_movielens`
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -242,7 +242,7 @@ recommender.fit(data)
|
|
242
242
|
recommender.top_items
|
243
243
|
```
|
244
244
|
|
245
|
-
This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback
|
245
|
+
This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback and item frequency for implicit feedback.
|
246
246
|
|
247
247
|
## Data
|
248
248
|
|
data/lib/disco/model.rb
CHANGED
@@ -10,10 +10,9 @@ module Disco
|
|
10
10
|
|
11
11
|
has_many :"recommended_#{name}", -> { where("disco_recommendations.context = ?", name).order("disco_recommendations.score DESC") }, through: :recommendations, source: :item, source_type: class_name
|
12
12
|
|
13
|
-
# TODO use fetch for item_id and score in 0.3.0
|
14
13
|
define_method("update_recommended_#{name}") do |items|
|
15
14
|
now = Time.now
|
16
|
-
items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item[:item_id], context: name, score: item[:score], created_at: now, updated_at: now} }
|
15
|
+
items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item.fetch(:item_id), context: name, score: item.fetch(:score), created_at: now, updated_at: now} }
|
17
16
|
|
18
17
|
self.class.transaction do
|
19
18
|
recommendations.where(context: name).delete_all
|
data/lib/disco/recommender.rb
CHANGED
@@ -23,7 +23,7 @@ module Disco
|
|
23
23
|
@implicit = !train_set.any? { |v| v[:rating] }
|
24
24
|
|
25
25
|
if @implicit && train_set.any? { |v| v[:value] }
|
26
|
-
|
26
|
+
raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used."
|
27
27
|
end
|
28
28
|
|
29
29
|
# TODO improve performance
|
@@ -167,13 +167,13 @@ module Disco
|
|
167
167
|
|
168
168
|
def similar_items(item_id, count: 5)
|
169
169
|
check_fit
|
170
|
-
similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index)
|
170
|
+
similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
|
171
171
|
end
|
172
172
|
alias_method :item_recs, :similar_items
|
173
173
|
|
174
174
|
def similar_users(user_id, count: 5)
|
175
175
|
check_fit
|
176
|
-
similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index)
|
176
|
+
similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
|
177
177
|
end
|
178
178
|
|
179
179
|
def top_items(count: 5)
|
@@ -183,27 +183,20 @@ module Disco
|
|
183
183
|
if @implicit
|
184
184
|
scores = Numo::UInt64.cast(@item_count)
|
185
185
|
else
|
186
|
-
|
186
|
+
min_rating = @min_rating
|
187
187
|
|
188
|
-
|
189
|
-
|
190
|
-
# TODO remove temp fix
|
191
|
-
(@min_rating - 1)..@max_rating
|
192
|
-
else
|
193
|
-
@min_rating..@max_rating
|
194
|
-
end
|
195
|
-
scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) })
|
188
|
+
# TODO remove temp fix
|
189
|
+
min_rating -= 1 if @min_rating == @max_rating
|
196
190
|
|
197
|
-
# TODO uncomment in 0.3.0
|
198
191
|
# wilson score with continuity correction
|
199
192
|
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
193
|
+
z = 1.96 # 95% confidence
|
194
|
+
range = @max_rating - @min_rating
|
195
|
+
n = Numo::DFloat.cast(@item_count)
|
196
|
+
phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
|
197
|
+
phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
|
198
|
+
scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
|
199
|
+
scores = scores * range + min_rating
|
207
200
|
end
|
208
201
|
|
209
202
|
indexes = scores.sort_index.reverse
|
@@ -266,8 +259,7 @@ module Disco
|
|
266
259
|
|
267
260
|
# factors should already be normalized for similar users/items
|
268
261
|
def create_index(factors, library:)
|
269
|
-
|
270
|
-
library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt"
|
262
|
+
library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss"
|
271
263
|
|
272
264
|
case library
|
273
265
|
when "faiss"
|
@@ -276,7 +268,7 @@ module Disco
|
|
276
268
|
# inner product is cosine similarity with normalized vectors
|
277
269
|
# https://github.com/facebookresearch/faiss/issues/95
|
278
270
|
#
|
279
|
-
# TODO
|
271
|
+
# TODO add option for index type
|
280
272
|
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
|
281
273
|
# index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
|
282
274
|
index = Faiss::IndexFlatIP.new(factors.shape[1])
|
@@ -318,7 +310,7 @@ module Disco
|
|
318
310
|
factors / norms.expand_dims(1)
|
319
311
|
end
|
320
312
|
|
321
|
-
def similar(id, map, norm_factors, count, index)
|
313
|
+
def similar(id, key, map, norm_factors, count, index)
|
322
314
|
i = map[id]
|
323
315
|
|
324
316
|
if i && norm_factors.shape[0] > 1
|
@@ -342,9 +334,6 @@ module Disco
|
|
342
334
|
|
343
335
|
keys = map.keys
|
344
336
|
|
345
|
-
# TODO use user_id for similar_users in 0.3.0
|
346
|
-
key = :item_id
|
347
|
-
|
348
337
|
result = []
|
349
338
|
# items can have the same score
|
350
339
|
# so original item may not be at index 0
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -69,7 +69,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
69
|
requirements:
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: '2.
|
72
|
+
version: '2.6'
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - ">="
|