disco 0.2.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/disco/model.rb +1 -2
- data/lib/disco/recommender.rb +16 -27
- data/lib/disco/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815bc7de802959be7093d9e0478d83a0cf49a522e72a2df928de86223799d83d
|
4
|
+
data.tar.gz: cbfacf86f1e0507abe4df07b45f20bc3d06d682617c482419a05935186a61c15
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0f3285b53cb8fe7e7d5ef30a970c632c52112c6b0503b8c81155f6cdb37583f036107b052c37019671355d0838512f904a61aeff5b69b7f6f8a2c1f4fabe785
|
7
|
+
data.tar.gz: f1b9c5759d77c1f497a0ac09ccf455beda29417024c4cd8ba6c0f8fcbac3347ab233c9e8c558a75382ef3b41495b5b693495ab0533c3a084a416c1f75a38313b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.3.0 (2022-03-22)
|
2
|
+
|
3
|
+
- Changed `item_id` to `user_id` for `similar_users`
|
4
|
+
- Changed warning to an error when `value` passed to `fit`
|
5
|
+
- Changed to use Faiss over NGT for `optimize_item_recs` and `optimize_similar_users` when both are installed
|
6
|
+
- Removed dependency on `wilson_score` gem for `top_items`
|
7
|
+
- Dropped support for Ruby < 2.6
|
8
|
+
|
1
9
|
## 0.2.9 (2022-03-22)
|
2
10
|
|
3
11
|
- Fixed error with `load_movielens`
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -242,7 +242,7 @@ recommender.fit(data)
|
|
242
242
|
recommender.top_items
|
243
243
|
```
|
244
244
|
|
245
|
-
This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback
|
245
|
+
This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback and item frequency for implicit feedback.
|
246
246
|
|
247
247
|
## Data
|
248
248
|
|
data/lib/disco/model.rb
CHANGED
@@ -10,10 +10,9 @@ module Disco
|
|
10
10
|
|
11
11
|
has_many :"recommended_#{name}", -> { where("disco_recommendations.context = ?", name).order("disco_recommendations.score DESC") }, through: :recommendations, source: :item, source_type: class_name
|
12
12
|
|
13
|
-
# TODO use fetch for item_id and score in 0.3.0
|
14
13
|
define_method("update_recommended_#{name}") do |items|
|
15
14
|
now = Time.now
|
16
|
-
items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item[:item_id], context: name, score: item[:score], created_at: now, updated_at: now} }
|
15
|
+
items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item.fetch(:item_id), context: name, score: item.fetch(:score), created_at: now, updated_at: now} }
|
17
16
|
|
18
17
|
self.class.transaction do
|
19
18
|
recommendations.where(context: name).delete_all
|
data/lib/disco/recommender.rb
CHANGED
@@ -23,7 +23,7 @@ module Disco
|
|
23
23
|
@implicit = !train_set.any? { |v| v[:rating] }
|
24
24
|
|
25
25
|
if @implicit && train_set.any? { |v| v[:value] }
|
26
|
-
|
26
|
+
raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used."
|
27
27
|
end
|
28
28
|
|
29
29
|
# TODO improve performance
|
@@ -167,13 +167,13 @@ module Disco
|
|
167
167
|
|
168
168
|
def similar_items(item_id, count: 5)
|
169
169
|
check_fit
|
170
|
-
similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index)
|
170
|
+
similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
|
171
171
|
end
|
172
172
|
alias_method :item_recs, :similar_items
|
173
173
|
|
174
174
|
def similar_users(user_id, count: 5)
|
175
175
|
check_fit
|
176
|
-
similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index)
|
176
|
+
similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
|
177
177
|
end
|
178
178
|
|
179
179
|
def top_items(count: 5)
|
@@ -183,27 +183,20 @@ module Disco
|
|
183
183
|
if @implicit
|
184
184
|
scores = Numo::UInt64.cast(@item_count)
|
185
185
|
else
|
186
|
-
|
186
|
+
min_rating = @min_rating
|
187
187
|
|
188
|
-
|
189
|
-
|
190
|
-
# TODO remove temp fix
|
191
|
-
(@min_rating - 1)..@max_rating
|
192
|
-
else
|
193
|
-
@min_rating..@max_rating
|
194
|
-
end
|
195
|
-
scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) })
|
188
|
+
# TODO remove temp fix
|
189
|
+
min_rating -= 1 if @min_rating == @max_rating
|
196
190
|
|
197
|
-
# TODO uncomment in 0.3.0
|
198
191
|
# wilson score with continuity correction
|
199
192
|
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
193
|
+
z = 1.96 # 95% confidence
|
194
|
+
range = @max_rating - @min_rating
|
195
|
+
n = Numo::DFloat.cast(@item_count)
|
196
|
+
phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
|
197
|
+
phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
|
198
|
+
scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
|
199
|
+
scores = scores * range + min_rating
|
207
200
|
end
|
208
201
|
|
209
202
|
indexes = scores.sort_index.reverse
|
@@ -266,8 +259,7 @@ module Disco
|
|
266
259
|
|
267
260
|
# factors should already be normalized for similar users/items
|
268
261
|
def create_index(factors, library:)
|
269
|
-
|
270
|
-
library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt"
|
262
|
+
library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss"
|
271
263
|
|
272
264
|
case library
|
273
265
|
when "faiss"
|
@@ -276,7 +268,7 @@ module Disco
|
|
276
268
|
# inner product is cosine similarity with normalized vectors
|
277
269
|
# https://github.com/facebookresearch/faiss/issues/95
|
278
270
|
#
|
279
|
-
# TODO
|
271
|
+
# TODO add option for index type
|
280
272
|
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
|
281
273
|
# index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
|
282
274
|
index = Faiss::IndexFlatIP.new(factors.shape[1])
|
@@ -318,7 +310,7 @@ module Disco
|
|
318
310
|
factors / norms.expand_dims(1)
|
319
311
|
end
|
320
312
|
|
321
|
-
def similar(id, map, norm_factors, count, index)
|
313
|
+
def similar(id, key, map, norm_factors, count, index)
|
322
314
|
i = map[id]
|
323
315
|
|
324
316
|
if i && norm_factors.shape[0] > 1
|
@@ -342,9 +334,6 @@ module Disco
|
|
342
334
|
|
343
335
|
keys = map.keys
|
344
336
|
|
345
|
-
# TODO use user_id for similar_users in 0.3.0
|
346
|
-
key = :item_id
|
347
|
-
|
348
337
|
result = []
|
349
338
|
# items can have the same score
|
350
339
|
# so original item may not be at index 0
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -69,7 +69,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
69
|
requirements:
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: '2.
|
72
|
+
version: '2.6'
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - ">="
|