disco 0.2.9 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d677e14bfb504669dd4f36cc00997128bbb0f7282c428ec29f88bf072587d82f
4
- data.tar.gz: c797b4f1eb39aff5596b5e10b346260f6701df43e1c84559fb2395b23ed2c8f3
3
+ metadata.gz: 815bc7de802959be7093d9e0478d83a0cf49a522e72a2df928de86223799d83d
4
+ data.tar.gz: cbfacf86f1e0507abe4df07b45f20bc3d06d682617c482419a05935186a61c15
5
5
  SHA512:
6
- metadata.gz: b2f0d889ac2c3dbed66642a15460c87e04eb81731f3a729b111522220159e7c52927aed5238413e323b83901fc490e1dce2cd12d6005b196f91eca69c9023277
7
- data.tar.gz: a57abc84399c0cbc57f2997f341d8fe192e2835887df63fc75c13bc6a5d2aed5808aaf555b48c25902a65375df5cfd11990f1e987dfef77a7eef98d63f19914e
6
+ metadata.gz: d0f3285b53cb8fe7e7d5ef30a970c632c52112c6b0503b8c81155f6cdb37583f036107b052c37019671355d0838512f904a61aeff5b69b7f6f8a2c1f4fabe785
7
+ data.tar.gz: f1b9c5759d77c1f497a0ac09ccf455beda29417024c4cd8ba6c0f8fcbac3347ab233c9e8c558a75382ef3b41495b5b693495ab0533c3a084a416c1f75a38313b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.3.0 (2022-03-22)
2
+
3
+ - Changed `item_id` to `user_id` for `similar_users`
4
+ - Changed warning to an error when `value` is passed to `fit`
5
+ - Changed to use Faiss over NGT for `optimize_item_recs` and `optimize_similar_users` when both are installed
6
+ - Removed dependency on `wilson_score` gem for `top_items`
7
+ - Dropped support for Ruby < 2.6
8
+
1
9
  ## 0.2.9 (2022-03-22)
2
10
 
3
11
  - Fixed error with `load_movielens`
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2019-2021 Andrew Kane
1
+ Copyright (c) 2019-2022 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -242,7 +242,7 @@ recommender.fit(data)
242
242
  recommender.top_items
243
243
  ```
244
244
 
245
- This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback (add [wilson_score](https://github.com/instacart/wilson_score) to your application’s Gemfile) and item frequency for implicit feedback.
245
+ This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback and item frequency for implicit feedback.
246
246
 
247
247
  ## Data
248
248
 
data/lib/disco/model.rb CHANGED
@@ -10,10 +10,9 @@ module Disco
10
10
 
11
11
  has_many :"recommended_#{name}", -> { where("disco_recommendations.context = ?", name).order("disco_recommendations.score DESC") }, through: :recommendations, source: :item, source_type: class_name
12
12
 
13
- # TODO use fetch for item_id and score in 0.3.0
14
13
  define_method("update_recommended_#{name}") do |items|
15
14
  now = Time.now
16
- items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item[:item_id], context: name, score: item[:score], created_at: now, updated_at: now} }
15
+ items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item.fetch(:item_id), context: name, score: item.fetch(:score), created_at: now, updated_at: now} }
17
16
 
18
17
  self.class.transaction do
19
18
  recommendations.where(context: name).delete_all
@@ -23,7 +23,7 @@ module Disco
23
23
  @implicit = !train_set.any? { |v| v[:rating] }
24
24
 
25
25
  if @implicit && train_set.any? { |v| v[:value] }
26
- warn "[disco] WARNING: Passing `:value` with implicit feedback has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used."
26
+ raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used."
27
27
  end
28
28
 
29
29
  # TODO improve performance
@@ -167,13 +167,13 @@ module Disco
167
167
 
168
168
  def similar_items(item_id, count: 5)
169
169
  check_fit
170
- similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index)
170
+ similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
171
171
  end
172
172
  alias_method :item_recs, :similar_items
173
173
 
174
174
  def similar_users(user_id, count: 5)
175
175
  check_fit
176
- similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index)
176
+ similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
177
177
  end
178
178
 
179
179
  def top_items(count: 5)
@@ -183,27 +183,20 @@ module Disco
183
183
  if @implicit
184
184
  scores = Numo::UInt64.cast(@item_count)
185
185
  else
186
- require "wilson_score"
186
+ min_rating = @min_rating
187
187
 
188
- range =
189
- if @min_rating == @max_rating
190
- # TODO remove temp fix
191
- (@min_rating - 1)..@max_rating
192
- else
193
- @min_rating..@max_rating
194
- end
195
- scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) })
188
+ # TODO remove temp fix
189
+ min_rating -= 1 if @min_rating == @max_rating
196
190
 
197
- # TODO uncomment in 0.3.0
198
191
  # wilson score with continuity correction
199
192
  # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
200
- # z = 1.96 # 95% confidence
201
- # range = @max_rating - @min_rating
202
- # n = Numo::DFloat.cast(@item_count)
203
- # phat = (Numo::DFloat.cast(@item_sum) - (@min_rating * n)) / range / n
204
- # phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
205
- # scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
206
- # scores = scores * range + @min_rating
193
+ z = 1.96 # 95% confidence
194
+ range = @max_rating - @min_rating
195
+ n = Numo::DFloat.cast(@item_count)
196
+ phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
197
+ phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
198
+ scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
199
+ scores = scores * range + min_rating
207
200
  end
208
201
 
209
202
  indexes = scores.sort_index.reverse
@@ -266,8 +259,7 @@ module Disco
266
259
 
267
260
  # factors should already be normalized for similar users/items
268
261
  def create_index(factors, library:)
269
- # TODO make Faiss the default in 0.3.0
270
- library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt"
262
+ library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss"
271
263
 
272
264
  case library
273
265
  when "faiss"
@@ -276,7 +268,7 @@ module Disco
276
268
  # inner product is cosine similarity with normalized vectors
277
269
  # https://github.com/facebookresearch/faiss/issues/95
278
270
  #
279
- # TODO use non-exact index in 0.3.0
271
+ # TODO add option for index type
280
272
  # https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
281
273
  # index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
282
274
  index = Faiss::IndexFlatIP.new(factors.shape[1])
@@ -318,7 +310,7 @@ module Disco
318
310
  factors / norms.expand_dims(1)
319
311
  end
320
312
 
321
- def similar(id, map, norm_factors, count, index)
313
+ def similar(id, key, map, norm_factors, count, index)
322
314
  i = map[id]
323
315
 
324
316
  if i && norm_factors.shape[0] > 1
@@ -342,9 +334,6 @@ module Disco
342
334
 
343
335
  keys = map.keys
344
336
 
345
- # TODO use user_id for similar_users in 0.3.0
346
- key = :item_id
347
-
348
337
  result = []
349
338
  # items can have the same score
350
339
  # so original item may not be at index 0
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.2.9"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -69,7 +69,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
69
69
  requirements:
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
- version: '2.4'
72
+ version: '2.6'
73
73
  required_rubygems_version: !ruby/object:Gem::Requirement
74
74
  requirements:
75
75
  - - ">="