disco 0.2.9 → 0.3.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d677e14bfb504669dd4f36cc00997128bbb0f7282c428ec29f88bf072587d82f
4
- data.tar.gz: c797b4f1eb39aff5596b5e10b346260f6701df43e1c84559fb2395b23ed2c8f3
3
+ metadata.gz: 815bc7de802959be7093d9e0478d83a0cf49a522e72a2df928de86223799d83d
4
+ data.tar.gz: cbfacf86f1e0507abe4df07b45f20bc3d06d682617c482419a05935186a61c15
5
5
  SHA512:
6
- metadata.gz: b2f0d889ac2c3dbed66642a15460c87e04eb81731f3a729b111522220159e7c52927aed5238413e323b83901fc490e1dce2cd12d6005b196f91eca69c9023277
7
- data.tar.gz: a57abc84399c0cbc57f2997f341d8fe192e2835887df63fc75c13bc6a5d2aed5808aaf555b48c25902a65375df5cfd11990f1e987dfef77a7eef98d63f19914e
6
+ metadata.gz: d0f3285b53cb8fe7e7d5ef30a970c632c52112c6b0503b8c81155f6cdb37583f036107b052c37019671355d0838512f904a61aeff5b69b7f6f8a2c1f4fabe785
7
+ data.tar.gz: f1b9c5759d77c1f497a0ac09ccf455beda29417024c4cd8ba6c0f8fcbac3347ab233c9e8c558a75382ef3b41495b5b693495ab0533c3a084a416c1f75a38313b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.3.0 (2022-03-22)
2
+
3
+ - Changed `item_id` to `user_id` for `similar_users`
4
+ - Changed warning to an error when `value` passed to `fit`
5
+ - Changed to use Faiss over NGT for `optimize_item_recs` and `optimize_similar_users` when both are installed
6
+ - Removed dependency on `wilson_score` gem for `top_items`
7
+ - Dropped support for Ruby < 2.6
8
+
1
9
  ## 0.2.9 (2022-03-22)
2
10
 
3
11
  - Fixed error with `load_movielens`
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2019-2021 Andrew Kane
1
+ Copyright (c) 2019-2022 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -242,7 +242,7 @@ recommender.fit(data)
242
242
  recommender.top_items
243
243
  ```
244
244
 
245
- This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback (add [wilson_score](https://github.com/instacart/wilson_score) to your application’s Gemfile) and item frequency for implicit feedback.
245
+ This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback and item frequency for implicit feedback.
246
246
 
247
247
  ## Data
248
248
 
data/lib/disco/model.rb CHANGED
@@ -10,10 +10,9 @@ module Disco
10
10
 
11
11
  has_many :"recommended_#{name}", -> { where("disco_recommendations.context = ?", name).order("disco_recommendations.score DESC") }, through: :recommendations, source: :item, source_type: class_name
12
12
 
13
- # TODO use fetch for item_id and score in 0.3.0
14
13
  define_method("update_recommended_#{name}") do |items|
15
14
  now = Time.now
16
- items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item[:item_id], context: name, score: item[:score], created_at: now, updated_at: now} }
15
+ items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item.fetch(:item_id), context: name, score: item.fetch(:score), created_at: now, updated_at: now} }
17
16
 
18
17
  self.class.transaction do
19
18
  recommendations.where(context: name).delete_all
@@ -23,7 +23,7 @@ module Disco
23
23
  @implicit = !train_set.any? { |v| v[:rating] }
24
24
 
25
25
  if @implicit && train_set.any? { |v| v[:value] }
26
- warn "[disco] WARNING: Passing `:value` with implicit feedback has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used."
26
+ raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used."
27
27
  end
28
28
 
29
29
  # TODO improve performance
@@ -167,13 +167,13 @@ module Disco
167
167
 
168
168
  def similar_items(item_id, count: 5)
169
169
  check_fit
170
- similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index)
170
+ similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
171
171
  end
172
172
  alias_method :item_recs, :similar_items
173
173
 
174
174
  def similar_users(user_id, count: 5)
175
175
  check_fit
176
- similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index)
176
+ similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
177
177
  end
178
178
 
179
179
  def top_items(count: 5)
@@ -183,27 +183,20 @@ module Disco
183
183
  if @implicit
184
184
  scores = Numo::UInt64.cast(@item_count)
185
185
  else
186
- require "wilson_score"
186
+ min_rating = @min_rating
187
187
 
188
- range =
189
- if @min_rating == @max_rating
190
- # TODO remove temp fix
191
- (@min_rating - 1)..@max_rating
192
- else
193
- @min_rating..@max_rating
194
- end
195
- scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) })
188
+ # TODO remove temp fix
189
+ min_rating -= 1 if @min_rating == @max_rating
196
190
 
197
- # TODO uncomment in 0.3.0
198
191
  # wilson score with continuity correction
199
192
  # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
200
- # z = 1.96 # 95% confidence
201
- # range = @max_rating - @min_rating
202
- # n = Numo::DFloat.cast(@item_count)
203
- # phat = (Numo::DFloat.cast(@item_sum) - (@min_rating * n)) / range / n
204
- # phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
205
- # scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
206
- # scores = scores * range + @min_rating
193
+ z = 1.96 # 95% confidence
194
+ range = @max_rating - @min_rating
195
+ n = Numo::DFloat.cast(@item_count)
196
+ phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
197
+ phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
198
+ scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
199
+ scores = scores * range + min_rating
207
200
  end
208
201
 
209
202
  indexes = scores.sort_index.reverse
@@ -266,8 +259,7 @@ module Disco
266
259
 
267
260
  # factors should already be normalized for similar users/items
268
261
  def create_index(factors, library:)
269
- # TODO make Faiss the default in 0.3.0
270
- library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt"
262
+ library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss"
271
263
 
272
264
  case library
273
265
  when "faiss"
@@ -276,7 +268,7 @@ module Disco
276
268
  # inner product is cosine similarity with normalized vectors
277
269
  # https://github.com/facebookresearch/faiss/issues/95
278
270
  #
279
- # TODO use non-exact index in 0.3.0
271
+ # TODO add option for index type
280
272
  # https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
281
273
  # index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
282
274
  index = Faiss::IndexFlatIP.new(factors.shape[1])
@@ -318,7 +310,7 @@ module Disco
318
310
  factors / norms.expand_dims(1)
319
311
  end
320
312
 
321
- def similar(id, map, norm_factors, count, index)
313
+ def similar(id, key, map, norm_factors, count, index)
322
314
  i = map[id]
323
315
 
324
316
  if i && norm_factors.shape[0] > 1
@@ -342,9 +334,6 @@ module Disco
342
334
 
343
335
  keys = map.keys
344
336
 
345
- # TODO use user_id for similar_users in 0.3.0
346
- key = :item_id
347
-
348
337
  result = []
349
338
  # items can have the same score
350
339
  # so original item may not be at index 0
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.2.9"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -69,7 +69,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
69
69
  requirements:
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
- version: '2.4'
72
+ version: '2.6'
73
73
  required_rubygems_version: !ruby/object:Gem::Requirement
74
74
  requirements:
75
75
  - - ">="