disco 0.2.7 → 0.3.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +6 -8
- data/lib/disco/data.rb +9 -2
- data/lib/disco/model.rb +1 -1
- data/lib/disco/recommender.rb +16 -21
- data/lib/disco/version.rb +1 -1
- data/lib/disco.rb +0 -5
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815bc7de802959be7093d9e0478d83a0cf49a522e72a2df928de86223799d83d
|
4
|
+
data.tar.gz: cbfacf86f1e0507abe4df07b45f20bc3d06d682617c482419a05935186a61c15
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0f3285b53cb8fe7e7d5ef30a970c632c52112c6b0503b8c81155f6cdb37583f036107b052c37019671355d0838512f904a61aeff5b69b7f6f8a2c1f4fabe785
|
7
|
+
data.tar.gz: f1b9c5759d77c1f497a0ac09ccf455beda29417024c4cd8ba6c0f8fcbac3347ab233c9e8c558a75382ef3b41495b5b693495ab0533c3a084a416c1f75a38313b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,19 @@
|
|
1
|
+
## 0.3.0 (2022-03-22)
|
2
|
+
|
3
|
+
- Changed `item_id` to `user_id` for `similar_users`
|
4
|
+
- Changed warning to an error when `value` passed to `fit`
|
5
|
+
- Changed to use Faiss over NGT for `optimize_item_recs` and `optimize_similar_users` when both are installed
|
6
|
+
- Removed dependency on `wilson_score` gem for `top_items`
|
7
|
+
- Dropped support for Ruby < 2.6
|
8
|
+
|
9
|
+
## 0.2.9 (2022-03-22)
|
10
|
+
|
11
|
+
- Fixed error with `load_movielens`
|
12
|
+
|
13
|
+
## 0.2.8 (2022-03-13)
|
14
|
+
|
15
|
+
- Fixed error with `top_items` with all same rating
|
16
|
+
|
1
17
|
## 0.2.7 (2021-08-06)
|
2
18
|
|
3
19
|
- Added warning for `value`
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -13,7 +13,7 @@
|
|
13
13
|
Add this line to your application’s Gemfile:
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
gem
|
16
|
+
gem "disco"
|
17
17
|
```
|
18
18
|
|
19
19
|
## Getting Started
|
@@ -44,6 +44,8 @@ recommender.fit([
|
|
44
44
|
])
|
45
45
|
```
|
46
46
|
|
47
|
+
> Each `user_id`/`item_id` combination should only appear once
|
48
|
+
|
47
49
|
Get user-based recommendations - “users like you also liked”
|
48
50
|
|
49
51
|
```ruby
|
@@ -97,11 +99,7 @@ recommender.item_recs("Star Wars (1977)")
|
|
97
99
|
[Ahoy](https://github.com/ankane/ahoy) is a great source for implicit feedback
|
98
100
|
|
99
101
|
```ruby
|
100
|
-
views = Ahoy::Event.
|
101
|
-
where(name: "Viewed post").
|
102
|
-
group(:user_id).
|
103
|
-
group("properties->>'post_id'"). # postgres syntax
|
104
|
-
count
|
102
|
+
views = Ahoy::Event.where(name: "Viewed post").group(:user_id).group_prop(:post_id).count
|
105
103
|
|
106
104
|
data =
|
107
105
|
views.map do |(user_id, post_id), _|
|
@@ -244,7 +242,7 @@ recommender.fit(data)
|
|
244
242
|
recommender.top_items
|
245
243
|
```
|
246
244
|
|
247
|
-
This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback
|
245
|
+
This uses [Wilson score](https://www.evanmiller.org/how-not-to-sort-by-average-rating.html) for explicit feedback and item frequency for implicit feedback.
|
248
246
|
|
249
247
|
## Data
|
250
248
|
|
@@ -273,7 +271,7 @@ If you have a large number of users or items, you can use an approximate nearest
|
|
273
271
|
Add this line to your application’s Gemfile:
|
274
272
|
|
275
273
|
```ruby
|
276
|
-
gem
|
274
|
+
gem "faiss"
|
277
275
|
```
|
278
276
|
|
279
277
|
Speed up the `user_recs` method with:
|
data/lib/disco/data.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
module Disco
|
2
2
|
module Data
|
3
3
|
def load_movielens
|
4
|
-
|
4
|
+
require "csv"
|
5
|
+
|
6
|
+
item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
5
7
|
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
6
|
-
data_path = download_file("ml-100k/u.data", "
|
8
|
+
data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
7
9
|
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
8
10
|
|
9
11
|
# convert u.item to utf-8
|
@@ -29,6 +31,11 @@ module Disco
|
|
29
31
|
private
|
30
32
|
|
31
33
|
def download_file(fname, origin, file_hash:)
|
34
|
+
require "digest"
|
35
|
+
require "fileutils"
|
36
|
+
require "net/http"
|
37
|
+
require "tmpdir"
|
38
|
+
|
32
39
|
# TODO handle this better
|
33
40
|
raise "No HOME" unless ENV["HOME"]
|
34
41
|
dest = "#{ENV["HOME"]}/.disco/#{fname}"
|
data/lib/disco/model.rb
CHANGED
@@ -12,7 +12,7 @@ module Disco
|
|
12
12
|
|
13
13
|
define_method("update_recommended_#{name}") do |items|
|
14
14
|
now = Time.now
|
15
|
-
items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item
|
15
|
+
items = items.map { |item| {subject_type: model_name.name, subject_id: id, item_type: class_name, item_id: item.fetch(:item_id), context: name, score: item.fetch(:score), created_at: now, updated_at: now} }
|
16
16
|
|
17
17
|
self.class.transaction do
|
18
18
|
recommendations.where(context: name).delete_all
|
data/lib/disco/recommender.rb
CHANGED
@@ -23,7 +23,7 @@ module Disco
|
|
23
23
|
@implicit = !train_set.any? { |v| v[:rating] }
|
24
24
|
|
25
25
|
if @implicit && train_set.any? { |v| v[:value] }
|
26
|
-
|
26
|
+
raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used."
|
27
27
|
end
|
28
28
|
|
29
29
|
# TODO improve performance
|
@@ -167,13 +167,13 @@ module Disco
|
|
167
167
|
|
168
168
|
def similar_items(item_id, count: 5)
|
169
169
|
check_fit
|
170
|
-
similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index)
|
170
|
+
similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
|
171
171
|
end
|
172
172
|
alias_method :item_recs, :similar_items
|
173
173
|
|
174
174
|
def similar_users(user_id, count: 5)
|
175
175
|
check_fit
|
176
|
-
similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index)
|
176
|
+
similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
|
177
177
|
end
|
178
178
|
|
179
179
|
def top_items(count: 5)
|
@@ -183,21 +183,20 @@ module Disco
|
|
183
183
|
if @implicit
|
184
184
|
scores = Numo::UInt64.cast(@item_count)
|
185
185
|
else
|
186
|
-
|
186
|
+
min_rating = @min_rating
|
187
187
|
|
188
|
-
|
189
|
-
|
188
|
+
# TODO remove temp fix
|
189
|
+
min_rating -= 1 if @min_rating == @max_rating
|
190
190
|
|
191
|
-
# TODO uncomment in 0.3.0
|
192
191
|
# wilson score with continuity correction
|
193
192
|
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
193
|
+
z = 1.96 # 95% confidence
|
194
|
+
range = @max_rating - @min_rating
|
195
|
+
n = Numo::DFloat.cast(@item_count)
|
196
|
+
phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
|
197
|
+
phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
|
198
|
+
scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
|
199
|
+
scores = scores * range + min_rating
|
201
200
|
end
|
202
201
|
|
203
202
|
indexes = scores.sort_index.reverse
|
@@ -260,8 +259,7 @@ module Disco
|
|
260
259
|
|
261
260
|
# factors should already be normalized for similar users/items
|
262
261
|
def create_index(factors, library:)
|
263
|
-
|
264
|
-
library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt"
|
262
|
+
library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss"
|
265
263
|
|
266
264
|
case library
|
267
265
|
when "faiss"
|
@@ -270,7 +268,7 @@ module Disco
|
|
270
268
|
# inner product is cosine similarity with normalized vectors
|
271
269
|
# https://github.com/facebookresearch/faiss/issues/95
|
272
270
|
#
|
273
|
-
# TODO
|
271
|
+
# TODO add option for index type
|
274
272
|
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
|
275
273
|
# index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
|
276
274
|
index = Faiss::IndexFlatIP.new(factors.shape[1])
|
@@ -312,7 +310,7 @@ module Disco
|
|
312
310
|
factors / norms.expand_dims(1)
|
313
311
|
end
|
314
312
|
|
315
|
-
def similar(id, map, norm_factors, count, index)
|
313
|
+
def similar(id, key, map, norm_factors, count, index)
|
316
314
|
i = map[id]
|
317
315
|
|
318
316
|
if i && norm_factors.shape[0] > 1
|
@@ -336,9 +334,6 @@ module Disco
|
|
336
334
|
|
337
335
|
keys = map.keys
|
338
336
|
|
339
|
-
# TODO use user_id for similar_users in 0.3.0
|
340
|
-
key = :item_id
|
341
|
-
|
342
337
|
result = []
|
343
338
|
# items can have the same score
|
344
339
|
# so original item may not be at index 0
|
data/lib/disco/version.rb
CHANGED
data/lib/disco.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -69,14 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
69
|
requirements:
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: '2.
|
72
|
+
version: '2.6'
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - ">="
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
79
|
+
rubygems_version: 3.3.7
|
80
80
|
signing_key:
|
81
81
|
specification_version: 4
|
82
82
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|