disco 0.4.0 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +3 -25
- data/lib/disco/data.rb +4 -7
- data/lib/disco/recommender.rb +19 -17
- data/lib/disco/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bbc2c36a98486f496c7c5aed996b3250def9f87ce444dc48e4f8c9164db9e630
|
4
|
+
data.tar.gz: a862bf6d66484f5dac154586dea0a89d85a4873644ff00f4420ac3dfc0c9a852
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 948d564359a61c1ad356c0806e34c57d6dcae354cc55cf1bff4bce5f40ee94b37edd3c5d8fc35e36cb0aeae59ee467acb561c0074bbb7fb8da929b7e548bcf1f
|
7
|
+
data.tar.gz: f3d98a62dd540957343a29c01624586e853a0f400f8105d2ae67d34e85e408b652befc0d702720e4d7e33852ac738b2a8e276acb6ac143195620386b07e99084
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.4.2 (2024-06-24)
|
2
|
+
|
3
|
+
- Removed dependency on `csv` gem for `load_movielens`
|
4
|
+
|
5
|
+
## 0.4.1 (2024-05-23)
|
6
|
+
|
7
|
+
- Reduced memory for `item_recs` and `similar_users`
|
8
|
+
|
1
9
|
## 0.4.0 (2023-01-30)
|
2
10
|
|
3
11
|
- Fixed issue with `has_recommended` and inheritance with Rails < 6.1
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
- Works with explicit and implicit feedback
|
7
7
|
- Uses high-performance matrix factorization
|
8
8
|
|
9
|
-
[![Build Status](https://github.com/ankane/disco/workflows/build/badge.svg?branch=master)](https://github.com/ankane/disco/actions)
|
9
|
+
[![Build Status](https://github.com/ankane/disco/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/disco/actions)
|
10
10
|
|
11
11
|
## Installation
|
12
12
|
|
@@ -229,8 +229,8 @@ recommender.user_recs(new_user_id) # returns empty array
|
|
229
229
|
|
230
230
|
There are a number of ways to deal with this, but here are some common ones:
|
231
231
|
|
232
|
-
- For user-based recommendations, show new users the most popular items
|
233
|
-
- For item-based recommendations, make content-based recommendations with a gem like [tf-idf-similarity](https://github.com/jpmckinney/tf-idf-similarity)
|
232
|
+
- For user-based recommendations, show new users the most popular items
|
233
|
+
- For item-based recommendations, make content-based recommendations with a gem like [tf-idf-similarity](https://github.com/jpmckinney/tf-idf-similarity)
|
234
234
|
|
235
235
|
Get top items with:
|
236
236
|
|
@@ -329,28 +329,6 @@ Thanks to:
|
|
329
329
|
- [Implicit](https://github.com/benfred/implicit/) for serving as an initial reference for user and item similarity
|
330
330
|
- [@dasch](https://github.com/dasch) for the gem name
|
331
331
|
|
332
|
-
## Upgrading
|
333
|
-
|
334
|
-
### 0.2.7
|
335
|
-
|
336
|
-
There’s now a warning when passing `:value` with implicit feedback, as this has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used.
|
337
|
-
|
338
|
-
```ruby
|
339
|
-
recommender.fit([
|
340
|
-
{user_id: 1, item_id: 1, value: 1},
|
341
|
-
{user_id: 2, item_id: 1, value: 3}
|
342
|
-
])
|
343
|
-
```
|
344
|
-
|
345
|
-
to:
|
346
|
-
|
347
|
-
```ruby
|
348
|
-
recommender.fit([
|
349
|
-
{user_id: 1, item_id: 1},
|
350
|
-
{user_id: 2, item_id: 1}
|
351
|
-
])
|
352
|
-
```
|
353
|
-
|
354
332
|
## History
|
355
333
|
|
356
334
|
View the [changelog](https://github.com/ankane/disco/blob/master/CHANGELOG.md)
|
data/lib/disco/data.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
1
|
module Disco
|
2
2
|
module Data
|
3
3
|
def load_movielens
|
4
|
-
require "csv"
|
5
|
-
|
6
4
|
item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
7
5
|
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
8
6
|
data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
9
7
|
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
10
8
|
|
11
|
-
# convert u.item to utf-8
|
12
|
-
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
|
13
|
-
|
14
9
|
movies = {}
|
15
|
-
|
10
|
+
File.foreach(item_path) do |line|
|
11
|
+
row = line.encode("UTF-8", "ISO-8859-1").split("|")
|
16
12
|
movies[row[0]] = row[1]
|
17
13
|
end
|
18
14
|
|
19
15
|
data = []
|
20
|
-
|
16
|
+
File.foreach(data_path) do |line|
|
17
|
+
row = line.split("\t")
|
21
18
|
data << {
|
22
19
|
user_id: row[0].to_i,
|
23
20
|
item_id: movies[row[1]],
|
data/lib/disco/recommender.rb
CHANGED
@@ -99,8 +99,8 @@ module Disco
|
|
99
99
|
@user_factors = model.p_factors(format: :numo)
|
100
100
|
@item_factors = model.q_factors(format: :numo)
|
101
101
|
|
102
|
-
@
|
103
|
-
@
|
102
|
+
@user_norms = nil
|
103
|
+
@item_norms = nil
|
104
104
|
|
105
105
|
@user_recs_index = nil
|
106
106
|
@similar_users_index = nil
|
@@ -172,13 +172,13 @@ module Disco
|
|
172
172
|
|
173
173
|
def similar_items(item_id, count: 5)
|
174
174
|
check_fit
|
175
|
-
similar(item_id, :item_id, @item_map,
|
175
|
+
similar(item_id, :item_id, @item_map, @item_factors, item_norms, count, @similar_items_index)
|
176
176
|
end
|
177
177
|
alias_method :item_recs, :similar_items
|
178
178
|
|
179
179
|
def similar_users(user_id, count: 5)
|
180
180
|
check_fit
|
181
|
-
similar(user_id, :user_id, @user_map,
|
181
|
+
similar(user_id, :user_id, @user_map, @user_factors, user_norms, count, @similar_users_index)
|
182
182
|
end
|
183
183
|
|
184
184
|
def top_items(count: 5)
|
@@ -247,13 +247,13 @@ module Disco
|
|
247
247
|
|
248
248
|
def optimize_similar_items(library: nil)
|
249
249
|
check_fit
|
250
|
-
@similar_items_index = create_index(
|
250
|
+
@similar_items_index = create_index(@item_factors / item_norms.expand_dims(1), library: library)
|
251
251
|
end
|
252
252
|
alias_method :optimize_item_recs, :optimize_similar_items
|
253
253
|
|
254
254
|
def optimize_similar_users(library: nil)
|
255
255
|
check_fit
|
256
|
-
@similar_users_index = create_index(
|
256
|
+
@similar_users_index = create_index(@user_factors / user_norms.expand_dims(1), library: library)
|
257
257
|
end
|
258
258
|
|
259
259
|
def inspect
|
@@ -341,36 +341,37 @@ module Disco
|
|
341
341
|
end
|
342
342
|
end
|
343
343
|
|
344
|
-
def
|
345
|
-
@
|
344
|
+
def user_norms
|
345
|
+
@user_norms ||= norms(@user_factors)
|
346
346
|
end
|
347
347
|
|
348
|
-
def
|
349
|
-
@
|
348
|
+
def item_norms
|
349
|
+
@item_norms ||= norms(@item_factors)
|
350
350
|
end
|
351
351
|
|
352
|
-
def
|
352
|
+
def norms(factors)
|
353
353
|
norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1))
|
354
354
|
norms[norms.eq(0)] = 1e-10 # no zeros
|
355
|
-
|
355
|
+
norms
|
356
356
|
end
|
357
357
|
|
358
|
-
def similar(id, key, map,
|
358
|
+
def similar(id, key, map, factors, norms, count, index)
|
359
359
|
i = map[id]
|
360
360
|
|
361
|
-
if i &&
|
361
|
+
if i && factors.shape[0] > 1
|
362
362
|
if index && count
|
363
|
+
norm_factors = factors[i, true] / norms[i]
|
363
364
|
if defined?(Faiss) && index.is_a?(Faiss::Index)
|
364
|
-
predictions, ids = index.search(norm_factors
|
365
|
+
predictions, ids = index.search(norm_factors.expand_dims(0), count + 1).map { |v| v.to_a[0] }
|
365
366
|
else
|
366
|
-
result = index.search(norm_factors
|
367
|
+
result = index.search(norm_factors, size: count + 1)
|
367
368
|
# ids from batch_insert start at 1 instead of 0
|
368
369
|
ids = result.map { |v| v[:id] - 1 }
|
369
370
|
# convert cosine distance to cosine similarity
|
370
371
|
predictions = result.map { |v| 1 - v[:distance] }
|
371
372
|
end
|
372
373
|
else
|
373
|
-
predictions =
|
374
|
+
predictions = factors.inner(factors[i, true]) / (norms * norms[i])
|
374
375
|
indexes = predictions.sort_index.reverse
|
375
376
|
indexes = indexes[0...[count + 1, indexes.size].min] if count
|
376
377
|
predictions = predictions[indexes]
|
@@ -386,6 +387,7 @@ module Disco
|
|
386
387
|
next if id == i
|
387
388
|
|
388
389
|
result << {key => keys[id], score: predictions[j]}
|
390
|
+
break if result.size == count
|
389
391
|
end
|
390
392
|
result
|
391
393
|
else
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-30 00:00:00.000000000 Z
|
11
|
+
date: 2024-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -76,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
79
|
+
rubygems_version: 3.5.11
|
80
80
|
signing_key:
|
81
81
|
specification_version: 4
|
82
82
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|