disco 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +3 -25
- data/lib/disco/data.rb +4 -7
- data/lib/disco/recommender.rb +19 -17
- data/lib/disco/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bbc2c36a98486f496c7c5aed996b3250def9f87ce444dc48e4f8c9164db9e630
|
4
|
+
data.tar.gz: a862bf6d66484f5dac154586dea0a89d85a4873644ff00f4420ac3dfc0c9a852
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 948d564359a61c1ad356c0806e34c57d6dcae354cc55cf1bff4bce5f40ee94b37edd3c5d8fc35e36cb0aeae59ee467acb561c0074bbb7fb8da929b7e548bcf1f
|
7
|
+
data.tar.gz: f3d98a62dd540957343a29c01624586e853a0f400f8105d2ae67d34e85e408b652befc0d702720e4d7e33852ac738b2a8e276acb6ac143195620386b07e99084
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.4.2 (2024-06-24)
|
2
|
+
|
3
|
+
- Removed dependency on `csv` gem for `load_movielens`
|
4
|
+
|
5
|
+
## 0.4.1 (2024-05-23)
|
6
|
+
|
7
|
+
- Reduced memory for `item_recs` and `similar_users`
|
8
|
+
|
1
9
|
## 0.4.0 (2023-01-30)
|
2
10
|
|
3
11
|
- Fixed issue with `has_recommended` and inheritance with Rails < 6.1
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
- Works with explicit and implicit feedback
|
7
7
|
- Uses high-performance matrix factorization
|
8
8
|
|
9
|
-
[](https://github.com/ankane/disco/actions)
|
10
10
|
|
11
11
|
## Installation
|
12
12
|
|
@@ -229,8 +229,8 @@ recommender.user_recs(new_user_id) # returns empty array
|
|
229
229
|
|
230
230
|
There are a number of ways to deal with this, but here are some common ones:
|
231
231
|
|
232
|
-
- For user-based recommendations, show new users the most popular items
|
233
|
-
- For item-based recommendations, make content-based recommendations with a gem like [tf-idf-similarity](https://github.com/jpmckinney/tf-idf-similarity)
|
232
|
+
- For user-based recommendations, show new users the most popular items
|
233
|
+
- For item-based recommendations, make content-based recommendations with a gem like [tf-idf-similarity](https://github.com/jpmckinney/tf-idf-similarity)
|
234
234
|
|
235
235
|
Get top items with:
|
236
236
|
|
@@ -329,28 +329,6 @@ Thanks to:
|
|
329
329
|
- [Implicit](https://github.com/benfred/implicit/) for serving as an initial reference for user and item similarity
|
330
330
|
- [@dasch](https://github.com/dasch) for the gem name
|
331
331
|
|
332
|
-
## Upgrading
|
333
|
-
|
334
|
-
### 0.2.7
|
335
|
-
|
336
|
-
There’s now a warning when passing `:value` with implicit feedback, as this has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used.
|
337
|
-
|
338
|
-
```ruby
|
339
|
-
recommender.fit([
|
340
|
-
{user_id: 1, item_id: 1, value: 1},
|
341
|
-
{user_id: 2, item_id: 1, value: 3}
|
342
|
-
])
|
343
|
-
```
|
344
|
-
|
345
|
-
to:
|
346
|
-
|
347
|
-
```ruby
|
348
|
-
recommender.fit([
|
349
|
-
{user_id: 1, item_id: 1},
|
350
|
-
{user_id: 2, item_id: 1}
|
351
|
-
])
|
352
|
-
```
|
353
|
-
|
354
332
|
## History
|
355
333
|
|
356
334
|
View the [changelog](https://github.com/ankane/disco/blob/master/CHANGELOG.md)
|
data/lib/disco/data.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
1
|
module Disco
|
2
2
|
module Data
|
3
3
|
def load_movielens
|
4
|
-
require "csv"
|
5
|
-
|
6
4
|
item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
|
7
5
|
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
|
8
6
|
data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
|
9
7
|
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
10
8
|
|
11
|
-
# convert u.item to utf-8
|
12
|
-
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
|
13
|
-
|
14
9
|
movies = {}
|
15
|
-
|
10
|
+
File.foreach(item_path) do |line|
|
11
|
+
row = line.encode("UTF-8", "ISO-8859-1").split("|")
|
16
12
|
movies[row[0]] = row[1]
|
17
13
|
end
|
18
14
|
|
19
15
|
data = []
|
20
|
-
|
16
|
+
File.foreach(data_path) do |line|
|
17
|
+
row = line.split("\t")
|
21
18
|
data << {
|
22
19
|
user_id: row[0].to_i,
|
23
20
|
item_id: movies[row[1]],
|
data/lib/disco/recommender.rb
CHANGED
@@ -99,8 +99,8 @@ module Disco
|
|
99
99
|
@user_factors = model.p_factors(format: :numo)
|
100
100
|
@item_factors = model.q_factors(format: :numo)
|
101
101
|
|
102
|
-
@
|
103
|
-
@
|
102
|
+
@user_norms = nil
|
103
|
+
@item_norms = nil
|
104
104
|
|
105
105
|
@user_recs_index = nil
|
106
106
|
@similar_users_index = nil
|
@@ -172,13 +172,13 @@ module Disco
|
|
172
172
|
|
173
173
|
def similar_items(item_id, count: 5)
|
174
174
|
check_fit
|
175
|
-
similar(item_id, :item_id, @item_map,
|
175
|
+
similar(item_id, :item_id, @item_map, @item_factors, item_norms, count, @similar_items_index)
|
176
176
|
end
|
177
177
|
alias_method :item_recs, :similar_items
|
178
178
|
|
179
179
|
def similar_users(user_id, count: 5)
|
180
180
|
check_fit
|
181
|
-
similar(user_id, :user_id, @user_map,
|
181
|
+
similar(user_id, :user_id, @user_map, @user_factors, user_norms, count, @similar_users_index)
|
182
182
|
end
|
183
183
|
|
184
184
|
def top_items(count: 5)
|
@@ -247,13 +247,13 @@ module Disco
|
|
247
247
|
|
248
248
|
def optimize_similar_items(library: nil)
|
249
249
|
check_fit
|
250
|
-
@similar_items_index = create_index(
|
250
|
+
@similar_items_index = create_index(@item_factors / item_norms.expand_dims(1), library: library)
|
251
251
|
end
|
252
252
|
alias_method :optimize_item_recs, :optimize_similar_items
|
253
253
|
|
254
254
|
def optimize_similar_users(library: nil)
|
255
255
|
check_fit
|
256
|
-
@similar_users_index = create_index(
|
256
|
+
@similar_users_index = create_index(@user_factors / user_norms.expand_dims(1), library: library)
|
257
257
|
end
|
258
258
|
|
259
259
|
def inspect
|
@@ -341,36 +341,37 @@ module Disco
|
|
341
341
|
end
|
342
342
|
end
|
343
343
|
|
344
|
-
def
|
345
|
-
@
|
344
|
+
def user_norms
|
345
|
+
@user_norms ||= norms(@user_factors)
|
346
346
|
end
|
347
347
|
|
348
|
-
def
|
349
|
-
@
|
348
|
+
def item_norms
|
349
|
+
@item_norms ||= norms(@item_factors)
|
350
350
|
end
|
351
351
|
|
352
|
-
def
|
352
|
+
def norms(factors)
|
353
353
|
norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1))
|
354
354
|
norms[norms.eq(0)] = 1e-10 # no zeros
|
355
|
-
|
355
|
+
norms
|
356
356
|
end
|
357
357
|
|
358
|
-
def similar(id, key, map, norm_factors, count, index)
|
358
|
+
def similar(id, key, map, factors, norms, count, index)
|
359
359
|
i = map[id]
|
360
360
|
|
361
|
-
if i &&
|
361
|
+
if i && factors.shape[0] > 1
|
362
362
|
if index && count
|
363
|
+
norm_factors = factors[i, true] / norms[i]
|
363
364
|
if defined?(Faiss) && index.is_a?(Faiss::Index)
|
364
|
-
predictions, ids = index.search(norm_factors
|
365
|
+
predictions, ids = index.search(norm_factors.expand_dims(0), count + 1).map { |v| v.to_a[0] }
|
365
366
|
else
|
366
|
-
result = index.search(norm_factors
|
367
|
+
result = index.search(norm_factors, size: count + 1)
|
367
368
|
# ids from batch_insert start at 1 instead of 0
|
368
369
|
ids = result.map { |v| v[:id] - 1 }
|
369
370
|
# convert cosine distance to cosine similarity
|
370
371
|
predictions = result.map { |v| 1 - v[:distance] }
|
371
372
|
end
|
372
373
|
else
|
373
|
-
predictions =
|
374
|
+
predictions = factors.inner(factors[i, true]) / (norms * norms[i])
|
374
375
|
indexes = predictions.sort_index.reverse
|
375
376
|
indexes = indexes[0...[count + 1, indexes.size].min] if count
|
376
377
|
predictions = predictions[indexes]
|
@@ -386,6 +387,7 @@ module Disco
|
|
386
387
|
next if id == i
|
387
388
|
|
388
389
|
result << {key => keys[id], score: predictions[j]}
|
390
|
+
break if result.size == count
|
389
391
|
end
|
390
392
|
result
|
391
393
|
else
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -76,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
79
|
+
rubygems_version: 3.5.11
|
80
80
|
signing_key:
|
81
81
|
specification_version: 4
|
82
82
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|