disco 0.3.2 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +3 -27
- data/lib/disco/data.rb +1 -1
- data/lib/disco/model.rb +7 -10
- data/lib/disco/recommender.rb +23 -17
- data/lib/disco/version.rb +1 -1
- data/lib/disco.rb +6 -6
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb58ae5c2579ef03fc0089f9f9e0421cd38bd8996472bf04ef9383b601759e5a
|
4
|
+
data.tar.gz: 9bb8c5aa186ab1e6364d0ed446dc61e69a0f0ee730cee4c8367586dac69b666a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ecee89ddb2db25a9ba697ec637f16a2408079b8b3d7d287df0cfdb30d9b7c0a996d6cb38075766d7e3fecf529be6ad90187a1ec95d2d141076ecfe3e2236209a
|
7
|
+
data.tar.gz: 31d7259ac86779530b468392b611f0920b24682f8017e3b50f9a3ca41d30ad08454219881e26eae67d1add7a47d645f7a8b77d253ada4a3d0b5db0f5dbf5e33d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.4.1 (2024-05-23)
|
2
|
+
|
3
|
+
- Reduced memory for `item_recs` and `similar_users`
|
4
|
+
|
5
|
+
## 0.4.0 (2023-01-30)
|
6
|
+
|
7
|
+
- Fixed issue with `has_recommended` and inheritance with Rails < 6.1
|
8
|
+
- Deprecated marshal serialization
|
9
|
+
- Dropped support for Ruby < 2.7 and Rails < 6
|
10
|
+
|
1
11
|
## 0.3.2 (2022-09-26)
|
2
12
|
|
3
13
|
- Fixed issue when `fit` is called multiple times
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
- Works with explicit and implicit feedback
|
7
7
|
- Uses high-performance matrix factorization
|
8
8
|
|
9
|
-
[![Build Status](https://github.com/ankane/disco/workflows/build/badge.svg
|
9
|
+
[![Build Status](https://github.com/ankane/disco/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/disco/actions)
|
10
10
|
|
11
11
|
## Installation
|
12
12
|
|
@@ -176,8 +176,6 @@ user.update_recommended_products_v2(recs)
|
|
176
176
|
user.recommended_products_v2
|
177
177
|
```
|
178
178
|
|
179
|
-
For Rails < 6, speed up inserts by adding [activerecord-import](https://github.com/zdennis/activerecord-import) to your app.
|
180
|
-
|
181
179
|
## Storing Recommenders
|
182
180
|
|
183
181
|
If you’d prefer to perform recommendations on-the-fly, store the recommender
|
@@ -231,8 +229,8 @@ recommender.user_recs(new_user_id) # returns empty array
|
|
231
229
|
|
232
230
|
There are a number of ways to deal with this, but here are some common ones:
|
233
231
|
|
234
|
-
- For user-based recommendations, show new users the most popular items
|
235
|
-
- For item-based recommendations, make content-based recommendations with a gem like [tf-idf-similarity](https://github.com/jpmckinney/tf-idf-similarity)
|
232
|
+
- For user-based recommendations, show new users the most popular items
|
233
|
+
- For item-based recommendations, make content-based recommendations with a gem like [tf-idf-similarity](https://github.com/jpmckinney/tf-idf-similarity)
|
236
234
|
|
237
235
|
Get top items with:
|
238
236
|
|
@@ -331,28 +329,6 @@ Thanks to:
|
|
331
329
|
- [Implicit](https://github.com/benfred/implicit/) for serving as an initial reference for user and item similarity
|
332
330
|
- [@dasch](https://github.com/dasch) for the gem name
|
333
331
|
|
334
|
-
## Upgrading
|
335
|
-
|
336
|
-
### 0.2.7
|
337
|
-
|
338
|
-
There’s now a warning when passing `:value` with implicit feedback, as this has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used.
|
339
|
-
|
340
|
-
```ruby
|
341
|
-
recommender.fit([
|
342
|
-
{user_id: 1, item_id: 1, value: 1},
|
343
|
-
{user_id: 2, item_id: 1, value: 3}
|
344
|
-
])
|
345
|
-
```
|
346
|
-
|
347
|
-
to:
|
348
|
-
|
349
|
-
```ruby
|
350
|
-
recommender.fit([
|
351
|
-
{user_id: 1, item_id: 1},
|
352
|
-
{user_id: 2, item_id: 1}
|
353
|
-
])
|
354
|
-
```
|
355
|
-
|
356
332
|
## History
|
357
333
|
|
358
334
|
View the [changelog](https://github.com/ankane/disco/blob/master/CHANGELOG.md)
|
data/lib/disco/data.rb
CHANGED
@@ -9,7 +9,7 @@ module Disco
|
|
9
9
|
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
|
10
10
|
|
11
11
|
# convert u.item to utf-8
|
12
|
-
movies_str = File.read(item_path).encode("UTF-8", "
|
12
|
+
movies_str = File.read(item_path).encode("UTF-8", "ISO-8859-1")
|
13
13
|
|
14
14
|
movies = {}
|
15
15
|
CSV.parse(movies_str, col_sep: "|") do |row|
|
data/lib/disco/model.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
module Disco
|
2
2
|
module Model
|
3
3
|
def has_recommended(name, class_name: nil)
|
4
|
+
if ActiveRecord::VERSION::MAJOR < 6
|
5
|
+
raise Disco::Error, "Requires Active Record 6+"
|
6
|
+
end
|
7
|
+
|
4
8
|
class_name ||= name.to_s.singularize.camelize
|
9
|
+
subject_type = model_name.name
|
5
10
|
|
6
11
|
class_eval do
|
7
12
|
unless reflect_on_association(:recommendations)
|
@@ -12,21 +17,13 @@ module Disco
|
|
12
17
|
|
13
18
|
define_method("update_recommended_#{name}") do |items|
|
14
19
|
now = Time.now
|
15
|
-
items = items.map { |item| {subject_type:
|
20
|
+
items = items.map { |item| {subject_type: subject_type, subject_id: id, item_type: class_name, item_id: item.fetch(:item_id), context: name, score: item.fetch(:score), created_at: now, updated_at: now} }
|
16
21
|
|
17
22
|
self.class.transaction do
|
18
23
|
recommendations.where(context: name).delete_all
|
19
24
|
|
20
25
|
if items.any?
|
21
|
-
|
22
|
-
# Rails 6
|
23
|
-
recommendations.insert_all!(items)
|
24
|
-
elsif recommendations.respond_to?(:bulk_import!)
|
25
|
-
# activerecord-import
|
26
|
-
recommendations.bulk_import!(items, validate: false)
|
27
|
-
else
|
28
|
-
recommendations.create!([items])
|
29
|
-
end
|
26
|
+
recommendations.insert_all!(items)
|
30
27
|
end
|
31
28
|
end
|
32
29
|
end
|
data/lib/disco/recommender.rb
CHANGED
@@ -99,8 +99,8 @@ module Disco
|
|
99
99
|
@user_factors = model.p_factors(format: :numo)
|
100
100
|
@item_factors = model.q_factors(format: :numo)
|
101
101
|
|
102
|
-
@
|
103
|
-
@
|
102
|
+
@user_norms = nil
|
103
|
+
@item_norms = nil
|
104
104
|
|
105
105
|
@user_recs_index = nil
|
106
106
|
@similar_users_index = nil
|
@@ -172,13 +172,13 @@ module Disco
|
|
172
172
|
|
173
173
|
def similar_items(item_id, count: 5)
|
174
174
|
check_fit
|
175
|
-
similar(item_id, :item_id, @item_map,
|
175
|
+
similar(item_id, :item_id, @item_map, @item_factors, item_norms, count, @similar_items_index)
|
176
176
|
end
|
177
177
|
alias_method :item_recs, :similar_items
|
178
178
|
|
179
179
|
def similar_users(user_id, count: 5)
|
180
180
|
check_fit
|
181
|
-
similar(user_id, :user_id, @user_map,
|
181
|
+
similar(user_id, :user_id, @user_map, @user_factors, user_norms, count, @similar_users_index)
|
182
182
|
end
|
183
183
|
|
184
184
|
def top_items(count: 5)
|
@@ -247,13 +247,13 @@ module Disco
|
|
247
247
|
|
248
248
|
def optimize_similar_items(library: nil)
|
249
249
|
check_fit
|
250
|
-
@similar_items_index = create_index(
|
250
|
+
@similar_items_index = create_index(@item_factors / item_norms.expand_dims(1), library: library)
|
251
251
|
end
|
252
252
|
alias_method :optimize_item_recs, :optimize_similar_items
|
253
253
|
|
254
254
|
def optimize_similar_users(library: nil)
|
255
255
|
check_fit
|
256
|
-
@similar_users_index = create_index(
|
256
|
+
@similar_users_index = create_index(@user_factors / user_norms.expand_dims(1), library: library)
|
257
257
|
end
|
258
258
|
|
259
259
|
def inspect
|
@@ -341,36 +341,37 @@ module Disco
|
|
341
341
|
end
|
342
342
|
end
|
343
343
|
|
344
|
-
def
|
345
|
-
@
|
344
|
+
def user_norms
|
345
|
+
@user_norms ||= norms(@user_factors)
|
346
346
|
end
|
347
347
|
|
348
|
-
def
|
349
|
-
@
|
348
|
+
def item_norms
|
349
|
+
@item_norms ||= norms(@item_factors)
|
350
350
|
end
|
351
351
|
|
352
|
-
def
|
352
|
+
def norms(factors)
|
353
353
|
norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1))
|
354
354
|
norms[norms.eq(0)] = 1e-10 # no zeros
|
355
|
-
|
355
|
+
norms
|
356
356
|
end
|
357
357
|
|
358
|
-
def similar(id, key, map,
|
358
|
+
def similar(id, key, map, factors, norms, count, index)
|
359
359
|
i = map[id]
|
360
360
|
|
361
|
-
if i &&
|
361
|
+
if i && factors.shape[0] > 1
|
362
362
|
if index && count
|
363
|
+
norm_factors = factors[i, true] / norms[i]
|
363
364
|
if defined?(Faiss) && index.is_a?(Faiss::Index)
|
364
|
-
predictions, ids = index.search(norm_factors
|
365
|
+
predictions, ids = index.search(norm_factors.expand_dims(0), count + 1).map { |v| v.to_a[0] }
|
365
366
|
else
|
366
|
-
result = index.search(norm_factors
|
367
|
+
result = index.search(norm_factors, size: count + 1)
|
367
368
|
# ids from batch_insert start at 1 instead of 0
|
368
369
|
ids = result.map { |v| v[:id] - 1 }
|
369
370
|
# convert cosine distance to cosine similarity
|
370
371
|
predictions = result.map { |v| 1 - v[:distance] }
|
371
372
|
end
|
372
373
|
else
|
373
|
-
predictions =
|
374
|
+
predictions = factors.inner(factors[i, true]) / (norms * norms[i])
|
374
375
|
indexes = predictions.sort_index.reverse
|
375
376
|
indexes = indexes[0...[count + 1, indexes.size].min] if count
|
376
377
|
predictions = predictions[indexes]
|
@@ -386,6 +387,7 @@ module Disco
|
|
386
387
|
next if id == i
|
387
388
|
|
388
389
|
result << {key => keys[id], score: predictions[j]}
|
390
|
+
break if result.size == count
|
389
391
|
end
|
390
392
|
result
|
391
393
|
else
|
@@ -430,6 +432,8 @@ module Disco
|
|
430
432
|
end
|
431
433
|
|
432
434
|
def marshal_dump
|
435
|
+
warn "[disco] Marshal serialization is deprecated - use JSON instead"
|
436
|
+
|
433
437
|
obj = {
|
434
438
|
implicit: @implicit,
|
435
439
|
user_map: @user_map,
|
@@ -457,6 +461,8 @@ module Disco
|
|
457
461
|
end
|
458
462
|
|
459
463
|
def marshal_load(obj)
|
464
|
+
warn "[disco] Marshal serialization is deprecated - use JSON instead"
|
465
|
+
|
460
466
|
@implicit = obj[:implicit]
|
461
467
|
@user_map = obj[:user_map]
|
462
468
|
@item_map = obj[:item_map]
|
data/lib/disco/version.rb
CHANGED
data/lib/disco.rb
CHANGED
@@ -3,13 +3,13 @@ require "libmf"
|
|
3
3
|
require "numo/narray"
|
4
4
|
|
5
5
|
# modules
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
require_relative "disco/data"
|
7
|
+
require_relative "disco/metrics"
|
8
|
+
require_relative "disco/recommender"
|
9
|
+
require_relative "disco/version"
|
10
10
|
|
11
11
|
# integrations
|
12
|
-
|
12
|
+
require_relative "disco/engine" if defined?(Rails)
|
13
13
|
|
14
14
|
module Disco
|
15
15
|
class Error < StandardError; end
|
@@ -19,7 +19,7 @@ end
|
|
19
19
|
|
20
20
|
if defined?(ActiveSupport.on_load)
|
21
21
|
ActiveSupport.on_load(:active_record) do
|
22
|
-
|
22
|
+
require_relative "disco/model"
|
23
23
|
extend Disco::Model
|
24
24
|
end
|
25
25
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.2
|
19
|
+
version: '0.2'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.2
|
26
|
+
version: '0.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: numo-narray
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -69,14 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
69
|
requirements:
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: '2.
|
72
|
+
version: '2.7'
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - ">="
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
79
|
+
rubygems_version: 3.5.9
|
80
80
|
signing_key:
|
81
81
|
specification_version: 4
|
82
82
|
summary: Recommendations for Ruby and Rails using collaborative filtering
|