disco 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +8 -2
- data/lib/disco/recommender.rb +24 -8
- data/lib/disco/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 33961b51cd8461f821c4622f5983b2ac6138cc3b70c9be8ef1d3a6e82c37ab9e
|
4
|
+
data.tar.gz: f4e8cdfa4efb354878c459b57b522a81cd3f0c81e4297c53f9dc88517b312ac8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f4c207486e858a23480e52b4b9a479fd23b26f0259ef12e39b964d9d7f4cc0067f162207d88119f76414269d65e3ee3d7c675c46f5f143c5b016eacab6e888c
|
7
|
+
data.tar.gz: 2734c1dcc87c423566dd2f842ef7fdd1b7e3cbaa1ecac61dbfafdbc1769b43edca81d28ce60712008eee9d381d64c9e2dea71b210c1a10fecaef75696ee2fd05
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.1.3 (2020-06-28)
|
2
|
+
|
3
|
+
- Added support for Rover
|
4
|
+
- Raise error when missing user or item ids
|
5
|
+
- Fixed string keys for Daru data frames
|
6
|
+
- `optimize_item_recs` and `optimize_similar_users` methods are no longer experimental
|
7
|
+
|
1
8
|
## 0.1.2 (2020-03-26)
|
2
9
|
|
3
10
|
- Added experimental `optimize_item_recs` and `optimize_similar_users` methods
|
data/README.md
CHANGED
@@ -244,20 +244,26 @@ Data can be an array of hashes
|
|
244
244
|
[{user_id: 1, item_id: 1, rating: 5}, {user_id: 2, item_id: 1, rating: 3}]
|
245
245
|
```
|
246
246
|
|
247
|
+
Or a Rover data frame
|
248
|
+
|
249
|
+
```ruby
|
250
|
+
Rover.read_csv("ratings.csv")
|
251
|
+
```
|
252
|
+
|
247
253
|
Or a Daru data frame
|
248
254
|
|
249
255
|
```ruby
|
250
256
|
Daru::DataFrame.from_csv("ratings.csv")
|
251
257
|
```
|
252
258
|
|
253
|
-
## Faster Similarity
|
259
|
+
## Faster Similarity
|
254
260
|
|
255
261
|
If you have a large number of users/items, you can use an approximate nearest neighbors library like [NGT](https://github.com/ankane/ngt) to speed up item-based recommendations and similar users.
|
256
262
|
|
257
263
|
Add this line to your application’s Gemfile:
|
258
264
|
|
259
265
|
```ruby
|
260
|
-
gem 'ngt', '>= 0.
|
266
|
+
gem 'ngt', '>= 0.3.0'
|
261
267
|
```
|
262
268
|
|
263
269
|
Speed up item-based recommendations with:
|
data/lib/disco/recommender.rb
CHANGED
@@ -9,14 +9,8 @@ module Disco
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def fit(train_set, validation_set: nil)
|
12
|
-
if defined?(Daru)
|
13
|
-
if train_set.is_a?(Daru::DataFrame)
|
14
|
-
train_set = train_set.to_a[0]
|
15
|
-
end
|
16
|
-
if validation_set.is_a?(Daru::DataFrame)
|
17
|
-
validation_set = validation_set.to_a[0]
|
18
|
-
end
|
19
|
-
end
|
12
|
+
train_set = to_dataset(train_set)
|
13
|
+
validation_set = to_dataset(validation_set) if validation_set
|
20
14
|
|
21
15
|
@implicit = !train_set.any? { |v| v[:rating] }
|
22
16
|
|
@@ -190,6 +184,9 @@ module Disco
|
|
190
184
|
user_ids = train_set.map { |v| v[:user_id] }.uniq.sort
|
191
185
|
item_ids = train_set.map { |v| v[:item_id] }.uniq.sort
|
192
186
|
|
187
|
+
raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
|
188
|
+
raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
|
189
|
+
|
193
190
|
@user_map = user_ids.zip(user_ids.size.times).to_h
|
194
191
|
@item_map = item_ids.zip(item_ids.size.times).to_h
|
195
192
|
end
|
@@ -207,6 +204,25 @@ module Disco
|
|
207
204
|
raise ArgumentError, "No training data" if train_set.empty?
|
208
205
|
end
|
209
206
|
|
207
|
+
def to_dataset(dataset)
|
208
|
+
if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
|
209
|
+
# convert keys to symbols
|
210
|
+
dataset = dataset.dup
|
211
|
+
dataset.keys.each do |k, v|
|
212
|
+
dataset[k.to_sym] ||= dataset.delete(k)
|
213
|
+
end
|
214
|
+
dataset.to_a
|
215
|
+
elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
|
216
|
+
# convert keys to symbols
|
217
|
+
dataset = dataset.dup
|
218
|
+
new_names = dataset.vectors.to_a.map { |k| [k, k.to_sym] }.to_h
|
219
|
+
dataset.rename_vectors!(new_names)
|
220
|
+
dataset.to_a[0]
|
221
|
+
else
|
222
|
+
dataset
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
210
226
|
def marshal_dump
|
211
227
|
obj = {
|
212
228
|
implicit: @implicit,
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rover-df
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: ngt
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|