disco 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +8 -2
- data/lib/disco/recommender.rb +24 -8
- data/lib/disco/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 33961b51cd8461f821c4622f5983b2ac6138cc3b70c9be8ef1d3a6e82c37ab9e
|
4
|
+
data.tar.gz: f4e8cdfa4efb354878c459b57b522a81cd3f0c81e4297c53f9dc88517b312ac8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f4c207486e858a23480e52b4b9a479fd23b26f0259ef12e39b964d9d7f4cc0067f162207d88119f76414269d65e3ee3d7c675c46f5f143c5b016eacab6e888c
|
7
|
+
data.tar.gz: 2734c1dcc87c423566dd2f842ef7fdd1b7e3cbaa1ecac61dbfafdbc1769b43edca81d28ce60712008eee9d381d64c9e2dea71b210c1a10fecaef75696ee2fd05
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.1.3 (2020-06-28)
|
2
|
+
|
3
|
+
- Added support for Rover
|
4
|
+
- Raise error when missing user or item ids
|
5
|
+
- Fixed string keys for Daru data frames
|
6
|
+
- `optimize_item_recs` and `optimize_similar_users` methods are no longer experimental
|
7
|
+
|
1
8
|
## 0.1.2 (2020-03-26)
|
2
9
|
|
3
10
|
- Added experimental `optimize_item_recs` and `optimize_similar_users` methods
|
data/README.md
CHANGED
@@ -244,20 +244,26 @@ Data can be an array of hashes
|
|
244
244
|
[{user_id: 1, item_id: 1, rating: 5}, {user_id: 2, item_id: 1, rating: 3}]
|
245
245
|
```
|
246
246
|
|
247
|
+
Or a Rover data frame
|
248
|
+
|
249
|
+
```ruby
|
250
|
+
Rover.read_csv("ratings.csv")
|
251
|
+
```
|
252
|
+
|
247
253
|
Or a Daru data frame
|
248
254
|
|
249
255
|
```ruby
|
250
256
|
Daru::DataFrame.from_csv("ratings.csv")
|
251
257
|
```
|
252
258
|
|
253
|
-
## Faster Similarity
|
259
|
+
## Faster Similarity
|
254
260
|
|
255
261
|
If you have a large number of users/items, you can use an approximate nearest neighbors library like [NGT](https://github.com/ankane/ngt) to speed up item-based recommendations and similar users.
|
256
262
|
|
257
263
|
Add this line to your application’s Gemfile:
|
258
264
|
|
259
265
|
```ruby
|
260
|
-
gem 'ngt', '>= 0.
|
266
|
+
gem 'ngt', '>= 0.3.0'
|
261
267
|
```
|
262
268
|
|
263
269
|
Speed up item-based recommendations with:
|
data/lib/disco/recommender.rb
CHANGED
@@ -9,14 +9,8 @@ module Disco
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def fit(train_set, validation_set: nil)
|
12
|
-
|
13
|
-
|
14
|
-
train_set = train_set.to_a[0]
|
15
|
-
end
|
16
|
-
if validation_set.is_a?(Daru::DataFrame)
|
17
|
-
validation_set = validation_set.to_a[0]
|
18
|
-
end
|
19
|
-
end
|
12
|
+
train_set = to_dataset(train_set)
|
13
|
+
validation_set = to_dataset(validation_set) if validation_set
|
20
14
|
|
21
15
|
@implicit = !train_set.any? { |v| v[:rating] }
|
22
16
|
|
@@ -190,6 +184,9 @@ module Disco
|
|
190
184
|
user_ids = train_set.map { |v| v[:user_id] }.uniq.sort
|
191
185
|
item_ids = train_set.map { |v| v[:item_id] }.uniq.sort
|
192
186
|
|
187
|
+
raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
|
188
|
+
raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
|
189
|
+
|
193
190
|
@user_map = user_ids.zip(user_ids.size.times).to_h
|
194
191
|
@item_map = item_ids.zip(item_ids.size.times).to_h
|
195
192
|
end
|
@@ -207,6 +204,25 @@ module Disco
|
|
207
204
|
raise ArgumentError, "No training data" if train_set.empty?
|
208
205
|
end
|
209
206
|
|
207
|
+
def to_dataset(dataset)
|
208
|
+
if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
|
209
|
+
# convert keys to symbols
|
210
|
+
dataset = dataset.dup
|
211
|
+
dataset.keys.each do |k, v|
|
212
|
+
dataset[k.to_sym] ||= dataset.delete(k)
|
213
|
+
end
|
214
|
+
dataset.to_a
|
215
|
+
elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
|
216
|
+
# convert keys to symbols
|
217
|
+
dataset = dataset.dup
|
218
|
+
new_names = dataset.vectors.to_a.map { |k| [k, k.to_sym] }.to_h
|
219
|
+
dataset.rename_vectors!(new_names)
|
220
|
+
dataset.to_a[0]
|
221
|
+
else
|
222
|
+
dataset
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
210
226
|
def marshal_dump
|
211
227
|
obj = {
|
212
228
|
implicit: @implicit,
|
data/lib/disco/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: disco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: libmf
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rover-df
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: ngt
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|