disco 0.1.2 → 0.1.3

Sign up for free protection for your applications and access to all features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04d278a7daf8187ac8a5eadaa279c98a0a51a8cf0ad596e793198dcc9141233a
4
- data.tar.gz: '0916f7cfb91d5bf48ce1186502f15647c102eba54e07bdc33eb042b75e1fb0c6'
3
+ metadata.gz: 33961b51cd8461f821c4622f5983b2ac6138cc3b70c9be8ef1d3a6e82c37ab9e
4
+ data.tar.gz: f4e8cdfa4efb354878c459b57b522a81cd3f0c81e4297c53f9dc88517b312ac8
5
5
  SHA512:
6
- metadata.gz: a8e977bcf2988e8e4cb85b13959446d068e3a41feeca26f3789ff7aa0a454258340bc81fb3adb470e0143cc6027cd803ef034900cc29db4648b01f855f6cb011
7
- data.tar.gz: defc71dd93461a114338f0737cfa3eccae47605e2922aaf12d960a0cb6309131dbba497f7c7d125e962edd055ff7df898cd406544971ed75906cb8c1db6004cf
6
+ metadata.gz: 2f4c207486e858a23480e52b4b9a479fd23b26f0259ef12e39b964d9d7f4cc0067f162207d88119f76414269d65e3ee3d7c675c46f5f143c5b016eacab6e888c
7
+ data.tar.gz: 2734c1dcc87c423566dd2f842ef7fdd1b7e3cbaa1ecac61dbfafdbc1769b43edca81d28ce60712008eee9d381d64c9e2dea71b210c1a10fecaef75696ee2fd05
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 0.1.3 (2020-06-28)
2
+
3
+ - Added support for Rover
4
+ - Raise error when missing user or item ids
5
+ - Fixed string keys for Daru data frames
6
+ - `optimize_item_recs` and `optimize_similar_users` methods are no longer experimental
7
+
1
8
  ## 0.1.2 (2020-03-26)
2
9
 
3
10
  - Added experimental `optimize_item_recs` and `optimize_similar_users` methods
data/README.md CHANGED
@@ -244,20 +244,26 @@ Data can be an array of hashes
244
244
  [{user_id: 1, item_id: 1, rating: 5}, {user_id: 2, item_id: 1, rating: 3}]
245
245
  ```
246
246
 
247
+ Or a Rover data frame
248
+
249
+ ```ruby
250
+ Rover.read_csv("ratings.csv")
251
+ ```
252
+
247
253
  Or a Daru data frame
248
254
 
249
255
  ```ruby
250
256
  Daru::DataFrame.from_csv("ratings.csv")
251
257
  ```
252
258
 
253
- ## Faster Similarity [experimental]
259
+ ## Faster Similarity
254
260
 
255
261
  If you have a large number of users/items, you can use an approximate nearest neighbors library like [NGT](https://github.com/ankane/ngt) to speed up item-based recommendations and similar users.
256
262
 
257
263
  Add this line to your application’s Gemfile:
258
264
 
259
265
  ```ruby
260
- gem 'ngt', '>= 0.2.3'
266
+ gem 'ngt', '>= 0.3.0'
261
267
  ```
262
268
 
263
269
  Speed up item-based recommendations with:
data/lib/disco/recommender.rb CHANGED
@@ -9,14 +9,8 @@ module Disco
9
9
  end
10
10
 
11
11
  def fit(train_set, validation_set: nil)
12
- if defined?(Daru)
13
- if train_set.is_a?(Daru::DataFrame)
14
- train_set = train_set.to_a[0]
15
- end
16
- if validation_set.is_a?(Daru::DataFrame)
17
- validation_set = validation_set.to_a[0]
18
- end
19
- end
12
+ train_set = to_dataset(train_set)
13
+ validation_set = to_dataset(validation_set) if validation_set
20
14
 
21
15
  @implicit = !train_set.any? { |v| v[:rating] }
22
16
 
@@ -190,6 +184,9 @@ module Disco
190
184
  user_ids = train_set.map { |v| v[:user_id] }.uniq.sort
191
185
  item_ids = train_set.map { |v| v[:item_id] }.uniq.sort
192
186
 
187
+ raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
188
+ raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
189
+
193
190
  @user_map = user_ids.zip(user_ids.size.times).to_h
194
191
  @item_map = item_ids.zip(item_ids.size.times).to_h
195
192
  end
@@ -207,6 +204,25 @@ module Disco
207
204
  raise ArgumentError, "No training data" if train_set.empty?
208
205
  end
209
206
 
207
+ def to_dataset(dataset)
208
+ if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
209
+ # convert keys to symbols
210
+ dataset = dataset.dup
211
+ dataset.keys.each do |k, v|
212
+ dataset[k.to_sym] ||= dataset.delete(k)
213
+ end
214
+ dataset.to_a
215
+ elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
216
+ # convert keys to symbols
217
+ dataset = dataset.dup
218
+ new_names = dataset.vectors.to_a.map { |k| [k, k.to_sym] }.to_h
219
+ dataset.rename_vectors!(new_names)
220
+ dataset.to_a[0]
221
+ else
222
+ dataset
223
+ end
224
+ end
225
+
210
226
  def marshal_dump
211
227
  obj = {
212
228
  implicit: @implicit,
data/lib/disco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-26 00:00:00.000000000 Z
11
+ date: 2020-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: libmf
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rover-df
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: ngt
127
141
  requirement: !ruby/object:Gem::Requirement