disco 0.1.2 → 0.1.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04d278a7daf8187ac8a5eadaa279c98a0a51a8cf0ad596e793198dcc9141233a
4
- data.tar.gz: '0916f7cfb91d5bf48ce1186502f15647c102eba54e07bdc33eb042b75e1fb0c6'
3
+ metadata.gz: 33961b51cd8461f821c4622f5983b2ac6138cc3b70c9be8ef1d3a6e82c37ab9e
4
+ data.tar.gz: f4e8cdfa4efb354878c459b57b522a81cd3f0c81e4297c53f9dc88517b312ac8
5
5
  SHA512:
6
- metadata.gz: a8e977bcf2988e8e4cb85b13959446d068e3a41feeca26f3789ff7aa0a454258340bc81fb3adb470e0143cc6027cd803ef034900cc29db4648b01f855f6cb011
7
- data.tar.gz: defc71dd93461a114338f0737cfa3eccae47605e2922aaf12d960a0cb6309131dbba497f7c7d125e962edd055ff7df898cd406544971ed75906cb8c1db6004cf
6
+ metadata.gz: 2f4c207486e858a23480e52b4b9a479fd23b26f0259ef12e39b964d9d7f4cc0067f162207d88119f76414269d65e3ee3d7c675c46f5f143c5b016eacab6e888c
7
+ data.tar.gz: 2734c1dcc87c423566dd2f842ef7fdd1b7e3cbaa1ecac61dbfafdbc1769b43edca81d28ce60712008eee9d381d64c9e2dea71b210c1a10fecaef75696ee2fd05
@@ -1,3 +1,10 @@
1
+ ## 0.1.3 (2020-06-28)
2
+
3
+ - Added support for Rover
4
+ - Raise error when missing user or item ids
5
+ - Fixed string keys for Daru data frames
6
+ - `optimize_item_recs` and `optimize_similar_users` methods are no longer experimental
7
+
1
8
  ## 0.1.2 (2020-03-26)
2
9
 
3
10
  - Added experimental `optimize_item_recs` and `optimize_similar_users` methods
data/README.md CHANGED
@@ -244,20 +244,26 @@ Data can be an array of hashes
244
244
  [{user_id: 1, item_id: 1, rating: 5}, {user_id: 2, item_id: 1, rating: 3}]
245
245
  ```
246
246
 
247
+ Or a Rover data frame
248
+
249
+ ```ruby
250
+ Rover.read_csv("ratings.csv")
251
+ ```
252
+
247
253
  Or a Daru data frame
248
254
 
249
255
  ```ruby
250
256
  Daru::DataFrame.from_csv("ratings.csv")
251
257
  ```
252
258
 
253
- ## Faster Similarity [experimental]
259
+ ## Faster Similarity
254
260
 
255
261
  If you have a large number of users/items, you can use an approximate nearest neighbors library like [NGT](https://github.com/ankane/ngt) to speed up item-based recommendations and similar users.
256
262
 
257
263
  Add this line to your application’s Gemfile:
258
264
 
259
265
  ```ruby
260
- gem 'ngt', '>= 0.2.3'
266
+ gem 'ngt', '>= 0.3.0'
261
267
  ```
262
268
 
263
269
  Speed up item-based recommendations with:
@@ -9,14 +9,8 @@ module Disco
9
9
  end
10
10
 
11
11
  def fit(train_set, validation_set: nil)
12
- if defined?(Daru)
13
- if train_set.is_a?(Daru::DataFrame)
14
- train_set = train_set.to_a[0]
15
- end
16
- if validation_set.is_a?(Daru::DataFrame)
17
- validation_set = validation_set.to_a[0]
18
- end
19
- end
12
+ train_set = to_dataset(train_set)
13
+ validation_set = to_dataset(validation_set) if validation_set
20
14
 
21
15
  @implicit = !train_set.any? { |v| v[:rating] }
22
16
 
@@ -190,6 +184,9 @@ module Disco
190
184
  user_ids = train_set.map { |v| v[:user_id] }.uniq.sort
191
185
  item_ids = train_set.map { |v| v[:item_id] }.uniq.sort
192
186
 
187
+ raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
188
+ raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
189
+
193
190
  @user_map = user_ids.zip(user_ids.size.times).to_h
194
191
  @item_map = item_ids.zip(item_ids.size.times).to_h
195
192
  end
@@ -207,6 +204,25 @@ module Disco
207
204
  raise ArgumentError, "No training data" if train_set.empty?
208
205
  end
209
206
 
207
+ def to_dataset(dataset)
208
+ if defined?(Rover::DataFrame) && dataset.is_a?(Rover::DataFrame)
209
+ # convert keys to symbols
210
+ dataset = dataset.dup
211
+ dataset.keys.each do |k, v|
212
+ dataset[k.to_sym] ||= dataset.delete(k)
213
+ end
214
+ dataset.to_a
215
+ elsif defined?(Daru::DataFrame) && dataset.is_a?(Daru::DataFrame)
216
+ # convert keys to symbols
217
+ dataset = dataset.dup
218
+ new_names = dataset.vectors.to_a.map { |k| [k, k.to_sym] }.to_h
219
+ dataset.rename_vectors!(new_names)
220
+ dataset.to_a[0]
221
+ else
222
+ dataset
223
+ end
224
+ end
225
+
210
226
  def marshal_dump
211
227
  obj = {
212
228
  implicit: @implicit,
@@ -1,3 +1,3 @@
1
1
  module Disco
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: disco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-26 00:00:00.000000000 Z
11
+ date: 2020-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: libmf
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rover-df
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: ngt
127
141
  requirement: !ruby/object:Gem::Requirement