eps 0.3.4 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1f8c1d5af8015ba568528bce2c928cf59c97bda5c4f91a6ace0af72a9a864da
4
- data.tar.gz: 3cbb6dfca84687c833051147a0fcad16cd258dd09f48a93da61f051c2081f62c
3
+ metadata.gz: 1369016c3cae228f169fe580b54fca3c0d240cda202fa7d03ecc7a4e156ee8c7
4
+ data.tar.gz: bf83ca424c509798d1a1436806b52cba0cfdbefecb8d827d5b17aec7b807b121
5
5
  SHA512:
6
- metadata.gz: 83477bb53b14a04798ab85f2efc9d4bb3af3195bd100ca42d601d2dbb2bc7fa09e7886f414e5a1e3041128bd671e3d15e39e071fe8de9d8d19dacc4c7e702cfc
7
- data.tar.gz: 1914eeb9509916e9b4eb530f56dab0bfa73683a9197e157c8030fc11c96efca983a0d0f257d2845d890c9772ccfb6550651fb9fba60d1671db914c51b3675fc8
6
+ metadata.gz: 2bf47d80a301eb546c348aaa71f847fa22ace5bed63d97a1f19eb14bc15388b056cd3f545ccf251b2bbf2afc485ef81e5559849ff7459e9dd9f88a71c7cbf83a
7
+ data.tar.gz: 82d65d84e95a6518cd132c2a42cdec20afd05c0013192941b59ee0edb524874d12b2dd9082dd89be1422872c88e827e031469e43b80336c48c7eab7ff4fe611e
@@ -1,3 +1,8 @@
1
+ ## 0.3.5 (2020-06-10)
2
+
3
+ - Added `learning_rate` option for LightGBM
4
+ - Added support for Numo and Rover
5
+
1
6
  ## 0.3.4 (2020-04-05)
2
7
 
3
8
  - Added `predict_probability` for classification
data/README.md CHANGED
@@ -313,7 +313,7 @@ y = [1, 2, 3]
313
313
  Eps::Model.new(x, y)
314
314
  ```
315
315
 
316
- Or pass arrays of arrays
316
+ Data can be an array of arrays
317
317
 
318
318
  ```ruby
319
319
  x = [[1, 2], [2, 0], [3, 1]]
@@ -321,9 +321,22 @@ y = [1, 2, 3]
321
321
  Eps::Model.new(x, y)
322
322
  ```
323
323
 
324
- ### Daru
324
+ Or Numo arrays
325
325
 
326
- Eps works well with Daru data frames.
326
+ ```ruby
327
+ x = Numo::NArray.cast([[1, 2], [2, 0], [3, 1]])
328
+ y = Numo::NArray.cast([1, 2, 3])
329
+ Eps::Model.new(x, y)
330
+ ```
331
+
332
+ Or a Rover data frame
333
+
334
+ ```ruby
335
+ df = Rover.read_csv("houses.csv")
336
+ Eps::Model.new(df, target: "price")
337
+ ```
338
+
339
+ Or a Daru data frame
327
340
 
328
341
  ```ruby
329
342
  df = Daru::DataFrame.from_csv("houses.csv")
@@ -352,6 +365,14 @@ Eps supports:
352
365
  - Linear Regression
353
366
  - Naive Bayes
354
367
 
368
+ ### LightGBM
369
+
370
+ Pass the learning rate with:
371
+
372
+ ```ruby
373
+ Eps::Model.new(data, learning_rate: 0.01)
374
+ ```
375
+
355
376
  ### Linear Regression
356
377
 
357
378
  #### Performance
@@ -2,9 +2,8 @@ module Eps
2
2
  class BaseEstimator
3
3
  def initialize(data = nil, y = nil, **options)
4
4
  @options = options.dup
5
- # TODO better pattern - don't pass most options to train
6
- options.delete(:intercept)
7
5
  @trained = false
6
+ # TODO better pattern - don't pass most options to train
8
7
  train(data, y, **options) if data
9
8
  end
10
9
 
@@ -83,7 +82,7 @@ module Eps
83
82
  singular ? predictions.first : predictions
84
83
  end
85
84
 
86
- def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil, verbose: nil, text_features: nil, early_stopping: nil)
85
+ def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil, text_features: nil, **options)
87
86
  data, @target = prep_data(data, y, target, weight)
88
87
  @target_type = Utils.column_type(data.label, @target)
89
88
 
@@ -175,7 +174,7 @@ module Eps
175
174
  raise "No data in validation set" if validation_set && validation_set.empty?
176
175
 
177
176
  @validation_set = validation_set
178
- @evaluator = _train(verbose: verbose, early_stopping: early_stopping)
177
+ @evaluator = _train(**options)
179
178
 
180
179
  # reset pmml
181
180
  @pmml = nil
@@ -246,7 +245,7 @@ module Eps
246
245
 
247
246
  def check_missing(c, name)
248
247
  raise ArgumentError, "Missing column: #{name}" if !c
249
- raise ArgumentError, "Missing values in column #{name}" if c.any?(&:nil?)
248
+ raise ArgumentError, "Missing values in column #{name}" if c.to_a.any?(&:nil?)
250
249
  end
251
250
 
252
251
  def check_missing_value(df)
@@ -10,7 +10,7 @@ module Eps
10
10
  data.columns.each do |k, v|
11
11
  @columns[k] = v
12
12
  end
13
- elsif daru?(data)
13
+ elsif rover?(data) || daru?(data)
14
14
  data.to_h.each do |k, v|
15
15
  @columns[k.to_s] = v.to_a
16
16
  end
@@ -19,6 +19,8 @@ module Eps
19
19
  @columns[k.to_s] = v.to_a
20
20
  end
21
21
  else
22
+ data = data.to_a if numo?(data)
23
+
22
24
  if data.any?
23
25
  row = data[0]
24
26
 
@@ -140,8 +142,16 @@ module Eps
140
142
 
141
143
  private
142
144
 
145
+ def numo?(x)
146
+ defined?(Numo::NArray) && x.is_a?(Numo::NArray)
147
+ end
148
+
149
+ def rover?(x)
150
+ defined?(Rover::DataFrame) && x.is_a?(Rover::DataFrame)
151
+ end
152
+
143
153
  def daru?(x)
144
- defined?(Daru) && x.is_a?(Daru::DataFrame)
154
+ defined?(Daru::DataFrame) && x.is_a?(Daru::DataFrame)
145
155
  end
146
156
  end
147
157
  end
@@ -17,7 +17,7 @@ module Eps
17
17
  str
18
18
  end
19
19
 
20
- def _train(verbose: nil, early_stopping: nil)
20
+ def _train(verbose: nil, early_stopping: nil, learning_rate: 0.1)
21
21
  train_set = @train_set
22
22
  validation_set = @validation_set.dup
23
23
  summary_label = train_set.label
@@ -60,7 +60,10 @@ module Eps
60
60
  prep_text_features(validation_set) if validation_set
61
61
 
62
62
  # create params
63
- params = {objective: objective}
63
+ params = {
64
+ objective: objective,
65
+ learning_rate: learning_rate
66
+ }
64
67
  params[:num_classes] = labels.size if objective == "multiclass"
65
68
  if train_set.size < 30
66
69
  params[:min_data_in_bin] = 1
@@ -37,6 +37,7 @@ module Eps
37
37
  str
38
38
  end
39
39
 
40
+ # TODO use keyword arguments for gsl and intercept in 0.4.0
40
41
  def _train(**options)
41
42
  raise "Target must be numeric" if @target_type != "numeric"
42
43
  check_missing_value(@train_set)
@@ -61,7 +62,7 @@ module Eps
61
62
  false
62
63
  end
63
64
 
64
- intercept = @options.key?(:intercept) ? @options[:intercept] : true
65
+ intercept = options.key?(:intercept) ? options[:intercept] : true
65
66
  if intercept && gsl != :gslr
66
67
  data.size.times do |i|
67
68
  x[i].unshift(1)
@@ -17,7 +17,7 @@ module Eps
17
17
  str
18
18
  end
19
19
 
20
- def _train(smoothing: 1, **options)
20
+ def _train(smoothing: 1)
21
21
  raise "Target must be strings" if @target_type != "categorical"
22
22
  check_missing_value(@train_set)
23
23
  check_missing_value(@validation_set) if @validation_set
@@ -1,3 +1,3 @@
1
1
  module Eps
2
- VERSION = "0.3.4"
2
+ VERSION = "0.3.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: eps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-05 00:00:00.000000000 Z
11
+ date: 2020-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lightgbm
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: numo-narray
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rake
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -94,6 +108,20 @@ dependencies:
94
108
  - - ">="
95
109
  - !ruby/object:Gem::Version
96
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rover-df
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
97
125
  description:
98
126
  email: andrew@chartkick.com
99
127
  executables: []