eps 0.3.4 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +24 -3
- data/lib/eps/base_estimator.rb +4 -5
- data/lib/eps/data_frame.rb +12 -2
- data/lib/eps/lightgbm.rb +5 -2
- data/lib/eps/linear_regression.rb +2 -1
- data/lib/eps/naive_bayes.rb +1 -1
- data/lib/eps/version.rb +1 -1
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1369016c3cae228f169fe580b54fca3c0d240cda202fa7d03ecc7a4e156ee8c7
|
4
|
+
data.tar.gz: bf83ca424c509798d1a1436806b52cba0cfdbefecb8d827d5b17aec7b807b121
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2bf47d80a301eb546c348aaa71f847fa22ace5bed63d97a1f19eb14bc15388b056cd3f545ccf251b2bbf2afc485ef81e5559849ff7459e9dd9f88a71c7cbf83a
|
7
|
+
data.tar.gz: 82d65d84e95a6518cd132c2a42cdec20afd05c0013192941b59ee0edb524874d12b2dd9082dd89be1422872c88e827e031469e43b80336c48c7eab7ff4fe611e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -313,7 +313,7 @@ y = [1, 2, 3]
|
|
313
313
|
Eps::Model.new(x, y)
|
314
314
|
```
|
315
315
|
|
316
|
-
|
316
|
+
Data can be an array of arrays
|
317
317
|
|
318
318
|
```ruby
|
319
319
|
x = [[1, 2], [2, 0], [3, 1]]
|
@@ -321,9 +321,22 @@ y = [1, 2, 3]
|
|
321
321
|
Eps::Model.new(x, y)
|
322
322
|
```
|
323
323
|
|
324
|
-
|
324
|
+
Or Numo arrays
|
325
325
|
|
326
|
-
|
326
|
+
```ruby
|
327
|
+
x = Numo::NArray.cast([[1, 2], [2, 0], [3, 1]])
|
328
|
+
y = Numo::NArray.cast([1, 2, 3])
|
329
|
+
Eps::Model.new(x, y)
|
330
|
+
```
|
331
|
+
|
332
|
+
Or a Rover data frame
|
333
|
+
|
334
|
+
```ruby
|
335
|
+
df = Rover.read_csv("houses.csv")
|
336
|
+
Eps::Model.new(df, target: "price")
|
337
|
+
```
|
338
|
+
|
339
|
+
Or a Daru data frame
|
327
340
|
|
328
341
|
```ruby
|
329
342
|
df = Daru::DataFrame.from_csv("houses.csv")
|
@@ -352,6 +365,14 @@ Eps supports:
|
|
352
365
|
- Linear Regression
|
353
366
|
- Naive Bayes
|
354
367
|
|
368
|
+
### LightGBM
|
369
|
+
|
370
|
+
Pass the learning rate with:
|
371
|
+
|
372
|
+
```ruby
|
373
|
+
Eps::Model.new(data, learning_rate: 0.01)
|
374
|
+
```
|
375
|
+
|
355
376
|
### Linear Regression
|
356
377
|
|
357
378
|
#### Performance
|
data/lib/eps/base_estimator.rb
CHANGED
@@ -2,9 +2,8 @@ module Eps
|
|
2
2
|
class BaseEstimator
|
3
3
|
def initialize(data = nil, y = nil, **options)
|
4
4
|
@options = options.dup
|
5
|
-
# TODO better pattern - don't pass most options to train
|
6
|
-
options.delete(:intercept)
|
7
5
|
@trained = false
|
6
|
+
# TODO better pattern - don't pass most options to train
|
8
7
|
train(data, y, **options) if data
|
9
8
|
end
|
10
9
|
|
@@ -83,7 +82,7 @@ module Eps
|
|
83
82
|
singular ? predictions.first : predictions
|
84
83
|
end
|
85
84
|
|
86
|
-
def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil,
|
85
|
+
def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil, text_features: nil, **options)
|
87
86
|
data, @target = prep_data(data, y, target, weight)
|
88
87
|
@target_type = Utils.column_type(data.label, @target)
|
89
88
|
|
@@ -175,7 +174,7 @@ module Eps
|
|
175
174
|
raise "No data in validation set" if validation_set && validation_set.empty?
|
176
175
|
|
177
176
|
@validation_set = validation_set
|
178
|
-
@evaluator = _train(
|
177
|
+
@evaluator = _train(**options)
|
179
178
|
|
180
179
|
# reset pmml
|
181
180
|
@pmml = nil
|
@@ -246,7 +245,7 @@ module Eps
|
|
246
245
|
|
247
246
|
def check_missing(c, name)
|
248
247
|
raise ArgumentError, "Missing column: #{name}" if !c
|
249
|
-
raise ArgumentError, "Missing values in column #{name}" if c.any?(&:nil?)
|
248
|
+
raise ArgumentError, "Missing values in column #{name}" if c.to_a.any?(&:nil?)
|
250
249
|
end
|
251
250
|
|
252
251
|
def check_missing_value(df)
|
data/lib/eps/data_frame.rb
CHANGED
@@ -10,7 +10,7 @@ module Eps
|
|
10
10
|
data.columns.each do |k, v|
|
11
11
|
@columns[k] = v
|
12
12
|
end
|
13
|
-
elsif daru?(data)
|
13
|
+
elsif rover?(data) || daru?(data)
|
14
14
|
data.to_h.each do |k, v|
|
15
15
|
@columns[k.to_s] = v.to_a
|
16
16
|
end
|
@@ -19,6 +19,8 @@ module Eps
|
|
19
19
|
@columns[k.to_s] = v.to_a
|
20
20
|
end
|
21
21
|
else
|
22
|
+
data = data.to_a if numo?(data)
|
23
|
+
|
22
24
|
if data.any?
|
23
25
|
row = data[0]
|
24
26
|
|
@@ -140,8 +142,16 @@ module Eps
|
|
140
142
|
|
141
143
|
private
|
142
144
|
|
145
|
+
def numo?(x)
|
146
|
+
defined?(Numo::NArray) && x.is_a?(Numo::NArray)
|
147
|
+
end
|
148
|
+
|
149
|
+
def rover?(x)
|
150
|
+
defined?(Rover::DataFrame) && x.is_a?(Rover::DataFrame)
|
151
|
+
end
|
152
|
+
|
143
153
|
def daru?(x)
|
144
|
-
defined?(Daru) && x.is_a?(Daru::DataFrame)
|
154
|
+
defined?(Daru::DataFrame) && x.is_a?(Daru::DataFrame)
|
145
155
|
end
|
146
156
|
end
|
147
157
|
end
|
data/lib/eps/lightgbm.rb
CHANGED
@@ -17,7 +17,7 @@ module Eps
|
|
17
17
|
str
|
18
18
|
end
|
19
19
|
|
20
|
-
def _train(verbose: nil, early_stopping: nil)
|
20
|
+
def _train(verbose: nil, early_stopping: nil, learning_rate: 0.1)
|
21
21
|
train_set = @train_set
|
22
22
|
validation_set = @validation_set.dup
|
23
23
|
summary_label = train_set.label
|
@@ -60,7 +60,10 @@ module Eps
|
|
60
60
|
prep_text_features(validation_set) if validation_set
|
61
61
|
|
62
62
|
# create params
|
63
|
-
params = {
|
63
|
+
params = {
|
64
|
+
objective: objective,
|
65
|
+
learning_rate: learning_rate
|
66
|
+
}
|
64
67
|
params[:num_classes] = labels.size if objective == "multiclass"
|
65
68
|
if train_set.size < 30
|
66
69
|
params[:min_data_in_bin] = 1
|
data/lib/eps/linear_regression.rb
CHANGED
@@ -37,6 +37,7 @@ module Eps
|
|
37
37
|
str
|
38
38
|
end
|
39
39
|
|
40
|
+
# TODO use keyword arguments for gsl and intercept in 0.4.0
|
40
41
|
def _train(**options)
|
41
42
|
raise "Target must be numeric" if @target_type != "numeric"
|
42
43
|
check_missing_value(@train_set)
|
@@ -61,7 +62,7 @@ module Eps
|
|
61
62
|
false
|
62
63
|
end
|
63
64
|
|
64
|
-
intercept =
|
65
|
+
intercept = options.key?(:intercept) ? options[:intercept] : true
|
65
66
|
if intercept && gsl != :gslr
|
66
67
|
data.size.times do |i|
|
67
68
|
x[i].unshift(1)
|
data/lib/eps/naive_bayes.rb
CHANGED
data/lib/eps/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lightgbm
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: numo-narray
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: rake
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,6 +108,20 @@ dependencies:
|
|
94
108
|
- - ">="
|
95
109
|
- !ruby/object:Gem::Version
|
96
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rover-df
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
97
125
|
description:
|
98
126
|
email: andrew@chartkick.com
|
99
127
|
executables: []
|