eps 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +24 -3
- data/lib/eps/base_estimator.rb +4 -5
- data/lib/eps/data_frame.rb +12 -2
- data/lib/eps/lightgbm.rb +5 -2
- data/lib/eps/linear_regression.rb +2 -1
- data/lib/eps/naive_bayes.rb +1 -1
- data/lib/eps/version.rb +1 -1
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1369016c3cae228f169fe580b54fca3c0d240cda202fa7d03ecc7a4e156ee8c7
|
4
|
+
data.tar.gz: bf83ca424c509798d1a1436806b52cba0cfdbefecb8d827d5b17aec7b807b121
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2bf47d80a301eb546c348aaa71f847fa22ace5bed63d97a1f19eb14bc15388b056cd3f545ccf251b2bbf2afc485ef81e5559849ff7459e9dd9f88a71c7cbf83a
|
7
|
+
data.tar.gz: 82d65d84e95a6518cd132c2a42cdec20afd05c0013192941b59ee0edb524874d12b2dd9082dd89be1422872c88e827e031469e43b80336c48c7eab7ff4fe611e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -313,7 +313,7 @@ y = [1, 2, 3]
|
|
313
313
|
Eps::Model.new(x, y)
|
314
314
|
```
|
315
315
|
|
316
|
-
|
316
|
+
Data can be an array of arrays
|
317
317
|
|
318
318
|
```ruby
|
319
319
|
x = [[1, 2], [2, 0], [3, 1]]
|
@@ -321,9 +321,22 @@ y = [1, 2, 3]
|
|
321
321
|
Eps::Model.new(x, y)
|
322
322
|
```
|
323
323
|
|
324
|
-
|
324
|
+
Or Numo arrays
|
325
325
|
|
326
|
-
|
326
|
+
```ruby
|
327
|
+
x = Numo::NArray.cast([[1, 2], [2, 0], [3, 1]])
|
328
|
+
y = Numo::NArray.cast([1, 2, 3])
|
329
|
+
Eps::Model.new(x, y)
|
330
|
+
```
|
331
|
+
|
332
|
+
Or a Rover data frame
|
333
|
+
|
334
|
+
```ruby
|
335
|
+
df = Rover.read_csv("houses.csv")
|
336
|
+
Eps::Model.new(df, target: "price")
|
337
|
+
```
|
338
|
+
|
339
|
+
Or a Daru data frame
|
327
340
|
|
328
341
|
```ruby
|
329
342
|
df = Daru::DataFrame.from_csv("houses.csv")
|
@@ -352,6 +365,14 @@ Eps supports:
|
|
352
365
|
- Linear Regression
|
353
366
|
- Naive Bayes
|
354
367
|
|
368
|
+
### LightGBM
|
369
|
+
|
370
|
+
Pass the learning rate with:
|
371
|
+
|
372
|
+
```ruby
|
373
|
+
Eps::Model.new(data, learning_rate: 0.01)
|
374
|
+
```
|
375
|
+
|
355
376
|
### Linear Regression
|
356
377
|
|
357
378
|
#### Performance
|
data/lib/eps/base_estimator.rb
CHANGED
@@ -2,9 +2,8 @@ module Eps
|
|
2
2
|
class BaseEstimator
|
3
3
|
def initialize(data = nil, y = nil, **options)
|
4
4
|
@options = options.dup
|
5
|
-
# TODO better pattern - don't pass most options to train
|
6
|
-
options.delete(:intercept)
|
7
5
|
@trained = false
|
6
|
+
# TODO better pattern - don't pass most options to train
|
8
7
|
train(data, y, **options) if data
|
9
8
|
end
|
10
9
|
|
@@ -83,7 +82,7 @@ module Eps
|
|
83
82
|
singular ? predictions.first : predictions
|
84
83
|
end
|
85
84
|
|
86
|
-
def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil,
|
85
|
+
def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil, text_features: nil, **options)
|
87
86
|
data, @target = prep_data(data, y, target, weight)
|
88
87
|
@target_type = Utils.column_type(data.label, @target)
|
89
88
|
|
@@ -175,7 +174,7 @@ module Eps
|
|
175
174
|
raise "No data in validation set" if validation_set && validation_set.empty?
|
176
175
|
|
177
176
|
@validation_set = validation_set
|
178
|
-
@evaluator = _train(
|
177
|
+
@evaluator = _train(**options)
|
179
178
|
|
180
179
|
# reset pmml
|
181
180
|
@pmml = nil
|
@@ -246,7 +245,7 @@ module Eps
|
|
246
245
|
|
247
246
|
def check_missing(c, name)
|
248
247
|
raise ArgumentError, "Missing column: #{name}" if !c
|
249
|
-
raise ArgumentError, "Missing values in column #{name}" if c.any?(&:nil?)
|
248
|
+
raise ArgumentError, "Missing values in column #{name}" if c.to_a.any?(&:nil?)
|
250
249
|
end
|
251
250
|
|
252
251
|
def check_missing_value(df)
|
data/lib/eps/data_frame.rb
CHANGED
@@ -10,7 +10,7 @@ module Eps
|
|
10
10
|
data.columns.each do |k, v|
|
11
11
|
@columns[k] = v
|
12
12
|
end
|
13
|
-
elsif daru?(data)
|
13
|
+
elsif rover?(data) || daru?(data)
|
14
14
|
data.to_h.each do |k, v|
|
15
15
|
@columns[k.to_s] = v.to_a
|
16
16
|
end
|
@@ -19,6 +19,8 @@ module Eps
|
|
19
19
|
@columns[k.to_s] = v.to_a
|
20
20
|
end
|
21
21
|
else
|
22
|
+
data = data.to_a if numo?(data)
|
23
|
+
|
22
24
|
if data.any?
|
23
25
|
row = data[0]
|
24
26
|
|
@@ -140,8 +142,16 @@ module Eps
|
|
140
142
|
|
141
143
|
private
|
142
144
|
|
145
|
+
def numo?(x)
|
146
|
+
defined?(Numo::NArray) && x.is_a?(Numo::NArray)
|
147
|
+
end
|
148
|
+
|
149
|
+
def rover?(x)
|
150
|
+
defined?(Rover::DataFrame) && x.is_a?(Rover::DataFrame)
|
151
|
+
end
|
152
|
+
|
143
153
|
def daru?(x)
|
144
|
-
defined?(Daru) && x.is_a?(Daru::DataFrame)
|
154
|
+
defined?(Daru::DataFrame) && x.is_a?(Daru::DataFrame)
|
145
155
|
end
|
146
156
|
end
|
147
157
|
end
|
data/lib/eps/lightgbm.rb
CHANGED
@@ -17,7 +17,7 @@ module Eps
|
|
17
17
|
str
|
18
18
|
end
|
19
19
|
|
20
|
-
def _train(verbose: nil, early_stopping: nil)
|
20
|
+
def _train(verbose: nil, early_stopping: nil, learning_rate: 0.1)
|
21
21
|
train_set = @train_set
|
22
22
|
validation_set = @validation_set.dup
|
23
23
|
summary_label = train_set.label
|
@@ -60,7 +60,10 @@ module Eps
|
|
60
60
|
prep_text_features(validation_set) if validation_set
|
61
61
|
|
62
62
|
# create params
|
63
|
-
params = {
|
63
|
+
params = {
|
64
|
+
objective: objective,
|
65
|
+
learning_rate: learning_rate
|
66
|
+
}
|
64
67
|
params[:num_classes] = labels.size if objective == "multiclass"
|
65
68
|
if train_set.size < 30
|
66
69
|
params[:min_data_in_bin] = 1
|
data/lib/eps/linear_regression.rb
CHANGED
@@ -37,6 +37,7 @@ module Eps
|
|
37
37
|
str
|
38
38
|
end
|
39
39
|
|
40
|
+
# TODO use keyword arguments for gsl and intercept in 0.4.0
|
40
41
|
def _train(**options)
|
41
42
|
raise "Target must be numeric" if @target_type != "numeric"
|
42
43
|
check_missing_value(@train_set)
|
@@ -61,7 +62,7 @@ module Eps
|
|
61
62
|
false
|
62
63
|
end
|
63
64
|
|
64
|
-
intercept =
|
65
|
+
intercept = options.key?(:intercept) ? options[:intercept] : true
|
65
66
|
if intercept && gsl != :gslr
|
66
67
|
data.size.times do |i|
|
67
68
|
x[i].unshift(1)
|
data/lib/eps/naive_bayes.rb
CHANGED
data/lib/eps/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lightgbm
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: numo-narray
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: rake
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,6 +108,20 @@ dependencies:
|
|
94
108
|
- - ">="
|
95
109
|
- !ruby/object:Gem::Version
|
96
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rover-df
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
97
125
|
description:
|
98
126
|
email: andrew@chartkick.com
|
99
127
|
executables: []
|