eps 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +13 -4
- data/lib/eps/base_estimator.rb +28 -20
- data/lib/eps/evaluators/lightgbm.rb +18 -5
- data/lib/eps/evaluators/linear_regression.rb +3 -1
- data/lib/eps/evaluators/naive_bayes.rb +7 -6
- data/lib/eps/lightgbm.rb +9 -7
- data/lib/eps/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1f8c1d5af8015ba568528bce2c928cf59c97bda5c4f91a6ace0af72a9a864da
|
4
|
+
data.tar.gz: 3cbb6dfca84687c833051147a0fcad16cd258dd09f48a93da61f051c2081f62c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83477bb53b14a04798ab85f2efc9d4bb3af3195bd100ca42d601d2dbb2bc7fa09e7886f414e5a1e3041128bd671e3d15e39e071fe8de9d8d19dacc4c7e702cfc
|
7
|
+
data.tar.gz: 1914eeb9509916e9b4eb530f56dab0bfa73683a9197e157c8030fc11c96efca983a0d0f257d2845d890c9772ccfb6550651fb9fba60d1671db914c51b3675fc8
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,6 @@ Machine learning for Ruby
|
|
4
4
|
|
5
5
|
- Build predictive models quickly and easily
|
6
6
|
- Serve models built in Ruby, Python, R, and more
|
7
|
-
- No prior knowledge of machine learning required :tada:
|
8
7
|
|
9
8
|
Check out [this post](https://ankane.org/rails-meet-data-science) for more info on machine learning with Rails
|
10
9
|
|
@@ -379,6 +378,16 @@ By default, an intercept is included. Disable this with:
|
|
379
378
|
Eps::Model.new(data, intercept: false)
|
380
379
|
```
|
381
380
|
|
381
|
+
## Probability
|
382
|
+
|
383
|
+
To get the probability of each category for predictions with classification, use:
|
384
|
+
|
385
|
+
```ruby
|
386
|
+
model.predict_probability(data)
|
387
|
+
```
|
388
|
+
|
389
|
+
Naive Bayes is known to produce poor probability estimates, so stick with LightGBM if you need this.
|
390
|
+
|
382
391
|
## Validation Options
|
383
392
|
|
384
393
|
Pass your own validation set with:
|
@@ -414,7 +423,7 @@ The database is another place you can store models. It’s good if you retrain m
|
|
414
423
|
Create an ActiveRecord model to store the predictive model.
|
415
424
|
|
416
425
|
```sh
|
417
|
-
rails
|
426
|
+
rails generate model Model key:string:uniq data:text
|
418
427
|
```
|
419
428
|
|
420
429
|
Store the model with:
|
@@ -524,11 +533,11 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
524
533
|
- Write, clarify, or fix documentation
|
525
534
|
- Suggest or add new features
|
526
535
|
|
527
|
-
To get started with development
|
536
|
+
To get started with development:
|
528
537
|
|
529
538
|
```sh
|
530
539
|
git clone https://github.com/ankane/eps.git
|
531
540
|
cd eps
|
532
541
|
bundle install
|
533
|
-
rake test
|
542
|
+
bundle exec rake test
|
534
543
|
```
|
data/lib/eps/base_estimator.rb
CHANGED
@@ -9,27 +9,11 @@ module Eps
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def predict(data)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
data = Eps::DataFrame.new(data)
|
16
|
-
|
17
|
-
@evaluator.features.each do |k, type|
|
18
|
-
values = data.columns[k]
|
19
|
-
raise ArgumentError, "Missing column: #{k}" if !values
|
20
|
-
column_type = Utils.column_type(values.compact, k) if values
|
21
|
-
|
22
|
-
if !column_type.nil?
|
23
|
-
if (type == "numeric" && column_type != "numeric") || (type != "numeric" && column_type != "categorical")
|
24
|
-
raise ArgumentError, "Bad type for column #{k}: Expected #{type} but got #{column_type}"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
# TODO check for unknown values for categorical features
|
28
|
-
end
|
29
|
-
|
30
|
-
predictions = @evaluator.predict(data)
|
12
|
+
_predict(data, false)
|
13
|
+
end
|
31
14
|
|
32
|
-
|
15
|
+
def predict_probability(data)
|
16
|
+
_predict(data, true)
|
33
17
|
end
|
34
18
|
|
35
19
|
def evaluate(data, y = nil, target: nil, weight: nil)
|
@@ -75,6 +59,30 @@ module Eps
|
|
75
59
|
|
76
60
|
private
|
77
61
|
|
62
|
+
def _predict(data, probabilities)
|
63
|
+
singular = data.is_a?(Hash)
|
64
|
+
data = [data] if singular
|
65
|
+
|
66
|
+
data = Eps::DataFrame.new(data)
|
67
|
+
|
68
|
+
@evaluator.features.each do |k, type|
|
69
|
+
values = data.columns[k]
|
70
|
+
raise ArgumentError, "Missing column: #{k}" if !values
|
71
|
+
column_type = Utils.column_type(values.compact, k) if values
|
72
|
+
|
73
|
+
if !column_type.nil?
|
74
|
+
if (type == "numeric" && column_type != "numeric") || (type != "numeric" && column_type != "categorical")
|
75
|
+
raise ArgumentError, "Bad type for column #{k}: Expected #{type} but got #{column_type}"
|
76
|
+
end
|
77
|
+
end
|
78
|
+
# TODO check for unknown values for categorical features
|
79
|
+
end
|
80
|
+
|
81
|
+
predictions = @evaluator.predict(data, probabilities: probabilities)
|
82
|
+
|
83
|
+
singular ? predictions.first : predictions
|
84
|
+
end
|
85
|
+
|
78
86
|
def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil, verbose: nil, text_features: nil, early_stopping: nil)
|
79
87
|
data, @target = prep_data(data, y, target, weight)
|
80
88
|
@target_type = Utils.column_type(data.label, @target)
|
@@ -11,7 +11,9 @@ module Eps
|
|
11
11
|
@text_features = text_features
|
12
12
|
end
|
13
13
|
|
14
|
-
def predict(data)
|
14
|
+
def predict(data, probabilities: false)
|
15
|
+
raise "Probabilities not supported" if probabilities && @objective == "regression"
|
16
|
+
|
15
17
|
rows = data.map(&:to_h)
|
16
18
|
|
17
19
|
# sparse matrix
|
@@ -38,7 +40,12 @@ module Eps
|
|
38
40
|
when "regression"
|
39
41
|
sum_trees(rows, @trees)
|
40
42
|
when "binary"
|
41
|
-
sum_trees(rows, @trees).map { |s|
|
43
|
+
prob = sum_trees(rows, @trees).map { |s| sigmoid(s) }
|
44
|
+
if probabilities
|
45
|
+
prob.map { |v| @labels.zip([1 - v, v]).to_h }
|
46
|
+
else
|
47
|
+
prob.map { |v| @labels[v > 0.5 ? 1 : 0] }
|
48
|
+
end
|
42
49
|
else
|
43
50
|
tree_scores = []
|
44
51
|
num_trees = @trees.size / @labels.size
|
@@ -47,8 +54,14 @@ module Eps
|
|
47
54
|
end
|
48
55
|
data.size.times.map do |i|
|
49
56
|
v = tree_scores.map { |s| s[i] }
|
50
|
-
|
51
|
-
|
57
|
+
if probabilities
|
58
|
+
exp = v.map { |vi| Math.exp(vi) }
|
59
|
+
sum = exp.sum
|
60
|
+
@labels.zip(exp.map { |e| e / sum }).to_h
|
61
|
+
else
|
62
|
+
idx = v.map.with_index.max_by { |v2, _| v2 }.last
|
63
|
+
@labels[idx]
|
64
|
+
end
|
52
65
|
end
|
53
66
|
end
|
54
67
|
end
|
@@ -109,7 +122,7 @@ module Eps
|
|
109
122
|
end
|
110
123
|
|
111
124
|
def sigmoid(x)
|
112
|
-
1.0 / (1 + Math
|
125
|
+
1.0 / (1 + Math.exp(-x))
|
113
126
|
end
|
114
127
|
end
|
115
128
|
end
|
@@ -9,7 +9,9 @@ module Eps
|
|
9
9
|
@text_features = text_features || {}
|
10
10
|
end
|
11
11
|
|
12
|
-
def predict(x)
|
12
|
+
def predict(x, probabilities: false)
|
13
|
+
raise "Probabilities not supported" if probabilities
|
14
|
+
|
13
15
|
intercept = @coefficients["_intercept"] || 0.0
|
14
16
|
scores = [intercept] * x.size
|
15
17
|
|
@@ -10,14 +10,15 @@ module Eps
|
|
10
10
|
@legacy = legacy
|
11
11
|
end
|
12
12
|
|
13
|
-
def predict(x)
|
13
|
+
def predict(x, probabilities: false)
|
14
14
|
probs = calculate_class_probabilities(x)
|
15
15
|
probs.map do |xp|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
if probabilities
|
17
|
+
sum = xp.values.map { |v| Math.exp(v) }.sum.to_f
|
18
|
+
xp.map { |k, v| [k, Math.exp(v) / sum] }.to_h
|
19
|
+
else
|
20
|
+
xp.sort_by { |k, v| [-v, k] }[0][0]
|
21
|
+
end
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
data/lib/eps/lightgbm.rb
CHANGED
@@ -121,18 +121,20 @@ module Eps
|
|
121
121
|
def check_evaluator(objective, labels, booster, booster_set, evaluator, evaluator_set)
|
122
122
|
expected = @booster.predict(booster_set.map_rows(&:to_a))
|
123
123
|
if objective == "multiclass"
|
124
|
-
|
125
|
-
|
126
|
-
|
124
|
+
actual = evaluator.predict(evaluator_set, probabilities: true)
|
125
|
+
# just compare first for now
|
126
|
+
expected.map! { |v| v.first }
|
127
|
+
actual.map! { |v| v.values.first }
|
127
128
|
elsif objective == "binary"
|
128
|
-
|
129
|
+
actual = evaluator.predict(evaluator_set, probabilities: true).map { |v| v.values.last }
|
130
|
+
else
|
131
|
+
actual = evaluator.predict(evaluator_set)
|
129
132
|
end
|
130
|
-
actual = evaluator.predict(evaluator_set)
|
131
133
|
|
132
|
-
regression = objective == "regression"
|
134
|
+
regression = objective == "regression" || objective == "binary"
|
133
135
|
bad_observations = []
|
134
136
|
expected.zip(actual).each_with_index do |(exp, act), i|
|
135
|
-
success =
|
137
|
+
success = (act - exp).abs < 0.001
|
136
138
|
unless success
|
137
139
|
bad_observations << {expected: exp, actual: act, data_point: evaluator_set[i].map(&:itself).first}
|
138
140
|
end
|
data/lib/eps/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lightgbm
|