eps 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +13 -4
- data/lib/eps/base_estimator.rb +28 -20
- data/lib/eps/evaluators/lightgbm.rb +18 -5
- data/lib/eps/evaluators/linear_regression.rb +3 -1
- data/lib/eps/evaluators/naive_bayes.rb +7 -6
- data/lib/eps/lightgbm.rb +9 -7
- data/lib/eps/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1f8c1d5af8015ba568528bce2c928cf59c97bda5c4f91a6ace0af72a9a864da
|
4
|
+
data.tar.gz: 3cbb6dfca84687c833051147a0fcad16cd258dd09f48a93da61f051c2081f62c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83477bb53b14a04798ab85f2efc9d4bb3af3195bd100ca42d601d2dbb2bc7fa09e7886f414e5a1e3041128bd671e3d15e39e071fe8de9d8d19dacc4c7e702cfc
|
7
|
+
data.tar.gz: 1914eeb9509916e9b4eb530f56dab0bfa73683a9197e157c8030fc11c96efca983a0d0f257d2845d890c9772ccfb6550651fb9fba60d1671db914c51b3675fc8
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,6 @@ Machine learning for Ruby
|
|
4
4
|
|
5
5
|
- Build predictive models quickly and easily
|
6
6
|
- Serve models built in Ruby, Python, R, and more
|
7
|
-
- No prior knowledge of machine learning required :tada:
|
8
7
|
|
9
8
|
Check out [this post](https://ankane.org/rails-meet-data-science) for more info on machine learning with Rails
|
10
9
|
|
@@ -379,6 +378,16 @@ By default, an intercept is included. Disable this with:
|
|
379
378
|
Eps::Model.new(data, intercept: false)
|
380
379
|
```
|
381
380
|
|
381
|
+
## Probability
|
382
|
+
|
383
|
+
To get the probability of each category for predictions with classification, use:
|
384
|
+
|
385
|
+
```ruby
|
386
|
+
model.predict_probability(data)
|
387
|
+
```
|
388
|
+
|
389
|
+
Naive Bayes is known to produce poor probability estimates, so stick with LightGBM if you need this.
|
390
|
+
|
382
391
|
## Validation Options
|
383
392
|
|
384
393
|
Pass your own validation set with:
|
@@ -414,7 +423,7 @@ The database is another place you can store models. It’s good if you retrain m
|
|
414
423
|
Create an ActiveRecord model to store the predictive model.
|
415
424
|
|
416
425
|
```sh
|
417
|
-
rails
|
426
|
+
rails generate model Model key:string:uniq data:text
|
418
427
|
```
|
419
428
|
|
420
429
|
Store the model with:
|
@@ -524,11 +533,11 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
524
533
|
- Write, clarify, or fix documentation
|
525
534
|
- Suggest or add new features
|
526
535
|
|
527
|
-
To get started with development
|
536
|
+
To get started with development:
|
528
537
|
|
529
538
|
```sh
|
530
539
|
git clone https://github.com/ankane/eps.git
|
531
540
|
cd eps
|
532
541
|
bundle install
|
533
|
-
rake test
|
542
|
+
bundle exec rake test
|
534
543
|
```
|
data/lib/eps/base_estimator.rb
CHANGED
@@ -9,27 +9,11 @@ module Eps
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def predict(data)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
data = Eps::DataFrame.new(data)
|
16
|
-
|
17
|
-
@evaluator.features.each do |k, type|
|
18
|
-
values = data.columns[k]
|
19
|
-
raise ArgumentError, "Missing column: #{k}" if !values
|
20
|
-
column_type = Utils.column_type(values.compact, k) if values
|
21
|
-
|
22
|
-
if !column_type.nil?
|
23
|
-
if (type == "numeric" && column_type != "numeric") || (type != "numeric" && column_type != "categorical")
|
24
|
-
raise ArgumentError, "Bad type for column #{k}: Expected #{type} but got #{column_type}"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
# TODO check for unknown values for categorical features
|
28
|
-
end
|
29
|
-
|
30
|
-
predictions = @evaluator.predict(data)
|
12
|
+
_predict(data, false)
|
13
|
+
end
|
31
14
|
|
32
|
-
|
15
|
+
def predict_probability(data)
|
16
|
+
_predict(data, true)
|
33
17
|
end
|
34
18
|
|
35
19
|
def evaluate(data, y = nil, target: nil, weight: nil)
|
@@ -75,6 +59,30 @@ module Eps
|
|
75
59
|
|
76
60
|
private
|
77
61
|
|
62
|
+
def _predict(data, probabilities)
|
63
|
+
singular = data.is_a?(Hash)
|
64
|
+
data = [data] if singular
|
65
|
+
|
66
|
+
data = Eps::DataFrame.new(data)
|
67
|
+
|
68
|
+
@evaluator.features.each do |k, type|
|
69
|
+
values = data.columns[k]
|
70
|
+
raise ArgumentError, "Missing column: #{k}" if !values
|
71
|
+
column_type = Utils.column_type(values.compact, k) if values
|
72
|
+
|
73
|
+
if !column_type.nil?
|
74
|
+
if (type == "numeric" && column_type != "numeric") || (type != "numeric" && column_type != "categorical")
|
75
|
+
raise ArgumentError, "Bad type for column #{k}: Expected #{type} but got #{column_type}"
|
76
|
+
end
|
77
|
+
end
|
78
|
+
# TODO check for unknown values for categorical features
|
79
|
+
end
|
80
|
+
|
81
|
+
predictions = @evaluator.predict(data, probabilities: probabilities)
|
82
|
+
|
83
|
+
singular ? predictions.first : predictions
|
84
|
+
end
|
85
|
+
|
78
86
|
def train(data, y = nil, target: nil, weight: nil, split: nil, validation_set: nil, verbose: nil, text_features: nil, early_stopping: nil)
|
79
87
|
data, @target = prep_data(data, y, target, weight)
|
80
88
|
@target_type = Utils.column_type(data.label, @target)
|
data/lib/eps/evaluators/lightgbm.rb
CHANGED
@@ -11,7 +11,9 @@ module Eps
|
|
11
11
|
@text_features = text_features
|
12
12
|
end
|
13
13
|
|
14
|
-
def predict(data)
|
14
|
+
def predict(data, probabilities: false)
|
15
|
+
raise "Probabilities not supported" if probabilities && @objective == "regression"
|
16
|
+
|
15
17
|
rows = data.map(&:to_h)
|
16
18
|
|
17
19
|
# sparse matrix
|
@@ -38,7 +40,12 @@ module Eps
|
|
38
40
|
when "regression"
|
39
41
|
sum_trees(rows, @trees)
|
40
42
|
when "binary"
|
41
|
-
sum_trees(rows, @trees).map { |s|
|
43
|
+
prob = sum_trees(rows, @trees).map { |s| sigmoid(s) }
|
44
|
+
if probabilities
|
45
|
+
prob.map { |v| @labels.zip([1 - v, v]).to_h }
|
46
|
+
else
|
47
|
+
prob.map { |v| @labels[v > 0.5 ? 1 : 0] }
|
48
|
+
end
|
42
49
|
else
|
43
50
|
tree_scores = []
|
44
51
|
num_trees = @trees.size / @labels.size
|
@@ -47,8 +54,14 @@ module Eps
|
|
47
54
|
end
|
48
55
|
data.size.times.map do |i|
|
49
56
|
v = tree_scores.map { |s| s[i] }
|
50
|
-
|
51
|
-
|
57
|
+
if probabilities
|
58
|
+
exp = v.map { |vi| Math.exp(vi) }
|
59
|
+
sum = exp.sum
|
60
|
+
@labels.zip(exp.map { |e| e / sum }).to_h
|
61
|
+
else
|
62
|
+
idx = v.map.with_index.max_by { |v2, _| v2 }.last
|
63
|
+
@labels[idx]
|
64
|
+
end
|
52
65
|
end
|
53
66
|
end
|
54
67
|
end
|
@@ -109,7 +122,7 @@ module Eps
|
|
109
122
|
end
|
110
123
|
|
111
124
|
def sigmoid(x)
|
112
|
-
1.0 / (1 + Math
|
125
|
+
1.0 / (1 + Math.exp(-x))
|
113
126
|
end
|
114
127
|
end
|
115
128
|
end
|
data/lib/eps/evaluators/linear_regression.rb
CHANGED
@@ -9,7 +9,9 @@ module Eps
|
|
9
9
|
@text_features = text_features || {}
|
10
10
|
end
|
11
11
|
|
12
|
-
def predict(x)
|
12
|
+
def predict(x, probabilities: false)
|
13
|
+
raise "Probabilities not supported" if probabilities
|
14
|
+
|
13
15
|
intercept = @coefficients["_intercept"] || 0.0
|
14
16
|
scores = [intercept] * x.size
|
15
17
|
|
data/lib/eps/evaluators/naive_bayes.rb
CHANGED
@@ -10,14 +10,15 @@ module Eps
|
|
10
10
|
@legacy = legacy
|
11
11
|
end
|
12
12
|
|
13
|
-
def predict(x)
|
13
|
+
def predict(x, probabilities: false)
|
14
14
|
probs = calculate_class_probabilities(x)
|
15
15
|
probs.map do |xp|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
if probabilities
|
17
|
+
sum = xp.values.map { |v| Math.exp(v) }.sum.to_f
|
18
|
+
xp.map { |k, v| [k, Math.exp(v) / sum] }.to_h
|
19
|
+
else
|
20
|
+
xp.sort_by { |k, v| [-v, k] }[0][0]
|
21
|
+
end
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
data/lib/eps/lightgbm.rb
CHANGED
@@ -121,18 +121,20 @@ module Eps
|
|
121
121
|
def check_evaluator(objective, labels, booster, booster_set, evaluator, evaluator_set)
|
122
122
|
expected = @booster.predict(booster_set.map_rows(&:to_a))
|
123
123
|
if objective == "multiclass"
|
124
|
-
|
125
|
-
|
126
|
-
|
124
|
+
actual = evaluator.predict(evaluator_set, probabilities: true)
|
125
|
+
# just compare first for now
|
126
|
+
expected.map! { |v| v.first }
|
127
|
+
actual.map! { |v| v.values.first }
|
127
128
|
elsif objective == "binary"
|
128
|
-
|
129
|
+
actual = evaluator.predict(evaluator_set, probabilities: true).map { |v| v.values.last }
|
130
|
+
else
|
131
|
+
actual = evaluator.predict(evaluator_set)
|
129
132
|
end
|
130
|
-
actual = evaluator.predict(evaluator_set)
|
131
133
|
|
132
|
-
regression = objective == "regression"
|
134
|
+
regression = objective == "regression" || objective == "binary"
|
133
135
|
bad_observations = []
|
134
136
|
expected.zip(actual).each_with_index do |(exp, act), i|
|
135
|
-
success =
|
137
|
+
success = (act - exp).abs < 0.001
|
136
138
|
unless success
|
137
139
|
bad_observations << {expected: exp, actual: act, data_point: evaluator_set[i].map(&:itself).first}
|
138
140
|
end
|
data/lib/eps/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lightgbm
|