eps 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -8
- data/lib/eps/base_estimator.rb +2 -2
- data/lib/eps/data_frame.rb +1 -5
- data/lib/eps/evaluators/naive_bayes.rb +2 -2
- data/lib/eps/lightgbm.rb +0 -2
- data/lib/eps/linear_regression.rb +2 -2
- data/lib/eps/naive_bayes.rb +1 -1
- data/lib/eps/pmml/generator.rb +11 -11
- data/lib/eps/pmml/loader.rb +0 -2
- data/lib/eps/statistics.rb +1 -1
- data/lib/eps/text_encoder.rb +1 -1
- data/lib/eps/utils.rb +4 -4
- data/lib/eps/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9559f4d440f2a7cb6d541073600829a0b1bdd0df61c6abe166e0ff731a34fe18
|
|
4
|
+
data.tar.gz: 6a9f37c76d6bdec50f877d9932f805bda751b87c03c0a69f187830dc7c1876d4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c12d5b688bbe9884a0350efd3900de68f1244264dc0cf56f7a8a64145b3ff4a2f08608146e596d0052c1a7da4cc52c4606ae9a5e52b180c1fe1e60d034170e9f
|
|
7
|
+
data.tar.gz: 5fe61bd82055f22210f27f7d1cceb7adbb71d50dd6750336ddc731ae617d749e2c9c478ada19f6a2ad8def2c51d8e2e13a0269ba7687ee97331490a49f59d7c4
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -147,7 +147,7 @@ You can set advanced options with:
|
|
|
147
147
|
```ruby
|
|
148
148
|
text_features: {
|
|
149
149
|
description: {
|
|
150
|
-
|
|
150
|
+
min_occurrences: 5, # min times a word must appear to be included in the model
|
|
151
151
|
max_features: 1000, # max number of words to include in the model
|
|
152
152
|
min_length: 1, # min length of words to be included
|
|
153
153
|
case_sensitive: true, # how to treat words with different case
|
|
@@ -336,13 +336,6 @@ df = Rover.read_csv("houses.csv")
|
|
|
336
336
|
Eps::Model.new(df, target: "price")
|
|
337
337
|
```
|
|
338
338
|
|
|
339
|
-
Or a Daru data frame
|
|
340
|
-
|
|
341
|
-
```ruby
|
|
342
|
-
df = Daru::DataFrame.from_csv("houses.csv")
|
|
343
|
-
Eps::Model.new(df, target: "price")
|
|
344
|
-
```
|
|
345
|
-
|
|
346
339
|
When reading CSV files directly, be sure to convert numeric fields. The `table` method does this automatically.
|
|
347
340
|
|
|
348
341
|
```ruby
|
data/lib/eps/base_estimator.rb
CHANGED
|
@@ -27,8 +27,8 @@ module Eps
|
|
|
27
27
|
|
|
28
28
|
def self.load_pmml(pmml)
|
|
29
29
|
model = new
|
|
30
|
-
model.instance_variable_set(
|
|
31
|
-
model.instance_variable_set(
|
|
30
|
+
model.instance_variable_set(:@evaluator, PMML.load(pmml))
|
|
31
|
+
model.instance_variable_set(:@pmml, pmml.respond_to?(:to_xml) ? pmml.to_xml : pmml) # cache data
|
|
32
32
|
model
|
|
33
33
|
end
|
|
34
34
|
|
data/lib/eps/data_frame.rb
CHANGED
|
@@ -10,7 +10,7 @@ module Eps
|
|
|
10
10
|
data.columns.each do |k, v|
|
|
11
11
|
@columns[k] = v
|
|
12
12
|
end
|
|
13
|
-
elsif rover?(data)
|
|
13
|
+
elsif rover?(data)
|
|
14
14
|
data.to_h.each do |k, v|
|
|
15
15
|
@columns[k.to_s] = v.to_a
|
|
16
16
|
end
|
|
@@ -152,9 +152,5 @@ module Eps
|
|
|
152
152
|
def rover?(x)
|
|
153
153
|
defined?(Rover::DataFrame) && x.is_a?(Rover::DataFrame)
|
|
154
154
|
end
|
|
155
|
-
|
|
156
|
-
def daru?(x)
|
|
157
|
-
defined?(Daru::DataFrame) && x.is_a?(Daru::DataFrame)
|
|
158
|
-
end
|
|
159
155
|
end
|
|
160
156
|
end
|
|
@@ -14,7 +14,7 @@ module Eps
|
|
|
14
14
|
probs = calculate_class_probabilities(x)
|
|
15
15
|
probs.map do |xp|
|
|
16
16
|
if probabilities
|
|
17
|
-
sum = xp.values.
|
|
17
|
+
sum = xp.values.sum { |v| Math.exp(v) }.to_f
|
|
18
18
|
xp.map { |k, v| [k, Math.exp(v) / sum] }.to_h
|
|
19
19
|
else
|
|
20
20
|
xp.sort_by { |k, v| [-v, k] }[0][0]
|
|
@@ -44,7 +44,7 @@ module Eps
|
|
|
44
44
|
|
|
45
45
|
# unknown value if not vc
|
|
46
46
|
if vc
|
|
47
|
-
denom = probabilities[:conditional][k].
|
|
47
|
+
denom = probabilities[:conditional][k].sum { |k, v| v[c] }.to_f
|
|
48
48
|
p2 = vc[c].to_f / denom
|
|
49
49
|
|
|
50
50
|
# TODO use proper smoothing instead
|
data/lib/eps/lightgbm.rb
CHANGED
|
@@ -20,7 +20,6 @@ module Eps
|
|
|
20
20
|
def _train(verbose: nil, early_stopping: nil, learning_rate: 0.1)
|
|
21
21
|
train_set = @train_set
|
|
22
22
|
validation_set = @validation_set.dup
|
|
23
|
-
summary_label = train_set.label
|
|
24
23
|
|
|
25
24
|
# create check set
|
|
26
25
|
evaluator_set = validation_set || train_set
|
|
@@ -134,7 +133,6 @@ module Eps
|
|
|
134
133
|
actual = evaluator.predict(evaluator_set)
|
|
135
134
|
end
|
|
136
135
|
|
|
137
|
-
regression = objective == "regression" || objective == "binary"
|
|
138
136
|
bad_observations = []
|
|
139
137
|
expected.zip(actual).each_with_index do |(exp, act), i|
|
|
140
138
|
success = (act - exp).abs < 0.001
|
|
@@ -221,13 +221,13 @@ module Eps
|
|
|
221
221
|
|
|
222
222
|
# total sum of squares
|
|
223
223
|
def sst
|
|
224
|
-
@sst ||= @train_set.label.
|
|
224
|
+
@sst ||= @train_set.label.sum { |y| (y - y_bar)**2 }
|
|
225
225
|
end
|
|
226
226
|
|
|
227
227
|
# sum of squared errors of prediction
|
|
228
228
|
# not to be confused with "explained sum of squares"
|
|
229
229
|
def sse
|
|
230
|
-
@sse ||= @train_set.label.zip(y_hat).
|
|
230
|
+
@sse ||= @train_set.label.zip(y_hat).sum { |y, yh| (y - yh)**2 }
|
|
231
231
|
end
|
|
232
232
|
|
|
233
233
|
def mst
|
data/lib/eps/naive_bayes.rb
CHANGED
|
@@ -73,7 +73,7 @@ module Eps
|
|
|
73
73
|
# smooth
|
|
74
74
|
if smoothing
|
|
75
75
|
labels.each do |label|
|
|
76
|
-
sum = prob.
|
|
76
|
+
sum = prob.sum { |k2, v2| v2[label] }.to_f
|
|
77
77
|
prob.each do |k2, v|
|
|
78
78
|
v[label] = (v[label] + smoothing) * sum / (sum + (prob.size * smoothing))
|
|
79
79
|
end
|
data/lib/eps/pmml/generator.rb
CHANGED
|
@@ -151,7 +151,7 @@ module Eps
|
|
|
151
151
|
end
|
|
152
152
|
|
|
153
153
|
def linear_regression
|
|
154
|
-
predictors = model.instance_variable_get(
|
|
154
|
+
predictors = model.instance_variable_get(:@coefficients).dup
|
|
155
155
|
intercept = predictors.delete("_intercept") || 0.0
|
|
156
156
|
|
|
157
157
|
data_fields = {}
|
|
@@ -377,43 +377,43 @@ module Eps
|
|
|
377
377
|
# TODO create instance methods on model for all of these features
|
|
378
378
|
|
|
379
379
|
def features
|
|
380
|
-
model.instance_variable_get(
|
|
380
|
+
model.instance_variable_get(:@features)
|
|
381
381
|
end
|
|
382
382
|
|
|
383
383
|
def text_features
|
|
384
|
-
model.instance_variable_get(
|
|
384
|
+
model.instance_variable_get(:@text_features)
|
|
385
385
|
end
|
|
386
386
|
|
|
387
387
|
def text_encoders
|
|
388
|
-
model.instance_variable_get(
|
|
388
|
+
model.instance_variable_get(:@text_encoders)
|
|
389
389
|
end
|
|
390
390
|
|
|
391
391
|
def feature_importance
|
|
392
|
-
model.instance_variable_get(
|
|
392
|
+
model.instance_variable_get(:@feature_importance)
|
|
393
393
|
end
|
|
394
394
|
|
|
395
395
|
def labels
|
|
396
|
-
model.instance_variable_get(
|
|
396
|
+
model.instance_variable_get(:@labels)
|
|
397
397
|
end
|
|
398
398
|
|
|
399
399
|
def trees
|
|
400
|
-
model.instance_variable_get(
|
|
400
|
+
model.instance_variable_get(:@trees)
|
|
401
401
|
end
|
|
402
402
|
|
|
403
403
|
def target
|
|
404
|
-
model.instance_variable_get(
|
|
404
|
+
model.instance_variable_get(:@target)
|
|
405
405
|
end
|
|
406
406
|
|
|
407
407
|
def label_encoders
|
|
408
|
-
model.instance_variable_get(
|
|
408
|
+
model.instance_variable_get(:@label_encoders)
|
|
409
409
|
end
|
|
410
410
|
|
|
411
411
|
def objective
|
|
412
|
-
model.instance_variable_get(
|
|
412
|
+
model.instance_variable_get(:@objective)
|
|
413
413
|
end
|
|
414
414
|
|
|
415
415
|
def probabilities
|
|
416
|
-
model.instance_variable_get(
|
|
416
|
+
model.instance_variable_get(:@probabilities)
|
|
417
417
|
end
|
|
418
418
|
|
|
419
419
|
# end TODO
|
data/lib/eps/pmml/loader.rb
CHANGED
data/lib/eps/statistics.rb
CHANGED
data/lib/eps/text_encoder.rb
CHANGED
data/lib/eps/utils.rb
CHANGED
|
@@ -3,14 +3,14 @@ module Eps
|
|
|
3
3
|
def self.column_type(c, k)
|
|
4
4
|
if !c
|
|
5
5
|
raise ArgumentError, "Missing column: #{k}"
|
|
6
|
-
elsif c.all?
|
|
6
|
+
elsif c.all?(&:nil?)
|
|
7
7
|
# goes here for empty as well
|
|
8
8
|
nil
|
|
9
|
-
elsif c.any?
|
|
9
|
+
elsif c.any?(&:nil?)
|
|
10
10
|
raise ArgumentError, "Missing values in column #{k}"
|
|
11
|
-
elsif c.all?
|
|
11
|
+
elsif c.all?(Numeric)
|
|
12
12
|
"numeric"
|
|
13
|
-
elsif c.all?
|
|
13
|
+
elsif c.all?(String)
|
|
14
14
|
"categorical"
|
|
15
15
|
elsif c.all? { |v| v == true || v == false }
|
|
16
16
|
"categorical" # boolean
|
data/lib/eps/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: eps
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.7.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: lightgbm
|
|
@@ -15,14 +15,14 @@ dependencies:
|
|
|
15
15
|
requirements:
|
|
16
16
|
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version: 0.
|
|
18
|
+
version: '0.4'
|
|
19
19
|
type: :runtime
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version: 0.
|
|
25
|
+
version: '0.4'
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: matrix
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -91,14 +91,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
91
91
|
requirements:
|
|
92
92
|
- - ">="
|
|
93
93
|
- !ruby/object:Gem::Version
|
|
94
|
-
version: '3.
|
|
94
|
+
version: '3.3'
|
|
95
95
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
96
|
requirements:
|
|
97
97
|
- - ">="
|
|
98
98
|
- !ruby/object:Gem::Version
|
|
99
99
|
version: '0'
|
|
100
100
|
requirements: []
|
|
101
|
-
rubygems_version:
|
|
101
|
+
rubygems_version: 4.0.6
|
|
102
102
|
specification_version: 4
|
|
103
103
|
summary: Machine learning for Ruby. Supports regression (linear regression) and classification
|
|
104
104
|
(naive Bayes)
|