eps 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +4 -3
- data/lib/eps/base.rb +4 -4
- data/lib/eps/base_estimator.rb +1 -3
- data/lib/eps/linear_regression.rb +18 -19
- data/lib/eps/naive_bayes.rb +20 -14
- data/lib/eps/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae5bc00818b79dc5e07f4dcda7ca56aa825f1014d1f70203564a87cb49b375d4
|
4
|
+
data.tar.gz: 8ba22dddc8635da418c429c12066c63bc1aea15238c32c4a7c4185f66281b6a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8a0f8cc325d26618691613a6213f6471b45c94a22bb2c9eb6ea729543dce4deabd9875d8e7055649fde90066d30d09c0b1b61949598c6859557e8270ff8e776
|
7
|
+
data.tar.gz: 0b8d0918e9571ce1587d4497b8de84b64222f19ba7c466d24bd490605dcfbed10ebfa10ad10ed6f03a13ab45a7bc1b77d5928f45d0a9f8ba7757e569591a36fe
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -233,16 +233,17 @@ class PriceModel < Eps::Base
|
|
233
233
|
metrics = model.evaluate(test_features, test_target)
|
234
234
|
puts "Test RMSE: #{metrics[:rmse]}"
|
235
235
|
# for classification, use:
|
236
|
-
# puts "Test accuracy: #{metrics[:accuracy]}"
|
236
|
+
# puts "Test accuracy: #{(100 * metrics[:accuracy]).round}%"
|
237
237
|
|
238
238
|
# finalize
|
239
239
|
houses = preprocess(houses)
|
240
240
|
all_features = houses.map { |h| features(h) }
|
241
241
|
all_target = houses.map { |h| target(h) }
|
242
|
-
|
242
|
+
model = Eps::Model.new(all_features, all_target)
|
243
243
|
|
244
244
|
# save
|
245
|
-
File.write(model_file,
|
245
|
+
File.write(model_file, model.to_pmml)
|
246
|
+
@model = nil # reset for future predictions
|
246
247
|
end
|
247
248
|
|
248
249
|
def predict(house)
|
data/lib/eps/base.rb
CHANGED
data/lib/eps/base_estimator.rb
CHANGED
@@ -12,15 +12,13 @@ module Eps
|
|
12
12
|
|
13
13
|
y = prep_y(y.to_a)
|
14
14
|
|
15
|
-
@target = target || "target"
|
16
|
-
|
17
15
|
if x.size != y.size
|
18
16
|
raise "Number of samples differs from target"
|
19
17
|
end
|
20
18
|
|
21
19
|
@x = x
|
22
20
|
@y = y
|
23
|
-
@target = target
|
21
|
+
@target = target || "target"
|
24
22
|
end
|
25
23
|
|
26
24
|
def predict(x)
|
@@ -223,26 +223,25 @@ module Eps
|
|
223
223
|
|
224
224
|
# https://people.richland.edu/james/ictcm/2004/multiple.html
|
225
225
|
def summary(extended: false)
|
226
|
-
|
227
|
-
|
228
|
-
|
226
|
+
coefficients = @coefficients
|
227
|
+
str = String.new("")
|
228
|
+
len = [coefficients.keys.map(&:size).max, 15].max
|
229
|
+
if extended
|
230
|
+
str += "%-#{len}s %12s %12s %12s %12s\n" % ["", "coef", "stderr", "t", "p"]
|
231
|
+
else
|
232
|
+
str += "%-#{len}s %12s %12s\n" % ["", "coef", "p"]
|
233
|
+
end
|
234
|
+
coefficients.each do |k, v|
|
229
235
|
if extended
|
230
|
-
str += "%-#{len}s %
|
236
|
+
str += "%-#{len}s %12.2f %12.2f %12.2f %12.3f\n" % [display_field(k), v, std_err[k], t_value[k], p_value[k]]
|
231
237
|
else
|
232
|
-
str += "%-#{len}s %
|
233
|
-
end
|
234
|
-
coefficients.each do |k, v|
|
235
|
-
if extended
|
236
|
-
str += "%-#{len}s %12.2f %12.2f %12.2f %12.3f\n" % [display_field(k), v, std_err[k], t_value[k], p_value[k]]
|
237
|
-
else
|
238
|
-
str += "%-#{len}s %12.2f %12.3f\n" % [display_field(k), v, p_value[k]]
|
239
|
-
end
|
238
|
+
str += "%-#{len}s %12.2f %12.3f\n" % [display_field(k), v, p_value[k]]
|
240
239
|
end
|
241
|
-
str += "\n"
|
242
|
-
str += "r2: %.3f\n" % [r2] if extended
|
243
|
-
str += "adjusted r2: %.3f\n" % [adjusted_r2]
|
244
|
-
str
|
245
240
|
end
|
241
|
+
str += "\n"
|
242
|
+
str += "r2: %.3f\n" % [r2] if extended
|
243
|
+
str += "adjusted r2: %.3f\n" % [adjusted_r2]
|
244
|
+
str
|
246
245
|
end
|
247
246
|
|
248
247
|
def r2
|
@@ -280,12 +279,12 @@ module Eps
|
|
280
279
|
# add epsilon for perfect fits
|
281
280
|
# consistent with GSL
|
282
281
|
def t_value
|
283
|
-
@t_value ||= Hash[coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
|
282
|
+
@t_value ||= Hash[@coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
|
284
283
|
end
|
285
284
|
|
286
285
|
def p_value
|
287
286
|
@p_value ||= begin
|
288
|
-
Hash[coefficients.map do |k, _|
|
287
|
+
Hash[@coefficients.map do |k, _|
|
289
288
|
tp =
|
290
289
|
if @gsl
|
291
290
|
GSL::Cdf.tdist_P(t_value[k].abs, degrees_of_freedom)
|
@@ -350,7 +349,7 @@ module Eps
|
|
350
349
|
end
|
351
350
|
|
352
351
|
def degrees_of_freedom
|
353
|
-
@y.size - coefficients.size
|
352
|
+
@y.size - @coefficients.size
|
354
353
|
end
|
355
354
|
|
356
355
|
def mean(arr)
|
data/lib/eps/naive_bayes.rb
CHANGED
@@ -3,8 +3,8 @@ module Eps
|
|
3
3
|
attr_reader :probabilities
|
4
4
|
|
5
5
|
def initialize(probabilities: nil, target: nil)
|
6
|
-
@probabilities = probabilities
|
7
|
-
@target = target
|
6
|
+
@probabilities = probabilities
|
7
|
+
@target = target
|
8
8
|
end
|
9
9
|
|
10
10
|
def train(*args)
|
@@ -22,7 +22,7 @@ module Eps
|
|
22
22
|
xi[@target] = @y[i]
|
23
23
|
end
|
24
24
|
keys.each do |k|
|
25
|
-
conditional[k] = {}
|
25
|
+
conditional[k.to_s] = {}
|
26
26
|
x.group_by { |xi| xi[@target] }.each do |group, xs|
|
27
27
|
v = xs.map { |xi| xi[k] }
|
28
28
|
|
@@ -32,9 +32,9 @@ module Eps
|
|
32
32
|
# 1. categorical features
|
33
33
|
# 2. conditional probabilities
|
34
34
|
# TODO more efficient count
|
35
|
-
conditional[k][group] = group_count(v)
|
35
|
+
conditional[k.to_s][group] = group_count(v)
|
36
36
|
else
|
37
|
-
conditional[k][group] = {mean: mean(v), stdev: stdev(v)}
|
37
|
+
conditional[k.to_s][group] = {mean: mean(v), stdev: stdev(v)}
|
38
38
|
end
|
39
39
|
end
|
40
40
|
end
|
@@ -48,15 +48,13 @@ module Eps
|
|
48
48
|
|
49
49
|
# TODO better summary
|
50
50
|
def summary(extended: false)
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
str += "#{k}: #{v}\n"
|
55
|
-
end
|
56
|
-
str += "\n"
|
57
|
-
str += "accuracy: %d%%\n" % [(100 * accuracy).round]
|
58
|
-
str
|
51
|
+
str = String.new("")
|
52
|
+
probabilities[:prior].each do |k, v|
|
53
|
+
str += "#{k}: #{v}\n"
|
59
54
|
end
|
55
|
+
str += "\n"
|
56
|
+
str += "accuracy: %d%%\n" % [(100 * accuracy).round]
|
57
|
+
str
|
60
58
|
end
|
61
59
|
|
62
60
|
def accuracy
|
@@ -186,7 +184,7 @@ module Eps
|
|
186
184
|
|
187
185
|
def _predict(x)
|
188
186
|
x.map do |xi|
|
189
|
-
probs = calculate_class_probabilities(xi)
|
187
|
+
probs = calculate_class_probabilities(stringify_keys(xi))
|
190
188
|
# deterministic for equal probabilities
|
191
189
|
probs.sort_by { |k, v| [-v, k.to_s] }[0][0]
|
192
190
|
end
|
@@ -236,5 +234,13 @@ module Eps
|
|
236
234
|
sum = arr.inject(0) { |accum, i| accum + (i - m)**2 }
|
237
235
|
Math.sqrt(sum / (arr.length - 1).to_f)
|
238
236
|
end
|
237
|
+
|
238
|
+
def stringify_keys(h)
|
239
|
+
o = {}
|
240
|
+
h.each do |k, v|
|
241
|
+
o[k.to_s] = v
|
242
|
+
end
|
243
|
+
o
|
244
|
+
end
|
239
245
|
end
|
240
246
|
end
|
data/lib/eps/version.rb
CHANGED