eps 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +4 -3
- data/lib/eps/base.rb +4 -4
- data/lib/eps/base_estimator.rb +1 -3
- data/lib/eps/linear_regression.rb +18 -19
- data/lib/eps/naive_bayes.rb +20 -14
- data/lib/eps/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae5bc00818b79dc5e07f4dcda7ca56aa825f1014d1f70203564a87cb49b375d4
|
4
|
+
data.tar.gz: 8ba22dddc8635da418c429c12066c63bc1aea15238c32c4a7c4185f66281b6a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8a0f8cc325d26618691613a6213f6471b45c94a22bb2c9eb6ea729543dce4deabd9875d8e7055649fde90066d30d09c0b1b61949598c6859557e8270ff8e776
|
7
|
+
data.tar.gz: 0b8d0918e9571ce1587d4497b8de84b64222f19ba7c466d24bd490605dcfbed10ebfa10ad10ed6f03a13ab45a7bc1b77d5928f45d0a9f8ba7757e569591a36fe
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -233,16 +233,17 @@ class PriceModel < Eps::Base
|
|
233
233
|
metrics = model.evaluate(test_features, test_target)
|
234
234
|
puts "Test RMSE: #{metrics[:rmse]}"
|
235
235
|
# for classification, use:
|
236
|
-
# puts "Test accuracy: #{metrics[:accuracy]}"
|
236
|
+
# puts "Test accuracy: #{(100 * metrics[:accuracy]).round}%"
|
237
237
|
|
238
238
|
# finalize
|
239
239
|
houses = preprocess(houses)
|
240
240
|
all_features = houses.map { |h| features(h) }
|
241
241
|
all_target = houses.map { |h| target(h) }
|
242
|
-
|
242
|
+
model = Eps::Model.new(all_features, all_target)
|
243
243
|
|
244
244
|
# save
|
245
|
-
File.write(model_file,
|
245
|
+
File.write(model_file, model.to_pmml)
|
246
|
+
@model = nil # reset for future predictions
|
246
247
|
end
|
247
248
|
|
248
249
|
def predict(house)
|
data/lib/eps/base.rb
CHANGED
data/lib/eps/base_estimator.rb
CHANGED
@@ -12,15 +12,13 @@ module Eps
|
|
12
12
|
|
13
13
|
y = prep_y(y.to_a)
|
14
14
|
|
15
|
-
@target = target || "target"
|
16
|
-
|
17
15
|
if x.size != y.size
|
18
16
|
raise "Number of samples differs from target"
|
19
17
|
end
|
20
18
|
|
21
19
|
@x = x
|
22
20
|
@y = y
|
23
|
-
@target = target
|
21
|
+
@target = target || "target"
|
24
22
|
end
|
25
23
|
|
26
24
|
def predict(x)
|
@@ -223,26 +223,25 @@ module Eps
|
|
223
223
|
|
224
224
|
# https://people.richland.edu/james/ictcm/2004/multiple.html
|
225
225
|
def summary(extended: false)
|
226
|
-
|
227
|
-
|
228
|
-
|
226
|
+
coefficients = @coefficients
|
227
|
+
str = String.new("")
|
228
|
+
len = [coefficients.keys.map(&:size).max, 15].max
|
229
|
+
if extended
|
230
|
+
str += "%-#{len}s %12s %12s %12s %12s\n" % ["", "coef", "stderr", "t", "p"]
|
231
|
+
else
|
232
|
+
str += "%-#{len}s %12s %12s\n" % ["", "coef", "p"]
|
233
|
+
end
|
234
|
+
coefficients.each do |k, v|
|
229
235
|
if extended
|
230
|
-
str += "%-#{len}s %
|
236
|
+
str += "%-#{len}s %12.2f %12.2f %12.2f %12.3f\n" % [display_field(k), v, std_err[k], t_value[k], p_value[k]]
|
231
237
|
else
|
232
|
-
str += "%-#{len}s %
|
233
|
-
end
|
234
|
-
coefficients.each do |k, v|
|
235
|
-
if extended
|
236
|
-
str += "%-#{len}s %12.2f %12.2f %12.2f %12.3f\n" % [display_field(k), v, std_err[k], t_value[k], p_value[k]]
|
237
|
-
else
|
238
|
-
str += "%-#{len}s %12.2f %12.3f\n" % [display_field(k), v, p_value[k]]
|
239
|
-
end
|
238
|
+
str += "%-#{len}s %12.2f %12.3f\n" % [display_field(k), v, p_value[k]]
|
240
239
|
end
|
241
|
-
str += "\n"
|
242
|
-
str += "r2: %.3f\n" % [r2] if extended
|
243
|
-
str += "adjusted r2: %.3f\n" % [adjusted_r2]
|
244
|
-
str
|
245
240
|
end
|
241
|
+
str += "\n"
|
242
|
+
str += "r2: %.3f\n" % [r2] if extended
|
243
|
+
str += "adjusted r2: %.3f\n" % [adjusted_r2]
|
244
|
+
str
|
246
245
|
end
|
247
246
|
|
248
247
|
def r2
|
@@ -280,12 +279,12 @@ module Eps
|
|
280
279
|
# add epsilon for perfect fits
|
281
280
|
# consistent with GSL
|
282
281
|
def t_value
|
283
|
-
@t_value ||= Hash[coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
|
282
|
+
@t_value ||= Hash[@coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
|
284
283
|
end
|
285
284
|
|
286
285
|
def p_value
|
287
286
|
@p_value ||= begin
|
288
|
-
Hash[coefficients.map do |k, _|
|
287
|
+
Hash[@coefficients.map do |k, _|
|
289
288
|
tp =
|
290
289
|
if @gsl
|
291
290
|
GSL::Cdf.tdist_P(t_value[k].abs, degrees_of_freedom)
|
@@ -350,7 +349,7 @@ module Eps
|
|
350
349
|
end
|
351
350
|
|
352
351
|
def degrees_of_freedom
|
353
|
-
@y.size - coefficients.size
|
352
|
+
@y.size - @coefficients.size
|
354
353
|
end
|
355
354
|
|
356
355
|
def mean(arr)
|
data/lib/eps/naive_bayes.rb
CHANGED
@@ -3,8 +3,8 @@ module Eps
|
|
3
3
|
attr_reader :probabilities
|
4
4
|
|
5
5
|
def initialize(probabilities: nil, target: nil)
|
6
|
-
@probabilities = probabilities
|
7
|
-
@target = target
|
6
|
+
@probabilities = probabilities
|
7
|
+
@target = target
|
8
8
|
end
|
9
9
|
|
10
10
|
def train(*args)
|
@@ -22,7 +22,7 @@ module Eps
|
|
22
22
|
xi[@target] = @y[i]
|
23
23
|
end
|
24
24
|
keys.each do |k|
|
25
|
-
conditional[k] = {}
|
25
|
+
conditional[k.to_s] = {}
|
26
26
|
x.group_by { |xi| xi[@target] }.each do |group, xs|
|
27
27
|
v = xs.map { |xi| xi[k] }
|
28
28
|
|
@@ -32,9 +32,9 @@ module Eps
|
|
32
32
|
# 1. categorical features
|
33
33
|
# 2. conditional probabilities
|
34
34
|
# TODO more efficient count
|
35
|
-
conditional[k][group] = group_count(v)
|
35
|
+
conditional[k.to_s][group] = group_count(v)
|
36
36
|
else
|
37
|
-
conditional[k][group] = {mean: mean(v), stdev: stdev(v)}
|
37
|
+
conditional[k.to_s][group] = {mean: mean(v), stdev: stdev(v)}
|
38
38
|
end
|
39
39
|
end
|
40
40
|
end
|
@@ -48,15 +48,13 @@ module Eps
|
|
48
48
|
|
49
49
|
# TODO better summary
|
50
50
|
def summary(extended: false)
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
str += "#{k}: #{v}\n"
|
55
|
-
end
|
56
|
-
str += "\n"
|
57
|
-
str += "accuracy: %d%%\n" % [(100 * accuracy).round]
|
58
|
-
str
|
51
|
+
str = String.new("")
|
52
|
+
probabilities[:prior].each do |k, v|
|
53
|
+
str += "#{k}: #{v}\n"
|
59
54
|
end
|
55
|
+
str += "\n"
|
56
|
+
str += "accuracy: %d%%\n" % [(100 * accuracy).round]
|
57
|
+
str
|
60
58
|
end
|
61
59
|
|
62
60
|
def accuracy
|
@@ -186,7 +184,7 @@ module Eps
|
|
186
184
|
|
187
185
|
def _predict(x)
|
188
186
|
x.map do |xi|
|
189
|
-
probs = calculate_class_probabilities(xi)
|
187
|
+
probs = calculate_class_probabilities(stringify_keys(xi))
|
190
188
|
# deterministic for equal probabilities
|
191
189
|
probs.sort_by { |k, v| [-v, k.to_s] }[0][0]
|
192
190
|
end
|
@@ -236,5 +234,13 @@ module Eps
|
|
236
234
|
sum = arr.inject(0) { |accum, i| accum + (i - m)**2 }
|
237
235
|
Math.sqrt(sum / (arr.length - 1).to_f)
|
238
236
|
end
|
237
|
+
|
238
|
+
def stringify_keys(h)
|
239
|
+
o = {}
|
240
|
+
h.each do |k, v|
|
241
|
+
o[k.to_s] = v
|
242
|
+
end
|
243
|
+
o
|
244
|
+
end
|
239
245
|
end
|
240
246
|
end
|
data/lib/eps/version.rb
CHANGED