eps 0.2.0 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5ce1ae30056e3aaa08465d63571685ed2d43a17b40dc95bb2020d8d1f6752d6d
4
- data.tar.gz: f6f9652d164991b82fed1375b9039baa6cbe3dd3b5413cdee244ba5aac923012
3
+ metadata.gz: ae5bc00818b79dc5e07f4dcda7ca56aa825f1014d1f70203564a87cb49b375d4
4
+ data.tar.gz: 8ba22dddc8635da418c429c12066c63bc1aea15238c32c4a7c4185f66281b6a5
5
5
  SHA512:
6
- metadata.gz: bd9ddb1589b2866c42fda230389a27124285bd162ca9863528b41531d1c725fca3ac7327786b373e9cecda014d9cc2f34f791f203da77215f738cf92c261db61
7
- data.tar.gz: 7d209acc346b223d00827c1df817a2017882b9446ff523f985e4c89b44f10f5a702d6c6dd957c8fb9ceccd83b98f7492aa15e029c1d0655787c1823348beacd0
6
+ metadata.gz: e8a0f8cc325d26618691613a6213f6471b45c94a22bb2c9eb6ea729543dce4deabd9875d8e7055649fde90066d30d09c0b1b61949598c6859557e8270ff8e776
7
+ data.tar.gz: 0b8d0918e9571ce1587d4497b8de84b64222f19ba7c466d24bd490605dcfbed10ebfa10ad10ed6f03a13ab45a7bc1b77d5928f45d0a9f8ba7757e569591a36fe
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.1
2
+
3
+ - Fixed error with `summary`
4
+ - Fixed error with `predict` in `Eps::Base`
5
+ - Fixed error with loaded classification models
6
+
1
7
  ## 0.2.0
2
8
 
3
9
  - Added support for classification
data/README.md CHANGED
@@ -233,16 +233,17 @@ class PriceModel < Eps::Base
233
233
  metrics = model.evaluate(test_features, test_target)
234
234
  puts "Test RMSE: #{metrics[:rmse]}"
235
235
  # for classification, use:
236
- # puts "Test accuracy: #{metrics[:accuracy]}"
236
+ # puts "Test accuracy: #{(100 * metrics[:accuracy]).round}%"
237
237
 
238
238
  # finalize
239
239
  houses = preprocess(houses)
240
240
  all_features = houses.map { |h| features(h) }
241
241
  all_target = houses.map { |h| target(h) }
242
- @model = Eps::Model.new(all_features, all_target)
242
+ model = Eps::Model.new(all_features, all_target)
243
243
 
244
244
  # save
245
- File.write(model_file, @model.to_pmml)
245
+ File.write(model_file, model.to_pmml)
246
+ @model = nil # reset for future predictions
246
247
  end
247
248
 
248
249
  def predict(house)
data/lib/eps/base.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  module Eps
2
2
  class Base
3
3
  class << self
4
- def build
5
- instance.build
4
+ def build(*args)
5
+ instance.build(*args)
6
6
  end
7
7
 
8
- def predict
9
- instance.predict
8
+ def predict(*args)
9
+ instance.predict(*args)
10
10
  end
11
11
 
12
12
  private
@@ -12,15 +12,13 @@ module Eps
12
12
 
13
13
  y = prep_y(y.to_a)
14
14
 
15
- @target = target || "target"
16
-
17
15
  if x.size != y.size
18
16
  raise "Number of samples differs from target"
19
17
  end
20
18
 
21
19
  @x = x
22
20
  @y = y
23
- @target = target
21
+ @target = target || "target"
24
22
  end
25
23
 
26
24
  def predict(x)
@@ -223,26 +223,25 @@ module Eps
223
223
 
224
224
  # https://people.richland.edu/james/ictcm/2004/multiple.html
225
225
  def summary(extended: false)
226
- @summary_str ||= begin
227
- str = String.new("")
228
- len = [coefficients.keys.map(&:size).max, 15].max
226
+ coefficients = @coefficients
227
+ str = String.new("")
228
+ len = [coefficients.keys.map(&:size).max, 15].max
229
+ if extended
230
+ str += "%-#{len}s %12s %12s %12s %12s\n" % ["", "coef", "stderr", "t", "p"]
231
+ else
232
+ str += "%-#{len}s %12s %12s\n" % ["", "coef", "p"]
233
+ end
234
+ coefficients.each do |k, v|
229
235
  if extended
230
- str += "%-#{len}s %12s %12s %12s %12s\n" % ["", "coef", "stderr", "t", "p"]
236
+ str += "%-#{len}s %12.2f %12.2f %12.2f %12.3f\n" % [display_field(k), v, std_err[k], t_value[k], p_value[k]]
231
237
  else
232
- str += "%-#{len}s %12s %12s\n" % ["", "coef", "p"]
233
- end
234
- coefficients.each do |k, v|
235
- if extended
236
- str += "%-#{len}s %12.2f %12.2f %12.2f %12.3f\n" % [display_field(k), v, std_err[k], t_value[k], p_value[k]]
237
- else
238
- str += "%-#{len}s %12.2f %12.3f\n" % [display_field(k), v, p_value[k]]
239
- end
238
+ str += "%-#{len}s %12.2f %12.3f\n" % [display_field(k), v, p_value[k]]
240
239
  end
241
- str += "\n"
242
- str += "r2: %.3f\n" % [r2] if extended
243
- str += "adjusted r2: %.3f\n" % [adjusted_r2]
244
- str
245
240
  end
241
+ str += "\n"
242
+ str += "r2: %.3f\n" % [r2] if extended
243
+ str += "adjusted r2: %.3f\n" % [adjusted_r2]
244
+ str
246
245
  end
247
246
 
248
247
  def r2
@@ -280,12 +279,12 @@ module Eps
280
279
  # add epsilon for perfect fits
281
280
  # consistent with GSL
282
281
  def t_value
283
- @t_value ||= Hash[coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
282
+ @t_value ||= Hash[@coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
284
283
  end
285
284
 
286
285
  def p_value
287
286
  @p_value ||= begin
288
- Hash[coefficients.map do |k, _|
287
+ Hash[@coefficients.map do |k, _|
289
288
  tp =
290
289
  if @gsl
291
290
  GSL::Cdf.tdist_P(t_value[k].abs, degrees_of_freedom)
@@ -350,7 +349,7 @@ module Eps
350
349
  end
351
350
 
352
351
  def degrees_of_freedom
353
- @y.size - coefficients.size
352
+ @y.size - @coefficients.size
354
353
  end
355
354
 
356
355
  def mean(arr)
@@ -3,8 +3,8 @@ module Eps
3
3
  attr_reader :probabilities
4
4
 
5
5
  def initialize(probabilities: nil, target: nil)
6
- @probabilities = probabilities if probabilities
7
- @target = target if target
6
+ @probabilities = probabilities
7
+ @target = target
8
8
  end
9
9
 
10
10
  def train(*args)
@@ -22,7 +22,7 @@ module Eps
22
22
  xi[@target] = @y[i]
23
23
  end
24
24
  keys.each do |k|
25
- conditional[k] = {}
25
+ conditional[k.to_s] = {}
26
26
  x.group_by { |xi| xi[@target] }.each do |group, xs|
27
27
  v = xs.map { |xi| xi[k] }
28
28
 
@@ -32,9 +32,9 @@ module Eps
32
32
  # 1. categorical features
33
33
  # 2. conditional probabilities
34
34
  # TODO more efficient count
35
- conditional[k][group] = group_count(v)
35
+ conditional[k.to_s][group] = group_count(v)
36
36
  else
37
- conditional[k][group] = {mean: mean(v), stdev: stdev(v)}
37
+ conditional[k.to_s][group] = {mean: mean(v), stdev: stdev(v)}
38
38
  end
39
39
  end
40
40
  end
@@ -48,15 +48,13 @@ module Eps
48
48
 
49
49
  # TODO better summary
50
50
  def summary(extended: false)
51
- @summary_str ||= begin
52
- str = String.new("")
53
- probabilities[:prior].each do |k, v|
54
- str += "#{k}: #{v}\n"
55
- end
56
- str += "\n"
57
- str += "accuracy: %d%%\n" % [(100 * accuracy).round]
58
- str
51
+ str = String.new("")
52
+ probabilities[:prior].each do |k, v|
53
+ str += "#{k}: #{v}\n"
59
54
  end
55
+ str += "\n"
56
+ str += "accuracy: %d%%\n" % [(100 * accuracy).round]
57
+ str
60
58
  end
61
59
 
62
60
  def accuracy
@@ -186,7 +184,7 @@ module Eps
186
184
 
187
185
  def _predict(x)
188
186
  x.map do |xi|
189
- probs = calculate_class_probabilities(xi)
187
+ probs = calculate_class_probabilities(stringify_keys(xi))
190
188
  # deterministic for equal probabilities
191
189
  probs.sort_by { |k, v| [-v, k.to_s] }[0][0]
192
190
  end
@@ -236,5 +234,13 @@ module Eps
236
234
  sum = arr.inject(0) { |accum, i| accum + (i - m)**2 }
237
235
  Math.sqrt(sum / (arr.length - 1).to_f)
238
236
  end
237
+
238
+ def stringify_keys(h)
239
+ o = {}
240
+ h.each do |k, v|
241
+ o[k.to_s] = v
242
+ end
243
+ o
244
+ end
239
245
  end
240
246
  end
data/lib/eps/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Eps
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: eps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane