eps 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4df8a83bee7fce8feebec2cf26d33d7ee4ca74fbcda9f41fb070f614cfb2e0eb
4
- data.tar.gz: 23f7dd9aa63eb4306268f19b862de3a07f9d72d9ec507160a7e6d291ea2245c6
3
+ metadata.gz: d93161edfe5b26ce55bbdafedfa4ead7fad756cc0f3e921f2b970a49c97bb5fc
4
+ data.tar.gz: 5d0e4f8326a6e446efbe0a4a6f9e8e6435b7314ac4dba737d87bbe4b73c4e04a
5
5
  SHA512:
6
- metadata.gz: c24ea7abf903829b3fe00dd0f7c601062464ecc193ccd8a725a98a437e7ed6f6bff8952c1c50aeeadcc5981e84325a44efedd53580e23f2475f1c8a7b927ed78
7
- data.tar.gz: 601cf18d044fd9ac348d3f632b7edda7fbd34ef11f497d4be998d62ae76f33f6681953fb5c263924deebed184b5b6f560bcd24de272c509317fb9c3b68f2f3b9
6
+ metadata.gz: e387214353fdf13608d48b306db3ce1b635eb3977f052d1d47b3e2b8cbe0c14628e01ca1d4291eaa9d3fb833864ff02628817155275d2105a069d2f4a866b8b3
7
+ data.tar.gz: b27237a71a7198719b3000f385ea946547258f789f1a650cc348ed38d96e49c4d56b01149917807c97200aa737d6864739094c91d86ab8bafdd29e96e25e0d3b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.5.0 (2023-07-02)
2
+
3
+ - Dropped support for Ruby < 3
4
+
1
5
  ## 0.4.1 (2022-09-28)
2
6
 
3
7
  - Fixed `cannot load such file -- matrix` error with Ruby 3.1
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2018-2021 Andrew Kane
3
+ Copyright (c) 2018-2023 Andrew Kane
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -54,7 +54,7 @@ module Eps
54
54
  def map
55
55
  if @columns.any?
56
56
  size.times.map do |i|
57
- yield Hash[@columns.map { |k, v| [k, v[i]] }]
57
+ yield @columns.to_h { |k, v| [k, v[i]] }
58
58
  end
59
59
  end
60
60
  end
@@ -4,7 +4,7 @@ module Eps
4
4
  attr_reader :features
5
5
 
6
6
  def initialize(coefficients:, features:, text_features:)
7
- @coefficients = Hash[coefficients.map { |k, v| [k.is_a?(Array) ? [k[0].to_s, k[1]] : k.to_s, v] }]
7
+ @coefficients = coefficients.to_h { |k, v| [k.is_a?(Array) ? [k[0].to_s, k[1]] : k.to_s, v] }
8
8
  @features = features
9
9
  @text_features = text_features || {}
10
10
  end
@@ -50,7 +50,7 @@ module Eps
50
50
  end
51
51
 
52
52
  def coefficients
53
- Hash[@coefficients.map { |k, v| [Array(k).join.to_sym, v] }]
53
+ @coefficients.to_h { |k, v| [Array(k).join.to_sym, v] }
54
54
  end
55
55
  end
56
56
  end
@@ -36,7 +36,7 @@ module Eps
36
36
  end
37
37
 
38
38
  def inverse_transform(y)
39
- inverse = Hash[@labels.map(&:reverse)]
39
+ inverse = @labels.map(&:reverse).to_h
40
40
  y.map do |yi|
41
41
  inverse[yi.to_i]
42
42
  end
@@ -146,7 +146,7 @@ module Eps
146
146
 
147
147
  @coefficient_names = data.columns.keys
148
148
  @coefficient_names.unshift("_intercept") if intercept
149
- @coefficients = Hash[@coefficient_names.zip(v3)]
149
+ @coefficients = @coefficient_names.zip(v3).to_h
150
150
  Evaluators::LinearRegression.new(coefficients: @coefficients, features: @features, text_features: @text_features)
151
151
  end
152
152
 
@@ -172,21 +172,20 @@ module Eps
172
172
  # add epsilon for perfect fits
173
173
  # consistent with GSL
174
174
  def t_value
175
- @t_value ||= Hash[@coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
175
+ @t_value ||= @coefficients.to_h { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }
176
176
  end
177
177
 
178
178
  def p_value
179
179
  @p_value ||= begin
180
- Hash[@coefficients.map do |k, _|
181
- tp = Eps::Statistics.tdist_p(t_value[k].abs, degrees_of_freedom)
182
- [k, 2 * (1 - tp)]
183
- end]
180
+ @coefficients.to_h do |k, _|
181
+ [k, 2 * Eps::Statistics.students_t_cdf(-t_value[k].abs, degrees_of_freedom)]
182
+ end
184
183
  end
185
184
  end
186
185
 
187
186
  def std_err
188
187
  @std_err ||= begin
189
- Hash[@coefficient_names.zip(diagonal.map { |v| Math.sqrt(v) })]
188
+ @coefficient_names.zip(diagonal.map { |v| Math.sqrt(v) }).to_h
190
189
  end
191
190
  end
192
191
 
@@ -1,79 +1,73 @@
1
- ### Extracted from https://github.com/estebanz01/ruby-statistics
2
- ### The Ruby author is Esteban Zapata Rojas
3
- ###
4
- ### Originally extracted from https://codeplea.com/incomplete-beta-function-c
5
- ### These functions shared under zlib license and the author is Lewis Van Winkle
6
-
7
1
  module Eps
8
2
  module Statistics
9
- def self.tdist_p(value, degrees_of_freedom)
10
- upper = (value + Math.sqrt(value * value + degrees_of_freedom))
11
- lower = (2.0 * Math.sqrt(value * value + degrees_of_freedom))
12
-
13
- x = upper/lower
14
-
15
- alpha = degrees_of_freedom/2.0
16
- beta = degrees_of_freedom/2.0
17
-
18
- incomplete_beta_function(x, alpha, beta)
3
+ def self.normal_cdf(x, mean, std_dev)
4
+ 0.5 * (1.0 + Math.erf((x - mean) / (std_dev * Math.sqrt(2))))
19
5
  end
20
6
 
21
- def self.incomplete_beta_function(x, alp, bet)
22
- return if x < 0.0
23
- return 1.0 if x > 1.0
24
-
25
- tiny = 1.0E-50
26
-
27
- if x > ((alp + 1.0)/(alp + bet + 2.0))
28
- return 1.0 - incomplete_beta_function(1.0 - x, bet, alp)
7
+ # Hill, G. W. (1970).
8
+ # Algorithm 395: Student's t-distribution.
9
+ # Communications of the ACM, 13(10), 617-619.
10
+ def self.students_t_cdf(x, n)
11
+ start, sign = x < 0 ? [0, 1] : [1, -1]
12
+
13
+ z = 1.0
14
+ t = x * x
15
+ y = t / n.to_f
16
+ b = 1.0 + y
17
+
18
+ if n > n.floor || (n >= 20.0 && t < n) || n > 200.0
19
+ # asymptotic series for large or noninteger n
20
+ if y > 10e-6
21
+ y = Math.log(b)
22
+ end
23
+ a = n - 0.5
24
+ b = 48.0 * a * a
25
+ y *= a
26
+ y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * Math.sqrt(y)
27
+ return start + sign * normal_cdf(-y, 0.0, 1.0)
29
28
  end
30
29
 
31
- # To avoid overflow problems, the implementation applies the logarithm properties
32
- # to calculate in a faster and safer way the values.
33
- lbet_ab = (Math.lgamma(alp)[0] + Math.lgamma(bet)[0] - Math.lgamma(alp + bet)[0]).freeze
34
- front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_f).freeze
35
-
36
- # This is the non-log version of the left part of the formula (before the continuous fraction)
37
- # down_left = alp * self.beta_function(alp, bet)
38
- # upper_left = (x ** alp) * ((1.0 - x) ** bet)
39
- # front = upper_left/down_left
40
-
41
- f, c, d = 1.0, 1.0, 0.0
42
-
43
- returned_value = nil
44
-
45
- # Let's do more iterations than the proposed implementation (200 iters)
46
- (0..500).each do |number|
47
- m = number/2
48
-
49
- numerator = if number == 0
50
- 1.0
51
- elsif number % 2 == 0
52
- (m * (bet - m) * x)/((alp + 2.0 * m - 1.0)* (alp + 2.0 * m))
53
- else
54
- top = -((alp + m) * (alp + bet + m) * x)
55
- down = ((alp + 2.0 * m) * (alp + 2.0 * m + 1.0))
56
-
57
- top/down
58
- end
59
-
60
- d = 1.0 + numerator * d
61
- d = tiny if d.abs < tiny
62
- d = 1.0 / d
63
-
64
- c = 1.0 + numerator / c
65
- c = tiny if c.abs < tiny
66
-
67
- cd = (c*d).freeze
68
- f = f * cd
30
+ if n < 20 && t < 4.0
31
+ # nested summation of cosine series
32
+ y = Math.sqrt(y)
33
+ a = y
34
+ if n == 1
35
+ a = 0.0
36
+ end
69
37
 
70
- if (1.0 - cd).abs < 1.0E-10
71
- returned_value = front * (f - 1.0)
72
- break
38
+ # loop
39
+ if n > 1
40
+ n -= 2
41
+ while n > 1
42
+ a = (n - 1) / (b * n) * a + y
43
+ n -= 2
44
+ end
73
45
  end
46
+ a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
47
+ return start + sign * (z - a) / 2.0
74
48
  end
75
49
 
76
- returned_value
50
+ # tail series expansion for large t-values
51
+ a = Math.sqrt(b)
52
+ y = a * n
53
+ j = 0
54
+ while a != z
55
+ j += 2
56
+ z = a
57
+ y = y * (j - 1) / (b * j)
58
+ a += y / (n + j)
59
+ end
60
+ z = 0.0
61
+ y = 0.0
62
+ a = -a
63
+
64
+ # loop (without n + 2 and n - 2)
65
+ while n > 1
66
+ a = (n - 1) / (b * n) * a + y
67
+ n -= 2
68
+ end
69
+ a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
70
+ start + sign * (z - a) / 2.0
77
71
  end
78
72
  end
79
73
  end
@@ -27,7 +27,7 @@ module Eps
27
27
 
28
28
  max_features = options[:max_features]
29
29
  if max_features
30
- counts = Hash[counts.sort_by { |_, v| -v }[0...max_features]]
30
+ counts = counts.sort_by { |_, v| -v }[0...max_features].to_h
31
31
  end
32
32
 
33
33
  @vocabulary = counts.keys
data/lib/eps/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Eps
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/eps.rb CHANGED
@@ -1,34 +1,36 @@
1
1
  # dependencies
2
- require "json"
3
2
  require "lightgbm"
4
3
  require "matrix"
5
4
  require "nokogiri"
6
5
 
6
+ # stdlib
7
+ require "json"
8
+
7
9
  # modules
8
- require "eps/base"
9
- require "eps/base_estimator"
10
- require "eps/data_frame"
11
- require "eps/label_encoder"
12
- require "eps/lightgbm"
13
- require "eps/linear_regression"
14
- require "eps/metrics"
15
- require "eps/model"
16
- require "eps/naive_bayes"
17
- require "eps/statistics"
18
- require "eps/text_encoder"
19
- require "eps/utils"
20
- require "eps/version"
10
+ require_relative "eps/base"
11
+ require_relative "eps/base_estimator"
12
+ require_relative "eps/data_frame"
13
+ require_relative "eps/label_encoder"
14
+ require_relative "eps/lightgbm"
15
+ require_relative "eps/linear_regression"
16
+ require_relative "eps/metrics"
17
+ require_relative "eps/model"
18
+ require_relative "eps/naive_bayes"
19
+ require_relative "eps/statistics"
20
+ require_relative "eps/text_encoder"
21
+ require_relative "eps/utils"
22
+ require_relative "eps/version"
21
23
 
22
24
  # pmml
23
- require "eps/pmml"
24
- require "eps/pmml/generator"
25
- require "eps/pmml/loader"
25
+ require_relative "eps/pmml"
26
+ require_relative "eps/pmml/generator"
27
+ require_relative "eps/pmml/loader"
26
28
 
27
29
  # evaluators
28
- require "eps/evaluators/linear_regression"
29
- require "eps/evaluators/lightgbm"
30
- require "eps/evaluators/naive_bayes"
31
- require "eps/evaluators/node"
30
+ require_relative "eps/evaluators/linear_regression"
31
+ require_relative "eps/evaluators/lightgbm"
32
+ require_relative "eps/evaluators/naive_bayes"
33
+ require_relative "eps/evaluators/node"
32
34
 
33
35
  module Eps
34
36
  class Error < StandardError; end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: eps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-28 00:00:00.000000000 Z
11
+ date: 2023-07-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lightgbm
@@ -94,14 +94,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
94
94
  requirements:
95
95
  - - ">="
96
96
  - !ruby/object:Gem::Version
97
- version: '2.7'
97
+ version: '3'
98
98
  required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  requirements:
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
102
  version: '0'
103
103
  requirements: []
104
- rubygems_version: 3.3.7
104
+ rubygems_version: 3.4.10
105
105
  signing_key:
106
106
  specification_version: 4
107
107
  summary: Machine learning for Ruby. Supports regression (linear regression) and classification