eps 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/eps/data_frame.rb +1 -1
- data/lib/eps/evaluators/linear_regression.rb +2 -2
- data/lib/eps/label_encoder.rb +1 -1
- data/lib/eps/linear_regression.rb +6 -7
- data/lib/eps/statistics.rb +60 -66
- data/lib/eps/text_encoder.rb +1 -1
- data/lib/eps/version.rb +1 -1
- data/lib/eps.rb +23 -21
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d93161edfe5b26ce55bbdafedfa4ead7fad756cc0f3e921f2b970a49c97bb5fc
|
4
|
+
data.tar.gz: 5d0e4f8326a6e446efbe0a4a6f9e8e6435b7314ac4dba737d87bbe4b73c4e04a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e387214353fdf13608d48b306db3ce1b635eb3977f052d1d47b3e2b8cbe0c14628e01ca1d4291eaa9d3fb833864ff02628817155275d2105a069d2f4a866b8b3
|
7
|
+
data.tar.gz: b27237a71a7198719b3000f385ea946547258f789f1a650cc348ed38d96e49c4d56b01149917807c97200aa737d6864739094c91d86ab8bafdd29e96e25e0d3b
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/lib/eps/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module Eps
|
|
4
4
|
attr_reader :features
|
5
5
|
|
6
6
|
def initialize(coefficients:, features:, text_features:)
|
7
|
-
@coefficients =
|
7
|
+
@coefficients = coefficients.to_h { |k, v| [k.is_a?(Array) ? [k[0].to_s, k[1]] : k.to_s, v] }
|
8
8
|
@features = features
|
9
9
|
@text_features = text_features || {}
|
10
10
|
end
|
@@ -50,7 +50,7 @@ module Eps
|
|
50
50
|
end
|
51
51
|
|
52
52
|
def coefficients
|
53
|
-
|
53
|
+
@coefficients.to_h { |k, v| [Array(k).join.to_sym, v] }
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
data/lib/eps/label_encoder.rb
CHANGED
@@ -146,7 +146,7 @@ module Eps
|
|
146
146
|
|
147
147
|
@coefficient_names = data.columns.keys
|
148
148
|
@coefficient_names.unshift("_intercept") if intercept
|
149
|
-
@coefficients =
|
149
|
+
@coefficients = @coefficient_names.zip(v3).to_h
|
150
150
|
Evaluators::LinearRegression.new(coefficients: @coefficients, features: @features, text_features: @text_features)
|
151
151
|
end
|
152
152
|
|
@@ -172,21 +172,20 @@ module Eps
|
|
172
172
|
# add epsilon for perfect fits
|
173
173
|
# consistent with GSL
|
174
174
|
def t_value
|
175
|
-
@t_value ||=
|
175
|
+
@t_value ||= @coefficients.to_h { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }
|
176
176
|
end
|
177
177
|
|
178
178
|
def p_value
|
179
179
|
@p_value ||= begin
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
end]
|
180
|
+
@coefficients.to_h do |k, _|
|
181
|
+
[k, 2 * Eps::Statistics.students_t_cdf(-t_value[k].abs, degrees_of_freedom)]
|
182
|
+
end
|
184
183
|
end
|
185
184
|
end
|
186
185
|
|
187
186
|
def std_err
|
188
187
|
@std_err ||= begin
|
189
|
-
|
188
|
+
@coefficient_names.zip(diagonal.map { |v| Math.sqrt(v) }).to_h
|
190
189
|
end
|
191
190
|
end
|
192
191
|
|
data/lib/eps/statistics.rb
CHANGED
@@ -1,79 +1,73 @@
|
|
1
|
-
### Extracted from https://github.com/estebanz01/ruby-statistics
|
2
|
-
### The Ruby author is Esteban Zapata Rojas
|
3
|
-
###
|
4
|
-
### Originally extracted from https://codeplea.com/incomplete-beta-function-c
|
5
|
-
### These functions shared under zlib license and the author is Lewis Van Winkle
|
6
|
-
|
7
1
|
module Eps
|
8
2
|
module Statistics
|
9
|
-
def self.
|
10
|
-
|
11
|
-
lower = (2.0 * Math.sqrt(value * value + degrees_of_freedom))
|
12
|
-
|
13
|
-
x = upper/lower
|
14
|
-
|
15
|
-
alpha = degrees_of_freedom/2.0
|
16
|
-
beta = degrees_of_freedom/2.0
|
17
|
-
|
18
|
-
incomplete_beta_function(x, alpha, beta)
|
3
|
+
def self.normal_cdf(x, mean, std_dev)
|
4
|
+
0.5 * (1.0 + Math.erf((x - mean) / (std_dev * Math.sqrt(2))))
|
19
5
|
end
|
20
6
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
7
|
+
# Hill, G. W. (1970).
|
8
|
+
# Algorithm 395: Student's t-distribution.
|
9
|
+
# Communications of the ACM, 13(10), 617-619.
|
10
|
+
def self.students_t_cdf(x, n)
|
11
|
+
start, sign = x < 0 ? [0, 1] : [1, -1]
|
12
|
+
|
13
|
+
z = 1.0
|
14
|
+
t = x * x
|
15
|
+
y = t / n.to_f
|
16
|
+
b = 1.0 + y
|
17
|
+
|
18
|
+
if n > n.floor || (n >= 20.0 && t < n) || n > 200.0
|
19
|
+
# asymptotic series for large or noninteger n
|
20
|
+
if y > 10e-6
|
21
|
+
y = Math.log(b)
|
22
|
+
end
|
23
|
+
a = n - 0.5
|
24
|
+
b = 48.0 * a * a
|
25
|
+
y *= a
|
26
|
+
y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * Math.sqrt(y)
|
27
|
+
return start + sign * normal_cdf(-y, 0.0, 1.0)
|
29
28
|
end
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
# upper_left = (x ** alp) * ((1.0 - x) ** bet)
|
39
|
-
# front = upper_left/down_left
|
40
|
-
|
41
|
-
f, c, d = 1.0, 1.0, 0.0
|
42
|
-
|
43
|
-
returned_value = nil
|
44
|
-
|
45
|
-
# Let's do more iterations than the proposed implementation (200 iters)
|
46
|
-
(0..500).each do |number|
|
47
|
-
m = number/2
|
48
|
-
|
49
|
-
numerator = if number == 0
|
50
|
-
1.0
|
51
|
-
elsif number % 2 == 0
|
52
|
-
(m * (bet - m) * x)/((alp + 2.0 * m - 1.0)* (alp + 2.0 * m))
|
53
|
-
else
|
54
|
-
top = -((alp + m) * (alp + bet + m) * x)
|
55
|
-
down = ((alp + 2.0 * m) * (alp + 2.0 * m + 1.0))
|
56
|
-
|
57
|
-
top/down
|
58
|
-
end
|
59
|
-
|
60
|
-
d = 1.0 + numerator * d
|
61
|
-
d = tiny if d.abs < tiny
|
62
|
-
d = 1.0 / d
|
63
|
-
|
64
|
-
c = 1.0 + numerator / c
|
65
|
-
c = tiny if c.abs < tiny
|
66
|
-
|
67
|
-
cd = (c*d).freeze
|
68
|
-
f = f * cd
|
30
|
+
if n < 20 && t < 4.0
|
31
|
+
# nested summation of cosine series
|
32
|
+
y = Math.sqrt(y)
|
33
|
+
a = y
|
34
|
+
if n == 1
|
35
|
+
a = 0.0
|
36
|
+
end
|
69
37
|
|
70
|
-
|
71
|
-
|
72
|
-
|
38
|
+
# loop
|
39
|
+
if n > 1
|
40
|
+
n -= 2
|
41
|
+
while n > 1
|
42
|
+
a = (n - 1) / (b * n) * a + y
|
43
|
+
n -= 2
|
44
|
+
end
|
73
45
|
end
|
46
|
+
a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
|
47
|
+
return start + sign * (z - a) / 2.0
|
74
48
|
end
|
75
49
|
|
76
|
-
|
50
|
+
# tail series expansion for large t-values
|
51
|
+
a = Math.sqrt(b)
|
52
|
+
y = a * n
|
53
|
+
j = 0
|
54
|
+
while a != z
|
55
|
+
j += 2
|
56
|
+
z = a
|
57
|
+
y = y * (j - 1) / (b * j)
|
58
|
+
a += y / (n + j)
|
59
|
+
end
|
60
|
+
z = 0.0
|
61
|
+
y = 0.0
|
62
|
+
a = -a
|
63
|
+
|
64
|
+
# loop (without n + 2 and n - 2)
|
65
|
+
while n > 1
|
66
|
+
a = (n - 1) / (b * n) * a + y
|
67
|
+
n -= 2
|
68
|
+
end
|
69
|
+
a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
|
70
|
+
start + sign * (z - a) / 2.0
|
77
71
|
end
|
78
72
|
end
|
79
73
|
end
|
data/lib/eps/text_encoder.rb
CHANGED
data/lib/eps/version.rb
CHANGED
data/lib/eps.rb
CHANGED
@@ -1,34 +1,36 @@
|
|
1
1
|
# dependencies
|
2
|
-
require "json"
|
3
2
|
require "lightgbm"
|
4
3
|
require "matrix"
|
5
4
|
require "nokogiri"
|
6
5
|
|
6
|
+
# stdlib
|
7
|
+
require "json"
|
8
|
+
|
7
9
|
# modules
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
require_relative "eps/base"
|
11
|
+
require_relative "eps/base_estimator"
|
12
|
+
require_relative "eps/data_frame"
|
13
|
+
require_relative "eps/label_encoder"
|
14
|
+
require_relative "eps/lightgbm"
|
15
|
+
require_relative "eps/linear_regression"
|
16
|
+
require_relative "eps/metrics"
|
17
|
+
require_relative "eps/model"
|
18
|
+
require_relative "eps/naive_bayes"
|
19
|
+
require_relative "eps/statistics"
|
20
|
+
require_relative "eps/text_encoder"
|
21
|
+
require_relative "eps/utils"
|
22
|
+
require_relative "eps/version"
|
21
23
|
|
22
24
|
# pmml
|
23
|
-
|
24
|
-
|
25
|
-
|
25
|
+
require_relative "eps/pmml"
|
26
|
+
require_relative "eps/pmml/generator"
|
27
|
+
require_relative "eps/pmml/loader"
|
26
28
|
|
27
29
|
# evaluators
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
30
|
+
require_relative "eps/evaluators/linear_regression"
|
31
|
+
require_relative "eps/evaluators/lightgbm"
|
32
|
+
require_relative "eps/evaluators/naive_bayes"
|
33
|
+
require_relative "eps/evaluators/node"
|
32
34
|
|
33
35
|
module Eps
|
34
36
|
class Error < StandardError; end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lightgbm
|
@@ -94,14 +94,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
94
|
requirements:
|
95
95
|
- - ">="
|
96
96
|
- !ruby/object:Gem::Version
|
97
|
-
version: '
|
97
|
+
version: '3'
|
98
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
99
|
requirements:
|
100
100
|
- - ">="
|
101
101
|
- !ruby/object:Gem::Version
|
102
102
|
version: '0'
|
103
103
|
requirements: []
|
104
|
-
rubygems_version: 3.
|
104
|
+
rubygems_version: 3.4.10
|
105
105
|
signing_key:
|
106
106
|
specification_version: 4
|
107
107
|
summary: Machine learning for Ruby. Supports regression (linear regression) and classification
|