eps 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/lib/eps/data_frame.rb +1 -1
- data/lib/eps/evaluators/linear_regression.rb +2 -2
- data/lib/eps/label_encoder.rb +1 -1
- data/lib/eps/linear_regression.rb +6 -7
- data/lib/eps/statistics.rb +60 -66
- data/lib/eps/text_encoder.rb +1 -1
- data/lib/eps/version.rb +1 -1
- data/lib/eps.rb +23 -21
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d93161edfe5b26ce55bbdafedfa4ead7fad756cc0f3e921f2b970a49c97bb5fc
|
4
|
+
data.tar.gz: 5d0e4f8326a6e446efbe0a4a6f9e8e6435b7314ac4dba737d87bbe4b73c4e04a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e387214353fdf13608d48b306db3ce1b635eb3977f052d1d47b3e2b8cbe0c14628e01ca1d4291eaa9d3fb833864ff02628817155275d2105a069d2f4a866b8b3
|
7
|
+
data.tar.gz: b27237a71a7198719b3000f385ea946547258f789f1a650cc348ed38d96e49c4d56b01149917807c97200aa737d6864739094c91d86ab8bafdd29e96e25e0d3b
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/lib/eps/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module Eps
|
|
4
4
|
attr_reader :features
|
5
5
|
|
6
6
|
def initialize(coefficients:, features:, text_features:)
|
7
|
-
@coefficients =
|
7
|
+
@coefficients = coefficients.to_h { |k, v| [k.is_a?(Array) ? [k[0].to_s, k[1]] : k.to_s, v] }
|
8
8
|
@features = features
|
9
9
|
@text_features = text_features || {}
|
10
10
|
end
|
@@ -50,7 +50,7 @@ module Eps
|
|
50
50
|
end
|
51
51
|
|
52
52
|
def coefficients
|
53
|
-
|
53
|
+
@coefficients.to_h { |k, v| [Array(k).join.to_sym, v] }
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
data/lib/eps/label_encoder.rb
CHANGED
@@ -146,7 +146,7 @@ module Eps
|
|
146
146
|
|
147
147
|
@coefficient_names = data.columns.keys
|
148
148
|
@coefficient_names.unshift("_intercept") if intercept
|
149
|
-
@coefficients =
|
149
|
+
@coefficients = @coefficient_names.zip(v3).to_h
|
150
150
|
Evaluators::LinearRegression.new(coefficients: @coefficients, features: @features, text_features: @text_features)
|
151
151
|
end
|
152
152
|
|
@@ -172,21 +172,20 @@ module Eps
|
|
172
172
|
# add epsilon for perfect fits
|
173
173
|
# consistent with GSL
|
174
174
|
def t_value
|
175
|
-
@t_value ||=
|
175
|
+
@t_value ||= @coefficients.to_h { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }
|
176
176
|
end
|
177
177
|
|
178
178
|
def p_value
|
179
179
|
@p_value ||= begin
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
end]
|
180
|
+
@coefficients.to_h do |k, _|
|
181
|
+
[k, 2 * Eps::Statistics.students_t_cdf(-t_value[k].abs, degrees_of_freedom)]
|
182
|
+
end
|
184
183
|
end
|
185
184
|
end
|
186
185
|
|
187
186
|
def std_err
|
188
187
|
@std_err ||= begin
|
189
|
-
|
188
|
+
@coefficient_names.zip(diagonal.map { |v| Math.sqrt(v) }).to_h
|
190
189
|
end
|
191
190
|
end
|
192
191
|
|
data/lib/eps/statistics.rb
CHANGED
@@ -1,79 +1,73 @@
|
|
1
|
-
### Extracted from https://github.com/estebanz01/ruby-statistics
|
2
|
-
### The Ruby author is Esteban Zapata Rojas
|
3
|
-
###
|
4
|
-
### Originally extracted from https://codeplea.com/incomplete-beta-function-c
|
5
|
-
### These functions shared under zlib license and the author is Lewis Van Winkle
|
6
|
-
|
7
1
|
module Eps
|
8
2
|
module Statistics
|
9
|
-
def self.
|
10
|
-
|
11
|
-
lower = (2.0 * Math.sqrt(value * value + degrees_of_freedom))
|
12
|
-
|
13
|
-
x = upper/lower
|
14
|
-
|
15
|
-
alpha = degrees_of_freedom/2.0
|
16
|
-
beta = degrees_of_freedom/2.0
|
17
|
-
|
18
|
-
incomplete_beta_function(x, alpha, beta)
|
3
|
+
def self.normal_cdf(x, mean, std_dev)
|
4
|
+
0.5 * (1.0 + Math.erf((x - mean) / (std_dev * Math.sqrt(2))))
|
19
5
|
end
|
20
6
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
7
|
+
# Hill, G. W. (1970).
|
8
|
+
# Algorithm 395: Student's t-distribution.
|
9
|
+
# Communications of the ACM, 13(10), 617-619.
|
10
|
+
def self.students_t_cdf(x, n)
|
11
|
+
start, sign = x < 0 ? [0, 1] : [1, -1]
|
12
|
+
|
13
|
+
z = 1.0
|
14
|
+
t = x * x
|
15
|
+
y = t / n.to_f
|
16
|
+
b = 1.0 + y
|
17
|
+
|
18
|
+
if n > n.floor || (n >= 20.0 && t < n) || n > 200.0
|
19
|
+
# asymptotic series for large or noninteger n
|
20
|
+
if y > 10e-6
|
21
|
+
y = Math.log(b)
|
22
|
+
end
|
23
|
+
a = n - 0.5
|
24
|
+
b = 48.0 * a * a
|
25
|
+
y *= a
|
26
|
+
y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * Math.sqrt(y)
|
27
|
+
return start + sign * normal_cdf(-y, 0.0, 1.0)
|
29
28
|
end
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
# upper_left = (x ** alp) * ((1.0 - x) ** bet)
|
39
|
-
# front = upper_left/down_left
|
40
|
-
|
41
|
-
f, c, d = 1.0, 1.0, 0.0
|
42
|
-
|
43
|
-
returned_value = nil
|
44
|
-
|
45
|
-
# Let's do more iterations than the proposed implementation (200 iters)
|
46
|
-
(0..500).each do |number|
|
47
|
-
m = number/2
|
48
|
-
|
49
|
-
numerator = if number == 0
|
50
|
-
1.0
|
51
|
-
elsif number % 2 == 0
|
52
|
-
(m * (bet - m) * x)/((alp + 2.0 * m - 1.0)* (alp + 2.0 * m))
|
53
|
-
else
|
54
|
-
top = -((alp + m) * (alp + bet + m) * x)
|
55
|
-
down = ((alp + 2.0 * m) * (alp + 2.0 * m + 1.0))
|
56
|
-
|
57
|
-
top/down
|
58
|
-
end
|
59
|
-
|
60
|
-
d = 1.0 + numerator * d
|
61
|
-
d = tiny if d.abs < tiny
|
62
|
-
d = 1.0 / d
|
63
|
-
|
64
|
-
c = 1.0 + numerator / c
|
65
|
-
c = tiny if c.abs < tiny
|
66
|
-
|
67
|
-
cd = (c*d).freeze
|
68
|
-
f = f * cd
|
30
|
+
if n < 20 && t < 4.0
|
31
|
+
# nested summation of cosine series
|
32
|
+
y = Math.sqrt(y)
|
33
|
+
a = y
|
34
|
+
if n == 1
|
35
|
+
a = 0.0
|
36
|
+
end
|
69
37
|
|
70
|
-
|
71
|
-
|
72
|
-
|
38
|
+
# loop
|
39
|
+
if n > 1
|
40
|
+
n -= 2
|
41
|
+
while n > 1
|
42
|
+
a = (n - 1) / (b * n) * a + y
|
43
|
+
n -= 2
|
44
|
+
end
|
73
45
|
end
|
46
|
+
a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
|
47
|
+
return start + sign * (z - a) / 2.0
|
74
48
|
end
|
75
49
|
|
76
|
-
|
50
|
+
# tail series expansion for large t-values
|
51
|
+
a = Math.sqrt(b)
|
52
|
+
y = a * n
|
53
|
+
j = 0
|
54
|
+
while a != z
|
55
|
+
j += 2
|
56
|
+
z = a
|
57
|
+
y = y * (j - 1) / (b * j)
|
58
|
+
a += y / (n + j)
|
59
|
+
end
|
60
|
+
z = 0.0
|
61
|
+
y = 0.0
|
62
|
+
a = -a
|
63
|
+
|
64
|
+
# loop (without n + 2 and n - 2)
|
65
|
+
while n > 1
|
66
|
+
a = (n - 1) / (b * n) * a + y
|
67
|
+
n -= 2
|
68
|
+
end
|
69
|
+
a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
|
70
|
+
start + sign * (z - a) / 2.0
|
77
71
|
end
|
78
72
|
end
|
79
73
|
end
|
data/lib/eps/text_encoder.rb
CHANGED
data/lib/eps/version.rb
CHANGED
data/lib/eps.rb
CHANGED
@@ -1,34 +1,36 @@
|
|
1
1
|
# dependencies
|
2
|
-
require "json"
|
3
2
|
require "lightgbm"
|
4
3
|
require "matrix"
|
5
4
|
require "nokogiri"
|
6
5
|
|
6
|
+
# stdlib
|
7
|
+
require "json"
|
8
|
+
|
7
9
|
# modules
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
require_relative "eps/base"
|
11
|
+
require_relative "eps/base_estimator"
|
12
|
+
require_relative "eps/data_frame"
|
13
|
+
require_relative "eps/label_encoder"
|
14
|
+
require_relative "eps/lightgbm"
|
15
|
+
require_relative "eps/linear_regression"
|
16
|
+
require_relative "eps/metrics"
|
17
|
+
require_relative "eps/model"
|
18
|
+
require_relative "eps/naive_bayes"
|
19
|
+
require_relative "eps/statistics"
|
20
|
+
require_relative "eps/text_encoder"
|
21
|
+
require_relative "eps/utils"
|
22
|
+
require_relative "eps/version"
|
21
23
|
|
22
24
|
# pmml
|
23
|
-
|
24
|
-
|
25
|
-
|
25
|
+
require_relative "eps/pmml"
|
26
|
+
require_relative "eps/pmml/generator"
|
27
|
+
require_relative "eps/pmml/loader"
|
26
28
|
|
27
29
|
# evaluators
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
30
|
+
require_relative "eps/evaluators/linear_regression"
|
31
|
+
require_relative "eps/evaluators/lightgbm"
|
32
|
+
require_relative "eps/evaluators/naive_bayes"
|
33
|
+
require_relative "eps/evaluators/node"
|
32
34
|
|
33
35
|
module Eps
|
34
36
|
class Error < StandardError; end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lightgbm
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.1.7
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: matrix
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: nokogiri
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,14 +94,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
80
94
|
requirements:
|
81
95
|
- - ">="
|
82
96
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
97
|
+
version: '3'
|
84
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
99
|
requirements:
|
86
100
|
- - ">="
|
87
101
|
- !ruby/object:Gem::Version
|
88
102
|
version: '0'
|
89
103
|
requirements: []
|
90
|
-
rubygems_version: 3.
|
104
|
+
rubygems_version: 3.4.10
|
91
105
|
signing_key:
|
92
106
|
specification_version: 4
|
93
107
|
summary: Machine learning for Ruby. Supports regression (linear regression) and classification
|