eps 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4df8a83bee7fce8feebec2cf26d33d7ee4ca74fbcda9f41fb070f614cfb2e0eb
4
- data.tar.gz: 23f7dd9aa63eb4306268f19b862de3a07f9d72d9ec507160a7e6d291ea2245c6
3
+ metadata.gz: d93161edfe5b26ce55bbdafedfa4ead7fad756cc0f3e921f2b970a49c97bb5fc
4
+ data.tar.gz: 5d0e4f8326a6e446efbe0a4a6f9e8e6435b7314ac4dba737d87bbe4b73c4e04a
5
5
  SHA512:
6
- metadata.gz: c24ea7abf903829b3fe00dd0f7c601062464ecc193ccd8a725a98a437e7ed6f6bff8952c1c50aeeadcc5981e84325a44efedd53580e23f2475f1c8a7b927ed78
7
- data.tar.gz: 601cf18d044fd9ac348d3f632b7edda7fbd34ef11f497d4be998d62ae76f33f6681953fb5c263924deebed184b5b6f560bcd24de272c509317fb9c3b68f2f3b9
6
+ metadata.gz: e387214353fdf13608d48b306db3ce1b635eb3977f052d1d47b3e2b8cbe0c14628e01ca1d4291eaa9d3fb833864ff02628817155275d2105a069d2f4a866b8b3
7
+ data.tar.gz: b27237a71a7198719b3000f385ea946547258f789f1a650cc348ed38d96e49c4d56b01149917807c97200aa737d6864739094c91d86ab8bafdd29e96e25e0d3b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.5.0 (2023-07-02)
2
+
3
+ - Dropped support for Ruby < 3
4
+
1
5
  ## 0.4.1 (2022-09-28)
2
6
 
3
7
  - Fixed `cannot load such file -- matrix` error with Ruby 3.1
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2018-2021 Andrew Kane
3
+ Copyright (c) 2018-2023 Andrew Kane
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -54,7 +54,7 @@ module Eps
54
54
  def map
55
55
  if @columns.any?
56
56
  size.times.map do |i|
57
- yield Hash[@columns.map { |k, v| [k, v[i]] }]
57
+ yield @columns.to_h { |k, v| [k, v[i]] }
58
58
  end
59
59
  end
60
60
  end
@@ -4,7 +4,7 @@ module Eps
4
4
  attr_reader :features
5
5
 
6
6
  def initialize(coefficients:, features:, text_features:)
7
- @coefficients = Hash[coefficients.map { |k, v| [k.is_a?(Array) ? [k[0].to_s, k[1]] : k.to_s, v] }]
7
+ @coefficients = coefficients.to_h { |k, v| [k.is_a?(Array) ? [k[0].to_s, k[1]] : k.to_s, v] }
8
8
  @features = features
9
9
  @text_features = text_features || {}
10
10
  end
@@ -50,7 +50,7 @@ module Eps
50
50
  end
51
51
 
52
52
  def coefficients
53
- Hash[@coefficients.map { |k, v| [Array(k).join.to_sym, v] }]
53
+ @coefficients.to_h { |k, v| [Array(k).join.to_sym, v] }
54
54
  end
55
55
  end
56
56
  end
@@ -36,7 +36,7 @@ module Eps
36
36
  end
37
37
 
38
38
  def inverse_transform(y)
39
- inverse = Hash[@labels.map(&:reverse)]
39
+ inverse = @labels.map(&:reverse).to_h
40
40
  y.map do |yi|
41
41
  inverse[yi.to_i]
42
42
  end
@@ -146,7 +146,7 @@ module Eps
146
146
 
147
147
  @coefficient_names = data.columns.keys
148
148
  @coefficient_names.unshift("_intercept") if intercept
149
- @coefficients = Hash[@coefficient_names.zip(v3)]
149
+ @coefficients = @coefficient_names.zip(v3).to_h
150
150
  Evaluators::LinearRegression.new(coefficients: @coefficients, features: @features, text_features: @text_features)
151
151
  end
152
152
 
@@ -172,21 +172,20 @@ module Eps
172
172
  # add epsilon for perfect fits
173
173
  # consistent with GSL
174
174
  def t_value
175
- @t_value ||= Hash[@coefficients.map { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }]
175
+ @t_value ||= @coefficients.to_h { |k, v| [k, v / (std_err[k] + Float::EPSILON)] }
176
176
  end
177
177
 
178
178
  def p_value
179
179
  @p_value ||= begin
180
- Hash[@coefficients.map do |k, _|
181
- tp = Eps::Statistics.tdist_p(t_value[k].abs, degrees_of_freedom)
182
- [k, 2 * (1 - tp)]
183
- end]
180
+ @coefficients.to_h do |k, _|
181
+ [k, 2 * Eps::Statistics.students_t_cdf(-t_value[k].abs, degrees_of_freedom)]
182
+ end
184
183
  end
185
184
  end
186
185
 
187
186
  def std_err
188
187
  @std_err ||= begin
189
- Hash[@coefficient_names.zip(diagonal.map { |v| Math.sqrt(v) })]
188
+ @coefficient_names.zip(diagonal.map { |v| Math.sqrt(v) }).to_h
190
189
  end
191
190
  end
192
191
 
@@ -1,79 +1,73 @@
1
- ### Extracted from https://github.com/estebanz01/ruby-statistics
2
- ### The Ruby author is Esteban Zapata Rojas
3
- ###
4
- ### Originally extracted from https://codeplea.com/incomplete-beta-function-c
5
- ### These functions shared under zlib license and the author is Lewis Van Winkle
6
-
7
1
  module Eps
8
2
  module Statistics
9
- def self.tdist_p(value, degrees_of_freedom)
10
- upper = (value + Math.sqrt(value * value + degrees_of_freedom))
11
- lower = (2.0 * Math.sqrt(value * value + degrees_of_freedom))
12
-
13
- x = upper/lower
14
-
15
- alpha = degrees_of_freedom/2.0
16
- beta = degrees_of_freedom/2.0
17
-
18
- incomplete_beta_function(x, alpha, beta)
3
+ def self.normal_cdf(x, mean, std_dev)
4
+ 0.5 * (1.0 + Math.erf((x - mean) / (std_dev * Math.sqrt(2))))
19
5
  end
20
6
 
21
- def self.incomplete_beta_function(x, alp, bet)
22
- return if x < 0.0
23
- return 1.0 if x > 1.0
24
-
25
- tiny = 1.0E-50
26
-
27
- if x > ((alp + 1.0)/(alp + bet + 2.0))
28
- return 1.0 - incomplete_beta_function(1.0 - x, bet, alp)
7
+ # Hill, G. W. (1970).
8
+ # Algorithm 395: Student's t-distribution.
9
+ # Communications of the ACM, 13(10), 617-619.
10
+ def self.students_t_cdf(x, n)
11
+ start, sign = x < 0 ? [0, 1] : [1, -1]
12
+
13
+ z = 1.0
14
+ t = x * x
15
+ y = t / n.to_f
16
+ b = 1.0 + y
17
+
18
+ if n > n.floor || (n >= 20.0 && t < n) || n > 200.0
19
+ # asymptotic series for large or noninteger n
20
+ if y > 10e-6
21
+ y = Math.log(b)
22
+ end
23
+ a = n - 0.5
24
+ b = 48.0 * a * a
25
+ y *= a
26
+ y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * Math.sqrt(y)
27
+ return start + sign * normal_cdf(-y, 0.0, 1.0)
29
28
  end
30
29
 
31
- # To avoid overflow problems, the implementation applies the logarithm properties
32
- # to calculate in a faster and safer way the values.
33
- lbet_ab = (Math.lgamma(alp)[0] + Math.lgamma(bet)[0] - Math.lgamma(alp + bet)[0]).freeze
34
- front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_f).freeze
35
-
36
- # This is the non-log version of the left part of the formula (before the continuous fraction)
37
- # down_left = alp * self.beta_function(alp, bet)
38
- # upper_left = (x ** alp) * ((1.0 - x) ** bet)
39
- # front = upper_left/down_left
40
-
41
- f, c, d = 1.0, 1.0, 0.0
42
-
43
- returned_value = nil
44
-
45
- # Let's do more iterations than the proposed implementation (200 iters)
46
- (0..500).each do |number|
47
- m = number/2
48
-
49
- numerator = if number == 0
50
- 1.0
51
- elsif number % 2 == 0
52
- (m * (bet - m) * x)/((alp + 2.0 * m - 1.0)* (alp + 2.0 * m))
53
- else
54
- top = -((alp + m) * (alp + bet + m) * x)
55
- down = ((alp + 2.0 * m) * (alp + 2.0 * m + 1.0))
56
-
57
- top/down
58
- end
59
-
60
- d = 1.0 + numerator * d
61
- d = tiny if d.abs < tiny
62
- d = 1.0 / d
63
-
64
- c = 1.0 + numerator / c
65
- c = tiny if c.abs < tiny
66
-
67
- cd = (c*d).freeze
68
- f = f * cd
30
+ if n < 20 && t < 4.0
31
+ # nested summation of cosine series
32
+ y = Math.sqrt(y)
33
+ a = y
34
+ if n == 1
35
+ a = 0.0
36
+ end
69
37
 
70
- if (1.0 - cd).abs < 1.0E-10
71
- returned_value = front * (f - 1.0)
72
- break
38
+ # loop
39
+ if n > 1
40
+ n -= 2
41
+ while n > 1
42
+ a = (n - 1) / (b * n) * a + y
43
+ n -= 2
44
+ end
73
45
  end
46
+ a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
47
+ return start + sign * (z - a) / 2.0
74
48
  end
75
49
 
76
- returned_value
50
+ # tail series expansion for large t-values
51
+ a = Math.sqrt(b)
52
+ y = a * n
53
+ j = 0
54
+ while a != z
55
+ j += 2
56
+ z = a
57
+ y = y * (j - 1) / (b * j)
58
+ a += y / (n + j)
59
+ end
60
+ z = 0.0
61
+ y = 0.0
62
+ a = -a
63
+
64
+ # loop (without n + 2 and n - 2)
65
+ while n > 1
66
+ a = (n - 1) / (b * n) * a + y
67
+ n -= 2
68
+ end
69
+ a = n == 0 ? a / Math.sqrt(b) : (Math.atan(y) + a / b) * (2.0 / Math::PI)
70
+ start + sign * (z - a) / 2.0
77
71
  end
78
72
  end
79
73
  end
@@ -27,7 +27,7 @@ module Eps
27
27
 
28
28
  max_features = options[:max_features]
29
29
  if max_features
30
- counts = Hash[counts.sort_by { |_, v| -v }[0...max_features]]
30
+ counts = counts.sort_by { |_, v| -v }[0...max_features].to_h
31
31
  end
32
32
 
33
33
  @vocabulary = counts.keys
data/lib/eps/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Eps
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/eps.rb CHANGED
@@ -1,34 +1,36 @@
1
1
  # dependencies
2
- require "json"
3
2
  require "lightgbm"
4
3
  require "matrix"
5
4
  require "nokogiri"
6
5
 
6
+ # stdlib
7
+ require "json"
8
+
7
9
  # modules
8
- require "eps/base"
9
- require "eps/base_estimator"
10
- require "eps/data_frame"
11
- require "eps/label_encoder"
12
- require "eps/lightgbm"
13
- require "eps/linear_regression"
14
- require "eps/metrics"
15
- require "eps/model"
16
- require "eps/naive_bayes"
17
- require "eps/statistics"
18
- require "eps/text_encoder"
19
- require "eps/utils"
20
- require "eps/version"
10
+ require_relative "eps/base"
11
+ require_relative "eps/base_estimator"
12
+ require_relative "eps/data_frame"
13
+ require_relative "eps/label_encoder"
14
+ require_relative "eps/lightgbm"
15
+ require_relative "eps/linear_regression"
16
+ require_relative "eps/metrics"
17
+ require_relative "eps/model"
18
+ require_relative "eps/naive_bayes"
19
+ require_relative "eps/statistics"
20
+ require_relative "eps/text_encoder"
21
+ require_relative "eps/utils"
22
+ require_relative "eps/version"
21
23
 
22
24
  # pmml
23
- require "eps/pmml"
24
- require "eps/pmml/generator"
25
- require "eps/pmml/loader"
25
+ require_relative "eps/pmml"
26
+ require_relative "eps/pmml/generator"
27
+ require_relative "eps/pmml/loader"
26
28
 
27
29
  # evaluators
28
- require "eps/evaluators/linear_regression"
29
- require "eps/evaluators/lightgbm"
30
- require "eps/evaluators/naive_bayes"
31
- require "eps/evaluators/node"
30
+ require_relative "eps/evaluators/linear_regression"
31
+ require_relative "eps/evaluators/lightgbm"
32
+ require_relative "eps/evaluators/naive_bayes"
33
+ require_relative "eps/evaluators/node"
32
34
 
33
35
  module Eps
34
36
  class Error < StandardError; end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: eps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-28 00:00:00.000000000 Z
11
+ date: 2023-07-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lightgbm
@@ -94,14 +94,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
94
94
  requirements:
95
95
  - - ">="
96
96
  - !ruby/object:Gem::Version
97
- version: '2.7'
97
+ version: '3'
98
98
  required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  requirements:
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
102
  version: '0'
103
103
  requirements: []
104
- rubygems_version: 3.3.7
104
+ rubygems_version: 3.4.10
105
105
  signing_key:
106
106
  specification_version: 4
107
107
  summary: Machine learning for Ruby. Supports regression (linear regression) and classification