chris_lib 2.2.1 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/chris_lib/chris_math.rb +117 -23
- data/lib/chris_lib/date_ext.rb +13 -1
- data/lib/chris_lib/for_chris_lib.rb +835 -0
- data/lib/chris_lib/shell_methods.rb +80 -10
- data/lib/chris_lib/test_access.rb +25 -6
- data/lib/chris_lib/version.rb +1 -1
- data/lib/chris_lib.rb +3 -0
- metadata +62 -8
@@ -0,0 +1,835 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
# Error raised when ForChrisLib encounters invalid input or missing dependencies.
|
4
|
+
ForChrisLibError = Class.new(StandardError) unless defined?(ForChrisLibError)
|
5
|
+
|
6
|
+
# Aggregated analytical helpers formerly housed in golf_lab.
|
7
|
+
module ForChrisLib
|
8
|
+
include ChrisLib
|
9
|
+
include Math
|
10
|
+
|
11
|
+
# Share the win probability equally between the entries holding the lowest score.
# @param results [#each] scores, where the smallest value wins
# @return [Array<Float>] one probability per input, in input order
# @raise [ForChrisLibError] when results is not enumerable or is empty
def outcome(results)
  raise ForChrisLibError, 'results must respond to #each' unless results.respond_to?(:each)

  scores = results.to_a
  raise ForChrisLibError, 'results cannot be empty' if scores.empty?

  best = scores.min
  winners = scores.count { |score| score == best }
  divisor = winners.zero? ? 1 : winners
  scores.map { |score| (score == best ? 1 : 0).to_f / divisor }
end
|
23
|
+
|
24
|
+
# Chi-squared goodness-of-fit test computed from summary statistics.
#
# Each observed mean is compared with its hypothesised value, scaled by its
# standard error. The statistic is evaluated with one degree of freedom per mean.
class ChiSquaredStdErr
  # @param means [#to_a] observed means
  # @param std_errs [#to_a] standard errors of the means (all non-zero)
  # @param mus [#to_a] hypothesised means
  # @param confidence_level [Float] strictly between 0 and 1
  # @raise [ArgumentError] when an input does not respond to #to_a
  # @raise [ForChrisLibError] when inputs are empty, differ in length,
  #   contain non-numeric or zero standard errors, or the confidence level is out of range
  def initialize(means, std_errs, mus, confidence_level: 0.95)
    [means, std_errs, mus].each do |collection|
      next if collection.respond_to?(:to_a)

      raise ArgumentError, 'means, std_errs, and mus must be enumerable'
    end
    # Coerce once: the guard above only promises #to_a, so nil, ranges and
    # other enumerables must become arrays before #size/#empty?/#zip are used.
    @means = means.to_a
    @std_errs = std_errs.to_a
    @mus = mus.to_a
    @confidence_level = confidence_level
    check_confidence_level
    check_collections
    @threshold = 1 - confidence_level
  end

  # Run the test.
  # @return [OpenStruct] containing :pass?, :p, and :chi2
  def call
    chi2 = means.zip(mus, std_errs).sum { |m, mu, se| (m.to_f - mu)**2 / se**2 }
    p_value = PChiSquared.new.call(means.size, chi2)
    OpenStruct.new(pass?: p_value > threshold, p: p_value, chi2: chi2)
  end

  private

  attr_reader :means, :std_errs, :mus, :threshold, :confidence_level

  # Reject confidence levels outside the open interval (0, 1).
  def check_confidence_level
    return if confidence_level.positive? && confidence_level < 1

    msg = "Confidence level is #{confidence_level} must be between 0 and 1"
    raise ForChrisLibError, msg
  end

  # Validate the three coerced collections against each other.
  def check_collections
    unless means.size == std_errs.size && means.size == mus.size
      raise ForChrisLibError, 'means, std_errs, and mus must be the same length'
    end
    raise ForChrisLibError, 'means cannot be empty' if means.empty?
    raise ForChrisLibError, 'std_errs must all be numeric' unless std_errs.all?(Numeric)
    return unless std_errs.any?(&:zero?)

    raise ForChrisLibError, 'std_errs must be non-zero to avoid division by zero'
  end
end
|
70
|
+
|
71
|
+
# Upper-tail (survival) probability of the chi-squared distribution.
#
# Evaluated through the regularised incomplete gamma function unless a custom
# calculator is injected.
class PChiSquared
  # Relative tolerance at which the series / continued fraction stop.
  EPSILON = 1e-12
  # Floor that keeps the continued-fraction recurrences away from zero.
  TINY = 1e-30
  # Hard cap on iterations for both expansions.
  MAX_ITERATIONS = 10_000

  # @param calculator [#call] optional dependency used to evaluate the tail probability
  def initialize(calculator: nil)
    @calculator = calculator
  end

  # @param dof [Numeric] degrees of freedom (positive)
  # @param nu [Numeric] chi-squared statistic (non-negative)
  # @return [Float] upper-tail probability
  # @raise [ArgumentError] when dof is not positive or nu is negative / not numeric
  def call(dof, nu)
    raise ArgumentError, 'degrees of freedom must be positive' unless dof.is_a?(Numeric) && dof.positive?
    raise ArgumentError, 'chi-squared statistic must be non-negative' unless nu.is_a?(Numeric) && nu >= 0
    return calculator.call(dof, nu) if calculator
    # An infinite statistic would otherwise turn the continued fraction into NaN.
    return 0.0 if nu.to_f.infinite?

    regularized_gamma_q(dof.to_f / 2.0, nu.to_f / 2.0)
  end

  private

  attr_reader :calculator

  # Q(s, x): use the series for small x, the continued fraction otherwise.
  # Borrowed from Numerical Recipes, see https://numerical.recipes
  def regularized_gamma_q(s, x)
    return 1.0 - regularized_gamma_p_series(s, x) if x < s + 1.0

    regularized_gamma_q_continued_fraction(s, x)
  end

  # P(s, x) by its power series.
  def regularized_gamma_p_series(s, x)
    return 0.0 if x <= 0.0

    term = 1.0 / s
    sum = term
    1.upto(MAX_ITERATIONS) do |n|
      term *= x / (s + n)
      sum += term
      break if term.abs < sum.abs * EPSILON
    end
    Math.exp(-x + s * Math.log(x) - Math.lgamma(s).first) * sum
  end

  # Q(s, x) by a continued fraction, with TINY guarding the divisions.
  def regularized_gamma_q_continued_fraction(s, x)
    b = x + 1.0 - s
    c = 1.0 / TINY
    d = 1.0 / b
    h = d
    1.upto(MAX_ITERATIONS) do |n|
      an = -n * (n - s)
      b += 2.0
      d = an * d + b
      d = TINY if d.abs < TINY
      c = b + an / c
      c = TINY if c.abs < TINY
      d = 1.0 / d
      delta = d * c
      h *= delta
      break if (delta - 1.0).abs < EPSILON
    end
    Math.exp(-x + s * Math.log(x) - Math.lgamma(s).first) * h
  end
end
|
148
|
+
|
149
|
+
# Pairs a table header with its rows and checks that every row matches the header width.
class Framed
  attr_reader :hsh

  # @param header [Array<String>] non-empty list of column names
  # @param rows [Array<Array>] rows, each the same size as the header
  # @raise [RuntimeError] when an argument is not an Array, the header is empty,
  #   or a row's size differs from the header's
  def initialize(header, rows)
    raise 'header must be an array' unless header.is_a?(Array)
    raise 'rows must be an array' unless rows.is_a?(Array)
    raise 'header cannot be empty' if header.empty?

    @hsh = { header: header, rows: rows }

    rows.each.with_index do |row, position|
      raise "row #{position} must respond to #size" unless row.respond_to?(:size)
      raise "row #{position} size not equal to header size" unless row.size == header.size
    end
  end

  # @return [Array<String>]
  def header
    hsh[:header]
  end

  # @return [Array<Array>]
  def rows
    hsh[:rows]
  end
end
|
180
|
+
|
181
|
+
# Fixed marker string used by legacy tests.
# @return [String] always 'here'
def test
  sentinel = 'here'
  sentinel
end
|
185
|
+
|
186
|
+
# Fraction of variance unexplained: residual sum of squares over total sum of squares.
# @param y_hat_a [Array<Numeric>] predictions
# @param y_a [Array<Numeric>] observations (must provide #mean and #sum)
# @return [Float]
# @raise [ForChrisLibError] when inputs lack the required methods, differ in length,
#   or contain fewer than two values
def fvu(y_hat_a:, y_a:)
  unless y_hat_a.respond_to?(:size) && y_hat_a.respond_to?(:zip)
    raise ForChrisLibError, 'y_hat_a must respond to #size and #zip'
  end
  raise ForChrisLibError, 'y_a must respond to #size' unless y_a.respond_to?(:size)
  raise ForChrisLibError, 'y_hat_a must contain at least two values' if y_hat_a.size < 2
  raise ForChrisLibError, 'y_hat_a and y_a must be the same length' unless y_hat_a.size == y_a.size

  residual_ss = y_hat_a.zip(y_a).sum { |predicted, observed| (observed - predicted)**2 }.to_f
  centre = y_a.mean
  total_ss = y_a.sum { |observed| (observed - centre)**2 }.to_f
  residual_ss / total_ss
end
|
201
|
+
|
202
|
+
# Estimate histogram bias as the shift that brings the win/loss statistic closest to 50.
#
# A minimiser searches between store.min and store.max for the shift whose
# shifted histogram gives (win_loss_stats[0] - 50)^2 its smallest value.
# @param store [#histogram, #min, #max]
# @param win_loss_calculator [#win_loss_graph, #win_loss_stats] defaults to WinLoss.new
# @param minimizer_class [Class] defaults to Minimization::Brent
# @return [Float] the negated minimising shift
# @raise [ForChrisLibError] when a collaborator is missing or malformed
def bias_estimate_by_min(store, win_loss_calculator: nil, minimizer_class: nil)
  win_loss = win_loss_calculator || default_win_loss_calculator
  unless %i[histogram min max].all? { |message| store.respond_to?(message) }
    raise ForChrisLibError, 'store must respond to :histogram, :min, and :max'
  end

  counts = store.histogram
  unless counts.respond_to?(:[]) && counts[0]
    raise ForChrisLibError, 'store.histogram must include counts in the first slot'
  end

  # Objective: squared distance of the first win/loss statistic from 50.
  squared_miss = lambda do |shift|
    shifted = store.histogram[0].bin_shift(shift)
    density = pdf_from_hist(shifted, min: store.min)
    graph = win_loss.win_loss_graph(nil, pdf: density)
    (win_loss.win_loss_stats(graph)[0] - 50.0)**2
  end

  solver_class = minimizer_class || default_minimizer_class
  raise ForChrisLibError, 'minimizer_class must respond to .new' unless solver_class.respond_to?(:new)

  solver = solver_class.new(store.min, store.max, squared_miss)
  solver.expected = 0.0 if solver.respond_to?(:expected=)
  solver.iterate
  -solver.x_minimum
end
|
234
|
+
|
235
|
+
# Turn bin counts into probabilities keyed by bin position.
# @param bins [Array<Integer>] counts per bin
# @param min [Integer] key assigned to the first bin
# @return [Hash{Integer=>Float}] an all-zero histogram maps every key to 0.0
# @raise [ForChrisLibError] when bins lacks #each_with_index or #sum
def pdf_from_hist(bins, min: 0)
  unless bins.respond_to?(:each_with_index) && bins.respond_to?(:sum)
    raise ForChrisLibError, 'bins must respond to #each_with_index and #sum'
  end

  count_total = bins.sum
  count_total = 1 if count_total.zero? # avoid dividing by zero for empty histograms
  bins.map.with_index(min) { |count, key| [key, count.to_f / count_total] }.to_h
end
|
246
|
+
|
247
|
+
# Sum y values into equal-width bins over the range of x.
# @param x_y [Array<Array(Numeric, Numeric)>] [x, y] pairs
# @param n_bins [Integer] number of bins (positive)
# @return [Array<Array(Float, Numeric, Integer)>] [bin centre, summed y, count] per bin
# @raise [ForChrisLibError] when x_y cannot be transposed, holds no pairs,
#   or n_bins is not a positive Integer
def summed_bins_histogram(x_y, n_bins)
  raise ForChrisLibError, 'x_y must respond to #transpose' unless x_y.respond_to?(:transpose)
  raise ForChrisLibError, 'n_bins must be a positive Integer' unless n_bins.is_a?(Integer) && n_bins.positive?

  x_a, y_a = x_y.transpose
  raise ForChrisLibError, 'x_y must contain at least one [x, y] pair' if x_a.nil? || y_a.nil?

  min, max = x_a.minmax
  delta = (max - min).to_f / n_bins
  bin_sums = Array.new(n_bins, 0)
  bins = Array.new(n_bins, 0)

  x_a.each_with_index do |x, i|
    # When every x is identical the width is zero; everything belongs in the
    # first bin. Otherwise floor explicitly and clamp x == max into the last bin.
    j = delta.zero? ? 0 : [((x - min) / delta).floor, n_bins - 1].min
    bin_sums[j] += y_a[i]
    bins[j] += 1
  end

  bin_sums.each_with_index.map do |bin_sum, i|
    [min + delta / 2 + i * delta, bin_sum, bins[i]]
  end
end
|
272
|
+
|
273
|
+
# Fold one more observation into a running [mean, m2, n] accumulator.
# @param x [Numeric] new observation
# @param accumulator [Array<Numeric>] [mean, m2, n] before the update
# @return [Array<Numeric>] [mean, m2, n] after the update
# @raise [ForChrisLibError] when accumulator is not a three-element Array
def inc_m2_var(x, accumulator)
  unless accumulator.is_a?(Array) && accumulator.size == 3
    raise ForChrisLibError, 'accumulator must be an array of [mean, m2, n]'
  end

  prev_mean, prev_m2, prev_n = accumulator
  count = prev_n + 1
  offset_before = x - prev_mean
  new_mean = prev_mean + offset_before.to_f / count
  offset_after = x - new_mean
  [new_mean, prev_m2 + offset_before * offset_after, count]
end
|
287
|
+
|
288
|
+
# Autocorrelation of a series at one lag.
#
# Averages the lagged products of deviations from the mean over the n - lag
# available pairs, then divides by x_a.var.
# @param x_a [Array<Numeric>] series (must provide #mean and #var)
# @param lag [Integer] non-negative lag
# @return [Float]
# @raise [ForChrisLibError] when lag or x_a is malformed
# @raise [RuntimeError] when the series is not longer than the lag
def acf(x_a, lag)
  raise ForChrisLibError, 'lag must be a non-negative Integer' unless lag.is_a?(Integer) && lag >= 0
  raise ForChrisLibError, 'x_a must respond to #size and #[ ]' unless x_a.respond_to?(:size) && x_a.respond_to?(:[])

  n = x_a.size
  raise "Lag is too large, n = #{n}, lag = #{lag}" if n < lag + 1

  mu = x_a.mean
  leading = x_a[0..-(lag + 1)]
  lagged_products = leading.each_index.inject(0) do |acc, i|
    acc + (leading[i] - mu) * (x_a[i + lag] - mu)
  end
  lagged_products.to_f / (n - lag) / x_a.var
end
|
305
|
+
|
306
|
+
# Mean of the bin positions, weighted by the bin counts.
#
# Bin i is located at min * delta + i * delta.
# @param bins [Array<Numeric>] weights per bin
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the weights sum to zero
# @raise [ForChrisLibError] when bins lacks #sum or #each_with_index
def weighted_mean(bins, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)

  weight_total = bins.sum
  return nil if weight_total.zero?

  weighted = bins.each_with_index.sum { |weight, idx| (min * delta + idx * delta) * weight }
  weighted.to_f / weight_total
end
|
320
|
+
|
321
|
+
# Weighted sample standard deviation about a supplied mean.
#
# Bin i is located at min * delta + i * delta; the divisor is (total weight - 1).
# @param bins [Array<Numeric>] weights per bin
# @param mu [Numeric] mean about which deviations are taken
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total weight is below 2
# @raise [ForChrisLibError] when bins or mu is malformed
def weighted_sd(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  total_weight = bins.sum
  return nil if total_weight < 2

  squared_dev = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**2 * w
  end
  # to_f prevents Integer floor division from truncating the variance.
  Math.sqrt(squared_dev.to_f / (total_weight - 1))
end
|
338
|
+
|
339
|
+
# Weighted skewness: the third central moment divided by the cubed standard deviation.
# @param bins [Array<Numeric>] weights per bin
# @param mu [Numeric] mean about which moments are taken
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total weight is below 2
# @raise [ForChrisLibError] when bins or mu is malformed
def weighted_skewness(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)
  return nil if bins.sum < 2

  m3 = weighted_m_3(bins, mu, min, delta)
  spread = weighted_sd(bins, mu, min, delta)
  m3 / spread**3
end
|
355
|
+
|
356
|
+
# Weighted third central moment about a supplied mean.
#
# Bin i is located at min * delta + i * delta.
# @param bins [Array<Numeric>] weights per bin
# @param mu [Numeric] mean about which the moment is taken
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total weight is below 1
# @raise [ForChrisLibError] when bins or mu is malformed
def weighted_m_3(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  cubed_dev = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**3 * w
  end
  # to_f prevents Integer floor division when every input is an Integer.
  cubed_dev.to_f / n
end
|
370
|
+
|
371
|
+
# Weighted fourth central moment about a supplied mean.
#
# Bin i is located at min * delta + i * delta.
# @param bins [Array<Numeric>] weights per bin
# @param mu [Numeric] mean about which the moment is taken
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the total weight is below 1
# @raise [ForChrisLibError] when bins or mu is malformed
def weighted_m_4(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  quartic_dev = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**4 * w
  end
  # to_f prevents Integer floor division when every input is an Integer.
  quartic_dev.to_f / n
end
|
385
|
+
|
386
|
+
# Probability mass per bin, keyed by bin position (min * delta + i * delta).
# @param bins [Array<Numeric>] counts per bin
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Hash{Numeric=>Float}] an all-zero histogram maps every key to 0.0
# @raise [ForChrisLibError] when bins lacks #sum or #each_with_index
def pdf_from_bins(bins, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)

  count_total = bins.sum
  count_total = 1 if count_total.zero? # avoid dividing by zero for empty histograms
  bins.each_with_index.to_h do |count, idx|
    [min * delta + idx * delta, count.to_f / count_total]
  end
end
|
393
|
+
|
394
|
+
# Cumulative distribution built from the bins' probability mass function.
# @param bins [Array<Numeric>] counts per bin
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Hash{Numeric=>Float}]
def cdf_from_bins(bins, min = 0, delta = 1)
  mass = pdf_from_bins(bins, min, delta)
  mass.cdf_from_pdf
end
|
399
|
+
|
400
|
+
# Normal probability density function (standard normal by default).
# @param x [Numeric] point at which the density is evaluated
# @param options [Hash] :mu (default 0) and :sigma (default 1)
# @return [Float]
# @raise [ForChrisLibError] when sigma is not a positive number
def normal_pdf(x, options = {})
  params = { mu: 0, sigma: 1 }.merge(options)
  mu = params[:mu]
  sigma = params[:sigma]
  raise ForChrisLibError, 'sigma must be positive' unless sigma.is_a?(Numeric) && sigma.positive?

  # Standardise in floating point: with Integer x, mu and sigma the exponent
  # would otherwise be floor-divided (e.g. -1 / 2 == -1).
  z = (x - mu).to_f / sigma
  Math.exp(-0.5 * z * z) / (sigma * Math.sqrt(2 * Math::PI))
end
|
411
|
+
|
412
|
+
# Standard normal cumulative distribution function, computed from the error function.
# @param x [Numeric]
# @return [Float]
def normal_cdf(x)
  scaled = x / Math.sqrt(2)
  0.5 * (1 + Math.erf(scaled))
end
|
418
|
+
|
419
|
+
# Skew-normal density: twice the standard normal density at x times the
# standard normal CDF at alpha * x.
# @param x [Numeric]
# @param options [Hash] :alpha shape parameter (default 0)
# @return [Float]
# @raise [ForChrisLibError] when alpha is not numeric
def skew_normal_pdf(x, options = { alpha: 0 })
  alpha = { alpha: 0 }.merge(options)[:alpha]
  raise ForChrisLibError, 'alpha must be numeric' unless alpha.is_a?(Numeric)

  density = normal_pdf(x)
  skew_weight = normal_cdf(alpha * x)
  2 * density * skew_weight
end
|
429
|
+
|
430
|
+
# Placeholder sampler: numerically integrates a normal density (mu 2, sigma 4)
# up to a uniform random point. Both arguments are currently ignored.
# @param _x [Object] unused
# @param options [Hash] unused
# @return [Float]
def skew_normal_rand(_x, options = { alpha: 0 })
  density_options = { mu: 2, sigma: 4 }
  upper_limit = rand
  cdf_calc(upper_limit, :normal_pdf, density_options, n_pts: 100, sigma: 4, mu: 2)
end
|
435
|
+
|
436
|
+
# Evaluate a * x^2 + b * x + c.
# @param x [Numeric]
# @param options [Hash] coefficients :a, :b, :c (defaults 2, 3, 4)
# @return [Numeric]
def parabola(x, options = {})
  a, b, c = { a: 2, b: 3, c: 4 }.merge(options).values_at(:a, :b, :c)
  a * x**2 + b * x + c
end
|
447
|
+
|
448
|
+
# Simpson's rule integration of a method referenced by symbol.
#
# The method is invoked as send(func, x, options) at each sample point.
# @param func [Symbol] name of a method callable on self (public or private)
# @param a [Numeric] lower limit
# @param b [Numeric] upper limit
# @param n [Integer] positive, even number of intervals
# @param options [Hash] forwarded to func on every call
# @return [Float]
# @raise [ForChrisLibError] when n is not a positive Integer or func is not defined
# @raise [RuntimeError] when n is odd
def simpson(func, a, b, n, options = {})
  # n == 0 would divide the interval by zero and return NaN/Infinity silently.
  raise ForChrisLibError, 'n must be a positive Integer' unless n.is_a?(Integer) && n.positive?
  raise "n must be even (received n=#{n})" unless n.even?
  # send can reach private methods, so the guard must look at them too.
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func, true)

  h = (b - a).to_f / n
  s = send(func, a, options) + send(func, b, options)
  1.step(n - 1, 2) { |i| s += 4 * send(func, a + i * h, options) } # odd interior points
  2.step(n - 2, 2) { |i| s += 2 * send(func, a + i * h, options) } # even interior points
  s * h / 3
end
|
465
|
+
|
466
|
+
# Draw n skew-normal samples by inverse-transform sampling of a tabulated CDF.
# @param n [Integer] number of samples (positive)
# @param alpha [Numeric] skew-normal shape parameter
# @return [Array<Float>]
# @raise [ForChrisLibError] when n is not a positive Integer
def skew_normal_rand_a(n, alpha)
  raise ForChrisLibError, 'n must be a positive Integer' unless n.is_a?(Integer) && n.positive?

  # Tabulate the CDF once and reuse it for every draw.
  table = arbitrary_cdf_a(:skew_normal_pdf, { alpha: alpha })
  Array.new(n) { inverse_transform_rand(table) }
end
|
475
|
+
|
476
|
+
# Inverse-transform sampling from a tabulated CDF.
#
# Draws a uniform random probability and maps it back to x by linear
# interpolation between the two surrounding table entries.
# @param cdf_a [Array<Array(Numeric, Numeric)>] non-empty [x, cumulative probability] pairs
# @return [Numeric] sampled x value, clamped to the first / last tabulated x
# @raise [ForChrisLibError] when cdf_a is empty or is not a list of pairs
def inverse_transform_rand(cdf_a)
  unless cdf_a.respond_to?(:map) && cdf_a.all? { |pair| pair.is_a?(Array) && pair.size >= 2 }
    raise ForChrisLibError, 'cdf_a must be an array of coordinate pairs'
  end

  x_a = cdf_a.map { |pair| pair[0] }
  p_a = cdf_a.map { |pair| pair[1] }
  raise ForChrisLibError, 'cdf_a cannot be empty' if p_a.empty?

  p_rand = rand
  # Outside the tabulated probabilities, return the boundary x value
  # (not the boundary probability).
  return x_a.first if p_rand <= p_a.first
  return x_a.last if p_rand >= p_a.last

  i = p_a.find_index { |p| p > p_rand }
  interpolate(p_rand, p_a[i - 1], p_a[i], x_a[i - 1], x_a[i])
end
|
492
|
+
|
493
|
+
# Linear interpolation between (x_L, y_L) and (x_U, y_U), evaluated at x.
# @param x [Numeric] point at which to interpolate
# @param x_L [Numeric] lower abscissa
# @param x_U [Numeric] upper abscissa (must differ from x_L)
# @param y_L [Numeric] value at x_L
# @param y_U [Numeric] value at x_U
# @return [Float]
# @raise [ForChrisLibError] when x_L equals x_U
def interpolate(x, x_L, x_U, y_L, y_U)
  raise ForChrisLibError, 'x_L and x_U must differ' if x_U == x_L

  # to_f keeps the slope from being floor-divided for Integer inputs.
  m = (y_U - y_L).to_f / (x_U - x_L)
  (x - x_L) * m + y_L
end
|
499
|
+
|
500
|
+
# Tabulate the CDF of a density on n_samples evenly spaced points across [-4, 4].
# @param func [Symbol] name of the density method
# @param options [Hash] forwarded to the density
# @param n_samples [Integer] number of points (greater than 1)
# @return [Array<Array(Float, Float)>] [x, cdf(x)] pairs in ascending x
# @raise [ForChrisLibError] when n_samples is invalid or func is not defined
def arbitrary_cdf_a(func, options, n_samples: 100)
  raise ForChrisLibError, 'n_samples must be greater than 1' unless n_samples.is_a?(Integer) && n_samples > 1
  raise ForChrisLibError, 'function must be defined' unless respond_to?(func)

  width = 8.0
  spacing = width / (n_samples - 1)
  Array.new(n_samples) do |k|
    x = -width / 2 + k * spacing
    [x, cdf_calc(x, func, options)]
  end
end
|
515
|
+
|
516
|
+
# Tabulated skew-normal CDF on n_samples evenly spaced points.
#
# Delegates to arbitrary_cdf_a with the skew-normal density, so the sampling
# grid is defined in one place only.
# @param options [Hash] forwarded to skew_normal_pdf (e.g. :alpha)
# @param n_samples [Integer] number of points (greater than 1)
# @return [Array<Array(Float, Float)>] [x, cdf(x)] pairs
# @raise [ForChrisLibError] when n_samples is not an Integer greater than 1
def skew_normal_cdf_a(options, n_samples: 100)
  raise ForChrisLibError, 'n_samples must be greater than 1' unless n_samples.is_a?(Integer) && n_samples > 1

  arbitrary_cdf_a(:skew_normal_pdf, options, n_samples: n_samples)
end
|
527
|
+
|
528
|
+
# Approximate a CDF by integrating a density up to x with Simpson's rule.
#
# The lower limit is normally mu - 5 * sigma. When x lies more than three
# sigma below mu, the limit becomes x - 2 * sigma so it stays below x.
# @param x [Numeric] upper integration limit
# @param func [Symbol] name of the density method (public or private)
# @param options [Hash] forwarded to the density
# @param mu [Numeric] location used to choose the lower limit
# @param sigma [Numeric] scale used to choose the lower limit
# @param n_pts [Integer] even number of Simpson intervals
# @return [Float]
# @raise [RuntimeError] when n_pts is odd
# @raise [ForChrisLibError] when func is not defined
def cdf_calc(x, func, options, mu: 0, sigma: 1, n_pts: 100)
  raise "n_pts must be even (received n_pts=#{n_pts})" unless n_pts.even?
  # simpson invokes func through send, so private methods are acceptable too.
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func, true)

  # x already includes mu; adding mu again would push the limit above x
  # whenever mu > 2 * sigma.
  lower = x - mu < -3 * sigma ? x - 2 * sigma : -5 * sigma + mu
  simpson(func, lower, x, n_pts, options)
end
|
543
|
+
|
544
|
+
# Format a number with thousands delimiters in its integer part.
# @param number [Numeric, String] value whose #to_s form is split on '.'
# @param delimiter [String] inserted between groups of three digits
# @param separator [String] placed between the integer and fractional parts
# @return [String] '' when number.to_s is empty
# @raise [ForChrisLibError] when number does not respond to #to_s
def delimit(number, delimiter = ',', separator = '.')
  raise ForChrisLibError, 'number must respond to #to_s' unless number.respond_to?(:to_s)

  whole, *fraction = number.to_s.split('.')
  return '' if whole.nil? # empty input leaves nothing to format

  # Block form inserts the delimiter literally, so backslashes in it are not
  # treated as regexp back-references.
  grouped = whole.gsub(/(\d)(?=(\d\d\d)+(?!\d))/) { "#{Regexp.last_match(1)}#{delimiter}" }
  [grouped, *fraction].join(separator)
end
|
555
|
+
|
556
|
+
# Hostname reported by the `hostname` command, truncated to ten characters.
# @return [String, nil] nil (after a warning) when no hostname can be determined
def computer_name_short
  host = begin
    # strip removes the trailing newline the command prints.
    `hostname`.to_s.strip
  rescue SystemCallError
    # e.g. Errno::ENOENT when the hostname command is not installed
    ''
  end
  if host.empty?
    warn 'computer_name_short could not determine hostname'
    return nil
  end
  host[0..9]
end
|
565
|
+
|
566
|
+
private
|
567
|
+
|
568
|
+
# Build the default win/loss calculator.
# @return [Object] a new WinLoss instance
# @raise [ForChrisLibError] when the WinLoss constant is not defined
def default_win_loss_calculator
  unless defined?(WinLoss)
    raise ForChrisLibError, 'WinLoss dependency is not available. Provide win_loss_calculator:'
  end

  WinLoss.new
end
|
574
|
+
|
575
|
+
# Lazily load the minimization gem and return its Brent minimiser.
# @return [Class] Minimization::Brent
# @raise [ForChrisLibError] when the minimization gem cannot be loaded
def default_minimizer_class
  begin
    require 'minimization'
    Minimization::Brent
  rescue LoadError
    raise ForChrisLibError, 'minimization gem is required to estimate bias or supply minimizer_class'
  end
end
|
582
|
+
end
|
583
|
+
|
584
|
+
String.class_eval do
  # Return the shortest text found between the first marker1 and the next marker2.
  # Markers are matched literally and the text may span lines.
  # @param marker1 [String] opening marker
  # @param marker2 [String] closing marker
  # @return [String, nil] nil when the markers are not found in that order
  def string_between_markers(marker1, marker2)
    opening = Regexp.escape(marker1)
    closing = Regexp.escape(marker2)
    found = match(/#{opening}(.*?)#{closing}/m)
    found && found[1]
  end
end
|
593
|
+
|
594
|
+
Integer.class_eval do
  # Sign of the integer, matching what {Float#sigmoid} returns for its float value.
  # @return [Integer] -1, 0, or 1
  def sigmoid
    self <=> 0
  end
end
|
601
|
+
|
602
|
+
Float.class_eval do
  # Sign of the float: 1 when greater than zero, -1 when less, 0 otherwise
  # (including NaN and negative zero).
  # @return [Integer] -1, 0, or 1
  def sigmoid
    return 1 if self > 0
    return -1 if self < 0

    0
  end
end
|
615
|
+
|
616
|
+
Array.class_eval do
|
617
|
+
# Pad every sub-array with nil up to the length of the longest one.
# Shorter sub-arrays are extended in place; the outer array returned is new.
# @return [Array<Array>]
# @raise [ForChrisLibError] when an element is not an Array
def pad_sub_arrays!
  raise ForChrisLibError, 'pad_sub_arrays! requires an array of arrays' unless all? { |a| a.is_a?(Array) }

  widest = map(&:length).max
  map do |sub|
    sub.length == widest ? sub : sub.fill(nil, sub.length...widest)
  end
end
|
631
|
+
|
632
|
+
# Drop every nil from each sub-array, returning new arrays (the receiver is not modified).
# @return [Array<Array>]
# @raise [ForChrisLibError] when an element is not an Array
def unpad_sub_arrays!
  raise ForChrisLibError, 'unpad_sub_arrays! requires an array of arrays' unless all? { |a| a.is_a?(Array) }

  map { |sub| sub.reject(&:nil?) }
end
|
638
|
+
|
639
|
+
# Count how many values round to each integer, ordered by ascending key.
# @return [Hash{Integer=>Integer}]
# @raise [ForChrisLibError] when an element does not respond to #round
def histogram_int
  raise ForChrisLibError, 'histogram_int requires numeric values' unless all? { |x| x.respond_to?(:round) }

  counts = each_with_object(Hash.new(0)) { |value, acc| acc[value.round] += 1 }
  counts.sort_by { |bucket, _| bucket }.to_h
end
|
647
|
+
|
648
|
+
# Shift histogram bins by a possibly fractional amount.
#
# The whole part of the shift is applied with bin_int_shift. The fractional
# part then moves that fraction of each bin into its neighbour in the shift
# direction; the bin at the far edge keeps all of its content.
# @param x [Numeric] shift in bins; positive moves towards higher indices
# @return [Array<Numeric>] self when x is zero, otherwise a new array
# @raise [ForChrisLibError] when the bins or the shift are not numeric
def bin_shift(x)
  raise ForChrisLibError, 'bin_shift requires numeric bins' unless all? { |v| v.is_a?(Numeric) }
  raise ForChrisLibError, 'shift must be numeric' unless x.is_a?(Numeric)

  last = length - 1
  return self if x.zero?

  if x.positive?
    whole = x.floor
    frac = x - whole
    shifted = bin_int_shift(whole)
    moved = shifted.map { |count| count * frac }
    moved[-1] = 0.0 # nothing leaves the top bin
    kept = shifted.zip(moved).map { |count, out| count - out }
    upper = (1..last).map { |i| kept[i] + moved[i - 1] }
    upper.insert(0, kept[0])
  else
    whole = x.ceil
    frac = (x - whole).abs
    shifted = bin_int_shift(whole)
    moved = shifted.map { |count| count * frac }
    moved[0] = 0.0 # nothing leaves the bottom bin
    kept = shifted.zip(moved).map { |count, out| count - out }
    lower = (0..(last - 1)).map { |i| kept[i] + moved[i + 1] }
    lower.insert(last, kept[last])
  end
end
|
677
|
+
|
678
|
+
# Shift histogram bins by a whole number of positions.
#
# Applies single-step shifts (right for positive j, left for negative j),
# at most length - 1 times.
# @param j [Integer] number of bins to shift
# @return [Array<Numeric>] self when no step is taken, otherwise a new array
# @raise [ForChrisLibError] when the bins are not numeric or j is not an Integer
def bin_int_shift(j)
  raise ForChrisLibError, 'bin_int_shift requires numeric bins' unless all? { |v| v.is_a?(Numeric) }
  raise ForChrisLibError, 'shift must be an Integer' unless j.is_a?(Integer)

  last = length - 1
  return self if j.zero?

  step = j.positive? ? :bin_int_shift_right : :bin_int_shift_left
  hops = [j.abs, last].min
  shifted = self
  hops.times { shifted = shifted.public_send(step, last) }
  shifted
end
|
701
|
+
|
702
|
+
# One-position leftward shift used by {#bin_int_shift}.
#
# The first bin absorbs the second, interior bins take their right-hand
# neighbour, and the bin at i_max becomes 0.
# @param i_max [Integer] index of the last bin
# @return [Array<Numeric>]
def bin_int_shift_left(i_max)
  each_index.map do |i|
    next 0 if i == i_max

    i.positive? ? self[i + 1] : self[0] + self[1]
  end
end
|
715
|
+
|
716
|
+
# One-position rightward shift used by {#bin_int_shift}.
#
# The first bin becomes 0, interior bins take their left-hand neighbour, and
# the bin at i_max absorbs its left-hand neighbour.
# @param i_max [Integer] index of the last bin
# @return [Array<Numeric>]
def bin_int_shift_right(i_max)
  each_index.map do |i|
    next 0 if i.zero?

    i < i_max ? self[i - 1] : self[i - 1] + self[i]
  end
end
|
729
|
+
|
730
|
+
# Map every element to its sign: 1 when positive, -1 when negative, 0 otherwise.
# @return [Array<Integer>]
# @raise [ForChrisLibError] when an element is not Numeric
def sigmoid
  raise ForChrisLibError, 'sigmoid requires numeric values' unless all? { |v| v.is_a?(Numeric) }

  map do |v|
    next 1 if v > 0

    v < 0 ? -1 : 0
  end
end
|
744
|
+
|
745
|
+
# Linear interpolation over [x, y] pairs sorted by ascending x.
#
# Values of x outside the table are clamped to the first / last y. Only the
# first two entries of each row are used.
# @param x [Numeric] point at which to interpolate
# @return [Numeric] interpolated y (the tabulated y when x matches a row exactly)
# @raise [ForChrisLibError] when the array is empty or a row is not an Array of at least two values
def interpolate(x)
  raise ForChrisLibError, 'interpolate requires a two-column array' unless all? { |pair| pair.is_a?(Array) && pair.size >= 2 }
  raise ForChrisLibError, 'interpolate requires at least one [x, y] pair' if empty?

  return first[1] if x <= first[0]
  return last[1] if x >= last[0]

  # Index rows directly instead of transposing, so rows of unequal length
  # (allowed by the guard above) do not raise IndexError.
  upper = find_index { |pair| pair[0] >= x }
  x_hi, y_hi = self[upper]
  return y_hi if x == x_hi

  x_lo, y_lo = self[upper - 1]
  slope = (y_hi - y_lo).to_f / (x_hi - x_lo)
  (x - x_lo) * slope + y_lo
end
|
762
|
+
|
763
|
+
# Nesting depth of the array: 1 for a flat (or empty) array, plus one for each
# further level of the most deeply nested sub-array.
# @return [Integer]
def dimension
  return 0 unless is_a?(Array)

  nested_depths = grep(Array).map { |inner| inner.dimension + 1 }
  [1, *nested_depths].max
end
|
777
|
+
|
778
|
+
# Sum every value of a 1-, 2-, or 3-dimensional array.
# @return [Numeric]
# @raise [RuntimeError] when {#dimension} is not 1, 2, or 3
def total
  depth = dimension
  raise "not implemented for #{depth} dimensions" unless [1, 2, 3].include?(depth)

  case depth
  when 1 then sum
  when 2 then sum { |row| row.sum }
  else sum { |plane| plane.sum { |row| row.sum } }
  end
end
|
794
|
+
|
795
|
+
# Relative frequency of each distinct value, ordered by ascending value.
# @return [Hash{Numeric=>Float}]
def pdf
  sample_size = count
  frequencies = tally.sort_by { |value, _| value }
  frequencies.to_h { |value, hits| [value, hits.to_f / sample_size] }
end
|
800
|
+
|
801
|
+
# Cumulative relative frequency per distinct value, accumulated over {#pdf} in key order.
# @return [Hash{Numeric=>Float}]
def cdf
  mass = pdf
  probabilities = mass.values
  mass.keys.each_with_index.to_h { |key, i| [key, probabilities[0..i].sum] }
end
|
807
|
+
end
|
808
|
+
|
809
|
+
Hash.class_eval do
  # Running total of the values in insertion order, keyed like the receiver.
  # @return [Hash{Object=>Float}]
  def cdf_from_pdf
    masses = values
    keys.each_with_index.to_h { |key, idx| [key, masses[0..idx].sum] }
  end

  # Collapse a handicap histogram onto integer handicaps -5..37.
  #
  # Keys are converted with #to_i; counts whose key falls outside the range
  # are dropped. Only handicaps that received a count appear in the result.
  # @return [Hash{Integer=>Integer}] ordered by ascending handicap, default value 0
  def male_ga_hist
    (-5..37).each_with_object(Hash.new(0)) do |handicap, hist|
      each { |key, count| hist[handicap] += count if key.to_i == handicap }
    end
  end

  # Collapse a handicap histogram onto integer handicaps -5..46.
  #
  # Same rules as {#male_ga_hist}, with the wider upper limit.
  # @return [Hash{Integer=>Integer}] ordered by ascending handicap, default value 0
  def female_male_ga_hist
    (-5..46).each_with_object(Hash.new(0)) do |handicap, hist|
      each { |key, count| hist[handicap] += count if key.to_i == handicap }
    end
  end
end
|