chris_lib 2.2.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,835 @@
1
+ require 'ostruct'
2
+
3
+ # Error raised when ForChrisLib encounters invalid input or missing dependencies.
4
+ ForChrisLibError = Class.new(StandardError) unless defined?(ForChrisLibError)
5
+
6
+ # Aggregated analytical helpers formerly housed in golf_lab.
7
+ module ForChrisLib
8
+ include ChrisLib
9
+ include Math
10
+
11
# Turn a list of scores into win probabilities: every entry equal to the
# lowest score shares the win equally and every other entry gets zero.
# @param results [#each] collection of mutually comparable scores
# @return [Array<Float>] one probability per input, in input order
# @raise [ForChrisLibError] when results is not enumerable or is empty
def outcome(results)
  raise ForChrisLibError, 'results must respond to #each' unless results.respond_to?(:each)

  scores = results.to_a
  raise ForChrisLibError, 'results cannot be empty' if scores.empty?

  best = scores.min
  winner_count = scores.count { |score| score == best }
  # Fall back to a divisor of 1 when nothing compares equal to the minimum.
  divisor = winner_count.zero? ? 1 : winner_count
  scores.map { |score| (score == best ? 1 : 0).to_f / divisor }
end
23
+
24
# Evaluate a chi-squared goodness-of-fit test from summary statistics.
class ChiSquaredStdErr
  # @param means [#to_a] observed means
  # @param std_errs [#to_a] standard errors of the means (numeric, non-zero)
  # @param mus [#to_a] hypothesised means
  # @param confidence_level [Numeric] strictly between 0 and 1
  # @raise [ArgumentError] when a collection does not respond to #to_a
  # @raise [ForChrisLibError] when the confidence level or collections are invalid
  def initialize(means, std_errs, mus, confidence_level: 0.95)
    [means, std_errs, mus].each do |collection|
      unless collection.respond_to?(:to_a)
        raise ArgumentError, 'means, std_errs, and mus must be enumerable'
      end
    end
    # Coerce once: anything accepted above (Range, Enumerator, Set, ...) must
    # support the #size, #empty?, #any? and #zip calls made afterwards.
    @means = means.to_a
    @std_errs = std_errs.to_a
    @mus = mus.to_a
    @confidence_level = confidence_level
    check_confidence_level
    check_collections
    @threshold = 1 - confidence_level
  end

  # Sum the squared standardised deviations and look up the tail probability,
  # using the number of means as the degrees of freedom.
  # @return [OpenStruct] containing :pass?, :p, and :chi2
  def call
    chi2 = means.zip(mus, std_errs).map { |m, mu, se| (m.to_f - mu)**2 / se**2 }.sum
    p_value = PChiSquared.new.call(means.size, chi2)
    OpenStruct.new(pass?: p_value > threshold, p: p_value, chi2: chi2)
  end

  private

  attr_reader :means, :std_errs, :mus, :threshold, :confidence_level

  # Reject confidence levels outside the open interval (0, 1); non-numeric
  # values are rejected with the same error instead of a NoMethodError.
  def check_confidence_level
    valid = confidence_level.is_a?(Numeric) && confidence_level.real? &&
            confidence_level.positive? && confidence_level < 1
    return if valid

    msg = "Confidence level is #{confidence_level} must be between 0 and 1"
    raise ForChrisLibError, msg
  end

  # Validate the coerced arrays in the same order as the original checks:
  # equal lengths, non-empty, numeric standard errors, non-zero standard errors.
  def check_collections
    unless means.size == std_errs.size && means.size == mus.size
      raise ForChrisLibError, 'means, std_errs, and mus must be the same length'
    end
    raise ForChrisLibError, 'means cannot be empty' if means.empty?
    raise ForChrisLibError, 'std_errs must all be numeric' unless std_errs.all?(Numeric)
    return unless std_errs.any?(&:zero?)

    raise ForChrisLibError, 'std_errs must be non-zero to avoid division by zero'
  end
end
70
+
71
# Wrapper around chi-squared tail probability helpers.
class PChiSquared
  # Relative tolerance at which both expansions stop iterating.
  EPSILON = 1e-12
  # Floor substituted for vanishing denominators in the continued fraction.
  TINY = 1e-30
  # Hard cap on iterations; the last estimate is returned if it is reached.
  MAX_ITERATIONS = 10_000

  # @param calculator [#call] optional dependency used to evaluate the tail
  #   probability instead of the built-in implementation
  def initialize(calculator: nil)
    @calculator = calculator
  end

  # @param dof [Numeric] degrees of freedom (positive)
  # @param nu [Numeric] chi-squared statistic (non-negative)
  # @return [Float] upper-tail probability (or whatever the injected
  #   calculator returns)
  # @raise [ArgumentError] when dof is not positive or nu is negative/non-numeric
  def call(dof, nu)
    raise ArgumentError, 'degrees of freedom must be positive' unless dof.is_a?(Numeric) && dof.positive?
    raise ArgumentError, 'chi-squared statistic must be non-negative' unless nu.is_a?(Numeric) && nu >= 0
    return calculator.call(dof, nu) if calculator

    # The survival function is the regularized upper incomplete gamma
    # Q(dof / 2, nu / 2).
    regularized_gamma_q(dof.to_f / 2.0, nu.to_f / 2.0)
  end

  private

  attr_reader :calculator

  # Pick the expansion by the size of x relative to s, following the scheme
  # credited to Numerical Recipes (https://numerical.recipes).
  def regularized_gamma_q(s, x)
    # An infinite statistic leaves no upper tail. Without this guard the
    # continued fraction degenerates to NaN after exhausting its iterations.
    return 0.0 if x.infinite?

    if x < s + 1.0
      1.0 - regularized_gamma_p_series(s, x)
    else
      regularized_gamma_q_continued_fraction(s, x)
    end
  end

  # Series expansion of the regularized lower incomplete gamma P(s, x).
  def regularized_gamma_p_series(s, x)
    return 0.0 if x <= 0.0

    gln = Math.lgamma(s).first
    sum = 1.0 / s
    term = sum
    n = 1
    loop do
      term *= x / (s + n)
      sum += term
      break if term.abs < sum.abs * EPSILON

      n += 1
      break if n > MAX_ITERATIONS
    end
    Math.exp(-x + s * Math.log(x) - gln) * sum
  end

  # Continued-fraction evaluation of the regularized upper incomplete gamma
  # Q(s, x); near-zero denominators are clamped to TINY to avoid dividing by 0.
  def regularized_gamma_q_continued_fraction(s, x)
    gln = Math.lgamma(s).first
    b = x + 1.0 - s
    c = 1.0 / TINY
    d = 1.0 / b
    h = d
    n = 1
    loop do
      an = -n * (n - s)
      b += 2.0
      d = an * d + b
      d = TINY if d.abs < TINY
      c = b + an / c
      c = TINY if c.abs < TINY
      d = 1.0 / d
      delta = d * c
      h *= delta
      break if (delta - 1.0).abs < EPSILON

      n += 1
      break if n > MAX_ITERATIONS
    end
    Math.exp(-x + s * Math.log(x) - gln) * h
  end
end
148
+
149
+ # Lightweight helper that keeps table data and headers together.
150
+ class Framed
151
+ attr_reader :hsh
152
+
153
+ # @param header [Array<String>]
154
+ # @param rows [Array<Array>]
155
+ def initialize(header, rows)
156
+ raise 'header must be an array' unless header.is_a?(Array)
157
+ raise 'rows must be an array' unless rows.is_a?(Array)
158
+ raise 'header cannot be empty' if header.empty?
159
+
160
+ @hsh = { header: header, rows: rows }
161
+
162
+ rows.each_with_index do |row, index|
163
+ raise "row #{index} must respond to #size" unless row.respond_to?(:size)
164
+ next if row.size == header.size
165
+
166
+ raise "row #{index} size not equal to header size"
167
+ end
168
+ end
169
+
170
+ # @return [Array<String>]
171
+ def header
172
+ hsh[:header]
173
+ end
174
+
175
+ # @return [Array<Array>]
176
+ def rows
177
+ hsh[:rows]
178
+ end
179
+ end
180
+
181
# Fixed marker string; the original comment describes it as a sentinel used
# in legacy tests.
# @return [String] always 'here'
def test
  sentinel = 'here'
  sentinel
end
185
+
186
# Fraction of variance unexplained: residual sum of squares divided by the
# total sum of squares of the observations about their mean.
# Relies on y_a responding to #mean, which is not defined in this block.
# @param y_hat_a [Array<Numeric>] predictions (at least two)
# @param y_a [Array<Numeric>] observations, same length as y_hat_a
# @return [Float]
# @raise [ForChrisLibError] when inputs are too short or differ in length
def fvu(y_hat_a:, y_a:)
  unless y_hat_a.respond_to?(:size) && y_hat_a.respond_to?(:zip)
    raise ForChrisLibError, 'y_hat_a must respond to #size and #zip'
  end
  raise ForChrisLibError, 'y_a must respond to #size' unless y_a.respond_to?(:size)
  raise ForChrisLibError, 'y_hat_a must contain at least two values' if y_hat_a.size < 2
  raise ForChrisLibError, 'y_hat_a and y_a must be the same length' unless y_hat_a.size == y_a.size

  residual_ss = y_hat_a.zip(y_a).sum { |predicted, observed| (observed - predicted)**2 }.to_f
  centre = y_a.mean
  total_ss = y_a.sum { |observed| (observed - centre)**2 }.to_f
  residual_ss / total_ss
end
201
+
202
# Estimate a histogram bias by searching for the bin shift whose resulting
# win/loss statistic is closest to 50.
#
# For a trial shift the first histogram slot is shifted with #bin_shift,
# turned into a PMF with pdf_from_hist, and handed to the calculator; the
# objective is the squared distance of the first win/loss statistic from 50.0.
# The minimizer is built with store.min, store.max and that objective, and
# the negated location of its minimum is returned.
# @param store [#histogram, #min, #max]
# @param win_loss_calculator [#win_loss_graph, #win_loss_stats] defaults to
#   the result of default_win_loss_calculator
# @param minimizer_class [Class] defaults to the result of default_minimizer_class
# @return [Float]
# @raise [ForChrisLibError] when store, its histogram, or the minimizer class
#   does not offer the required interface
def bias_estimate_by_min(store, win_loss_calculator: nil, minimizer_class: nil)
  calculator = win_loss_calculator || default_win_loss_calculator
  unless %i[histogram min max].all? { |message| store.respond_to?(message) }
    raise ForChrisLibError, 'store must respond to :histogram, :min, and :max'
  end

  slots = store.histogram
  unless slots.respond_to?(:[]) && slots[0]
    raise ForChrisLibError, 'store.histogram must include counts in the first slot'
  end

  squared_miss = lambda do |shift|
    shifted_counts = store.histogram[0].bin_shift(shift)
    pmf = pdf_from_hist(shifted_counts, min: store.min)
    graph = calculator.win_loss_graph(nil, pdf: pmf)
    first_stat = calculator.win_loss_stats(graph)[0]
    (first_stat - 50.0)**2
  end

  klass = minimizer_class || default_minimizer_class
  raise ForChrisLibError, 'minimizer_class must respond to .new' unless klass.respond_to?(:new)

  search = klass.new(store.min, store.max, squared_miss)
  search.expected = 0.0 if search.respond_to?(:expected=)
  search.iterate
  -search.x_minimum
end
234
+
235
# Normalise bin counts into a probability mass function keyed by
# bin position plus an offset.
# @param bins [Array<Integer>] counts per bin
# @param min [Integer] key assigned to the first bin
# @return [Hash{Integer=>Float}] key => share of the total count; a zero
#   total is treated as 1 so every share is 0.0
# @raise [ForChrisLibError] when bins lacks #each_with_index or #sum
def pdf_from_hist(bins, min: 0)
  unless bins.respond_to?(:each_with_index) && bins.respond_to?(:sum)
    raise ForChrisLibError, 'bins must respond to #each_with_index and #sum'
  end

  count_total = bins.sum
  count_total = 1 if count_total.zero?
  pairs = bins.map.with_index { |count, offset| [offset + min, count.to_f / count_total] }
  Hash[pairs]
end
246
+
247
# Sum y values into equal-width bins spanning the range of the x values.
# @param x_y [Array<Array(Numeric, Numeric)>] [x, y] pairs
# @param n_bins [Integer] number of bins (positive)
# @return [Array<Array(Float, Numeric, Integer)>] one
#   [bin centre, sum of y, number of points] triple per bin
# @raise [ForChrisLibError] when x_y cannot be transposed, is empty, or
#   n_bins is not a positive Integer
def summed_bins_histogram(x_y, n_bins)
  raise ForChrisLibError, 'x_y must respond to #transpose' unless x_y.respond_to?(:transpose)
  raise ForChrisLibError, 'n_bins must be a positive Integer' unless n_bins.is_a?(Integer) && n_bins.positive?

  x_a, y_a = x_y.transpose
  raise ForChrisLibError, 'x_y cannot be empty' if x_a.nil? || x_a.empty?

  min, max = x_a.minmax
  delta = (max - min).to_f / n_bins
  bin_sums = Array.new(n_bins, 0)
  bins = Array.new(n_bins, 0)

  x_a.each_with_index do |x, i|
    # delta is zero when every x is identical; all points then share bin 0.
    # Otherwise floor explicitly and clamp so x == max lands in the last bin.
    j = delta.zero? ? 0 : [((x - min) / delta).floor, n_bins - 1].min
    bin_sums[j] += y_a[i]
    bins[j] += 1
  end

  bin_sums.each_with_index.map do |bin_sum, i|
    [min + delta / 2 + i * delta, bin_sum, bins[i]]
  end
end
272
+
273
# One step of a running mean / sum-of-squared-deviations update.
# @param x [Numeric] the new observation
# @param accumulator [Array<Numeric>] current [mean, m2, n]
# @return [Array<Numeric>] updated [mean, m2, n]; the input is not modified
# @raise [ForChrisLibError] when accumulator is not a three-element Array
def inc_m2_var(x, accumulator)
  unless accumulator.is_a?(Array) && accumulator.size == 3
    raise ForChrisLibError, 'accumulator must be an array of [mean, m2, n]'
  end

  prev_mean, prev_m2, prev_n = accumulator
  count = prev_n + 1
  offset_before = x - prev_mean
  new_mean = prev_mean + offset_before.to_f / count
  # The second offset uses the updated mean.
  offset_after = x - new_mean
  [new_mean, prev_m2 + offset_before * offset_after, count]
end
287
+
288
# Autocorrelation of a series at one lag: the mean lagged cross-product of
# deviations from the mean (averaged over n - lag pairs) divided by x_a.var.
# Relies on x_a responding to #mean and #var, which are not defined here.
# @param x_a [Array<Numeric>]
# @param lag [Integer] non-negative lag, smaller than the series length
# @return [Float]
# @raise [ForChrisLibError] when lag or x_a has the wrong type
# @raise [RuntimeError] when the series is too short for the lag
def acf(x_a, lag)
  raise ForChrisLibError, 'lag must be a non-negative Integer' unless lag.is_a?(Integer) && lag >= 0
  unless x_a.respond_to?(:size) && x_a.respond_to?(:[])
    raise ForChrisLibError, 'x_a must respond to #size and #[ ]'
  end

  n = x_a.size
  raise "Lag is too large, n = #{n}, lag = #{lag}" if n < lag + 1

  centre = x_a.mean
  leading = x_a[0..-(lag + 1)]
  cross_sum = leading.each_with_index.inject(0) do |acc, (value, i)|
    acc + (value - centre) * (x_a[i + lag] - centre)
  end
  cross_sum.to_f / (n - lag) / x_a.var
end
305
+
306
# Mean of the bin values weighted by the bin counts, where bin i has the
# value min * delta + i * delta.
# @param bins [Array<Numeric>] weight of each bin
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the weights sum to zero
# @raise [ForChrisLibError] when bins lacks #sum or #each_with_index
def weighted_mean(bins, min = 0, delta = 1)
  unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
    raise ForChrisLibError, 'bins must respond to #sum and #each_with_index'
  end
  return nil if bins.sum.zero?

  weighted_total = bins.each_with_index.sum { |weight, i| (min * delta + i * delta) * weight }
  weighted_total.to_f / bins.sum
end
320
+
321
# Weighted sample standard deviation about a supplied mean, where bin i has
# the value min * delta + i * delta and the divisor is (total weight - 1).
# @param bins [Array<Numeric>] weight of each bin
# @param mu [Numeric] mean to measure deviations from
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the weights sum to less than 2
# @raise [ForChrisLibError] when bins lacks #sum/#each_with_index or mu is
#   not Numeric
def weighted_sd(bins, mu, min = 0, delta = 1)
  unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
    raise ForChrisLibError, 'bins must respond to #sum and #each_with_index'
  end
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  total_weight = bins.sum
  return nil if total_weight < 2

  squared_deviations = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**2 * w
  end
  # to_f keeps all-Integer inputs from truncating the variance via Integer
  # division before the square root is taken.
  Math.sqrt(squared_deviations.to_f / (total_weight - 1))
end
338
+
339
# Weighted skewness: the weighted third central moment divided by the cube
# of the weighted standard deviation.
# @param bins [Array<Numeric>] weight of each bin
# @param mu [Numeric] mean to measure deviations from
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the weights sum to less than 2
# @raise [ForChrisLibError] when bins lacks #sum/#each_with_index or mu is
#   not Numeric
def weighted_skewness(bins, mu, min = 0, delta = 1)
  unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
    raise ForChrisLibError, 'bins must respond to #sum and #each_with_index'
  end
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)
  return nil if bins.sum < 2

  m3 = weighted_m_3(bins, mu, min, delta)
  spread = weighted_sd(bins, mu, min, delta)
  m3 / spread**3
end
355
+
356
# Weighted third central moment about a supplied mean, where bin i has the
# value min * delta + i * delta.
# @param bins [Array<Numeric>] weight of each bin
# @param mu [Numeric] mean to measure deviations from
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the weights sum to less than 1
# @raise [ForChrisLibError] when bins lacks #sum/#each_with_index or mu is
#   not Numeric
def weighted_m_3(bins, mu, min = 0, delta = 1)
  unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
    raise ForChrisLibError, 'bins must respond to #sum and #each_with_index'
  end
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  cubed_deviations = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**3 * w
  end
  # to_f avoids Integer division truncating the moment for Integer inputs.
  cubed_deviations.to_f / n
end
370
+
371
# Weighted fourth central moment about a supplied mean, where bin i has the
# value min * delta + i * delta.
# @param bins [Array<Numeric>] weight of each bin
# @param mu [Numeric] mean to measure deviations from
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Float, nil] nil when the weights sum to less than 1
# @raise [ForChrisLibError] when bins lacks #sum/#each_with_index or mu is
#   not Numeric
def weighted_m_4(bins, mu, min = 0, delta = 1)
  unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
    raise ForChrisLibError, 'bins must respond to #sum and #each_with_index'
  end
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  quartic_deviations = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**4 * w
  end
  # to_f avoids Integer division truncating the moment for Integer inputs.
  quartic_deviations.to_f / n
end
385
+
386
# Normalise bin weights into a probability mass function keyed by the bin
# value min * delta + i * delta.
# @param bins [Array<Numeric>] weight of each bin
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Hash{Numeric=>Float}] a zero total is treated as 1, so every
#   probability is then 0.0
# @raise [ForChrisLibError] when bins lacks #sum or #each_with_index
def pdf_from_bins(bins, min = 0, delta = 1)
  unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
    raise ForChrisLibError, 'bins must respond to #sum and #each_with_index'
  end

  weight_total = bins.sum
  weight_total = 1 if weight_total.zero?
  bins.each_with_index.each_with_object({}) do |(weight, i), pmf|
    pmf[min * delta + i * delta] = weight.to_f / weight_total
  end
end
393
+
394
# Cumulative distribution derived from histogram bins: builds the PMF with
# pdf_from_bins and converts it with Hash#cdf_from_pdf.
# @param bins [Array<Numeric>] weight of each bin
# @param min [Numeric] offset of the first bin, in units of delta
# @param delta [Numeric] bin width
# @return [Hash{Numeric=>Float}]
def cdf_from_bins(bins, min = 0, delta = 1)
  pmf = pdf_from_bins(bins, min, delta)
  pmf.cdf_from_pdf
end
399
+
400
# Normal probability density at x (standard normal unless overridden).
# @param x [Numeric]
# @param options [Hash] optional :mu (default 0) and :sigma (default 1)
# @return [Float]
# @raise [ForChrisLibError] when sigma is not a positive Numeric
def normal_pdf(x, options = {})
  params = { mu: 0, sigma: 1 }.merge(options)
  mu = params[:mu]
  sigma = params[:sigma]
  raise ForChrisLibError, 'sigma must be positive' unless sigma.is_a?(Numeric) && sigma.positive?

  # Standardise in Float: with Integer x, mu and sigma the exponent would
  # otherwise be floored by Integer division (e.g. -1 / 2 == -1).
  z = (x - mu).to_f / sigma
  Math.exp(-0.5 * z * z) / (Math.sqrt(2 * Math::PI) * sigma)
end
411
+
412
# Cumulative distribution function of the standard normal, evaluated
# through the error function.
# @param x [Numeric]
# @return [Float]
def normal_cdf(x)
  scaled = x / Math.sqrt(2)
  0.5 * (1 + Math.erf(scaled))
end
418
+
419
# Skew-normal density in the alpha parameterisation:
# 2 * normal_pdf(x) * normal_cdf(alpha * x).
# @param x [Numeric]
# @param options [Hash] optional :alpha (default 0)
# @return [Float]
# @raise [ForChrisLibError] when alpha is not Numeric
def skew_normal_pdf(x, options = { alpha: 0 })
  alpha = { alpha: 0 }.merge(options)[:alpha]
  raise ForChrisLibError, 'alpha must be numeric' unless alpha.is_a?(Numeric)

  density = normal_pdf(x)
  2 * density * normal_cdf(alpha * x)
end
429
+
430
# Placeholder sampler (per the original comment). Both arguments are
# currently ignored: the method numerically integrates normal_pdf with
# mu 2 / sigma 4 up to a uniform random point via cdf_calc.
# NOTE(review): the result is a CDF value rather than a skew-normal draw —
# confirm the intended behaviour before relying on it.
# @param _x [Object] unused
# @param options [Hash] unused
# @return [Float]
def skew_normal_rand(_x, options = { alpha: 0 })
  location = 2
  scale = 4
  upper_limit = rand
  cdf_calc(upper_limit, :normal_pdf, { mu: location, sigma: scale }, n_pts: 100, sigma: scale, mu: location)
end
435
+
436
# Evaluate a * x**2 + b * x + c.
# @param x [Numeric]
# @param options [Hash] optional :a (default 2), :b (default 3), :c (default 4)
# @return [Numeric]
def parabola(x, options = {})
  a, b, c = { a: 2, b: 3, c: 4 }.merge(options).values_at(:a, :b, :c)
  a * x**2 + b * x + c
end
447
+
448
# Simpson's rule numerical integration of a method referenced by symbol.
# The integrand is invoked as send(func, x, options).
# @param func [Symbol] name of the integrand method
# @param a [Numeric] lower limit
# @param b [Numeric] upper limit
# @param n [Integer] positive, even number of intervals
# @param options [Hash] passed through to the integrand
# @return [Float]
# @raise [ForChrisLibError] when n is not a positive Integer or func is not
#   a method this object responds to
# @raise [RuntimeError] when n is odd
def simpson(func, a, b, n, options = {})
  # Zero or negative n used to slip through and yield Infinity or a
  # meaningless value; non-Integer n failed with NoMethodError on #even?.
  raise ForChrisLibError, 'n must be a positive Integer' unless n.is_a?(Integer) && n.positive?
  raise "n must be even (received n=#{n})" unless n.even?
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func)

  h = (b - a).to_f / n
  s = send(func, a, options) + send(func, b, options)
  # Odd interior nodes carry weight 4, even interior nodes weight 2.
  1.step(n - 1, 2) { |i| s += 4 * send(func, a + i * h, options) }
  2.step(n - 2, 2) { |i| s += 2 * send(func, a + i * h, options) }
  s * h / 3
end
465
+
466
# Draw n samples by inverse-transform sampling from a tabulated CDF of
# skew_normal_pdf; the table is built once and reused for every draw.
# @param n [Integer] number of samples (positive)
# @param alpha [Numeric] passed to skew_normal_pdf as :alpha
# @return [Array<Float>]
# @raise [ForChrisLibError] when n is not a positive Integer
def skew_normal_rand_a(n, alpha)
  raise ForChrisLibError, 'n must be a positive Integer' unless n.is_a?(Integer) && n.positive?

  table = arbitrary_cdf_a(:skew_normal_pdf, { alpha: alpha })
  Array.new(n) { inverse_transform_rand(table) }
end
475
+
476
# Inverse transform sampling from a discretised CDF: draw a uniform random
# probability and map it back to x by linear interpolation between the two
# neighbouring table entries.
# @param cdf_a [Array<Array(Numeric, Numeric)>] [x, cumulative probability]
#   pairs; presumably ordered by increasing probability — the lookup takes
#   the first entry whose probability exceeds the draw
# @return [Numeric] a sampled x value
# @raise [ForChrisLibError] when cdf_a is empty or not a list of pairs
def inverse_transform_rand(cdf_a)
  unless cdf_a.respond_to?(:map) && cdf_a.all? { |pair| pair.is_a?(Array) && pair.size >= 2 }
    raise ForChrisLibError, 'cdf_a must be an array of coordinate pairs'
  end

  x_a = cdf_a.map { |pair| pair[0] }
  p_a = cdf_a.map { |pair| pair[1] }
  raise ForChrisLibError, 'cdf_a cannot be empty' if p_a.empty?

  p_rand = rand
  # Outside the tabulated probability range, clamp to the end x values.
  # (Returning the probabilities themselves would hand back a value that is
  # not on the x axis at all.)
  return x_a.first if p_rand <= p_a.first
  return x_a.last if p_rand >= p_a.last

  i = p_a.find_index { |p| p > p_rand }
  interpolate(p_rand, p_a[i - 1], p_a[i], x_a[i - 1], x_a[i])
end
492
+
493
# Linear interpolation between the points (x_L, y_L) and (x_U, y_U).
# Equal x_L and x_U give a non-finite result rather than an error.
# @param x [Numeric] position to evaluate at
# @param x_L [Numeric] x of the lower point
# @param x_U [Numeric] x of the upper point
# @param y_L [Numeric] y of the lower point
# @param y_U [Numeric] y of the upper point
# @return [Float]
def interpolate(x, x_L, x_U, y_L, y_U)
  # to_f keeps all-Integer inputs from truncating the slope through Integer
  # division (Array#interpolate in this file does the same).
  m = (y_U - y_L).to_f / (x_U - x_L)
  (x - x_L) * m + y_L
end
499
+
500
# Tabulate the CDF of an arbitrary density on n_samples equally spaced
# points covering [-4, 4], integrating each point with cdf_calc.
# @param func [Symbol] name of the density method
# @param options [Hash] passed through to the density
# @param n_samples [Integer] number of table rows (greater than 1)
# @return [Array<Array(Float, Float)>] [x, cdf(x)] pairs
# @raise [ForChrisLibError] when n_samples is invalid or func is not a
#   method this object responds to
def arbitrary_cdf_a(func, options, n_samples: 100)
  unless n_samples.is_a?(Integer) && n_samples > 1
    raise ForChrisLibError, 'n_samples must be greater than 1'
  end
  raise ForChrisLibError, 'function must be defined' unless respond_to?(func)

  width = 8.0
  h = width / (n_samples - 1)
  (0...n_samples).map do |step|
    x = -width / 2 + step * h
    [x, cdf_calc(x, func, options)]
  end
end
515
+
516
# Tabulate the CDF of skew_normal_pdf on n_samples equally spaced points
# covering [-4, 4], integrating each point with cdf_calc.
# @param options [Hash] passed through to skew_normal_pdf
# @param n_samples [Integer] number of table rows (greater than 1)
# @return [Array<Array(Float, Float)>] [x, cdf(x)] pairs
# @raise [ForChrisLibError] when n_samples is not an Integer above 1
def skew_normal_cdf_a(options, n_samples: 100)
  unless n_samples.is_a?(Integer) && n_samples > 1
    raise ForChrisLibError, 'n_samples must be greater than 1'
  end

  width = 8.0
  h = width / (n_samples - 1)
  Array.new(n_samples) do |step|
    x = -width / 2 + step * h
    [x, cdf_calc(x, :skew_normal_pdf, options)]
  end
end
527
+
528
# Approximate a CDF at x by integrating a density with Simpson's rule from a
# lower limit far enough into the left tail.
# @param x [Numeric] upper integration limit
# @param func [Symbol] name of the density method
# @param options [Hash] passed through to the density
# @param mu [Numeric] location used only to choose the lower limit
# @param sigma [Numeric] scale used only to choose the lower limit
# @param n_pts [Integer] even number of Simpson intervals
# @return [Float]
# @raise [RuntimeError] when n_pts is odd
# @raise [ForChrisLibError] when func is not a method this object responds to
def cdf_calc(x, func, options, mu: 0, sigma: 1, n_pts: 100)
  raise "n_pts must be even (received n_pts=#{n_pts})" unless n_pts.even?
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func)

  # Normally start 5 sigma below the mean. When x itself lies more than
  # 3 sigma below the mean, start 2 sigma below x instead. x is already an
  # absolute position, so mu must not be added again: both choices then
  # agree at the switch point x = mu - 3 * sigma and the lower limit never
  # exceeds x.
  deep_in_lower_tail = x - mu < -3 * sigma
  a = deep_in_lower_tail ? x - 2 * sigma : mu - 5 * sigma
  simpson(func, a, x, n_pts, options)
end
543
+
544
# Format a number with thousands delimiters. The string form is split on
# '.', the first part gets a delimiter between each group of three digits,
# and the parts are re-joined with the separator.
# @param number [Numeric, String]
# @param delimiter [String] inserted between digit groups
# @param separator [String] placed between integer and fractional parts
# @return [String] '' when the input stringifies to nothing
# @raise [ForChrisLibError] when number does not respond to #to_s
def delimit(number, delimiter = ',', separator = '.')
  raise ForChrisLibError, 'number must respond to #to_s' unless number.respond_to?(:to_s)

  integer_part, *fraction_parts = number.to_s.split('.')
  # '' and nil stringify to nothing, so there is no integer part to format.
  return '' if integer_part.nil?

  grouped = integer_part.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
  [grouped, *fraction_parts].join(separator)
end
555
+
556
# Short machine name taken from the `hostname` command.
# @return [String, nil] at most the first ten characters of the hostname,
#   or nil (after a warning) when it cannot be determined
def computer_name_short
  host = begin
    # Command output ends with a newline; strip it so it cannot leak into
    # names shorter than ten characters.
    `hostname`.strip
  rescue SystemCallError
    # Raised when the command cannot be executed at all; treat it the same
    # as empty output.
    ''
  end
  if host.empty?
    warn 'computer_name_short could not determine hostname'
    return nil
  end
  host[0..9]
end
565
+
566
+ private
567
+
568
# Build the default win/loss calculator.
# @return [Object] a new WinLoss instance
# @raise [ForChrisLibError] when the WinLoss constant is not defined
def default_win_loss_calculator
  unless defined?(WinLoss)
    raise ForChrisLibError, 'WinLoss dependency is not available. Provide win_loss_calculator:'
  end

  WinLoss.new
end
574
+
575
# Lazily load the minimization gem and return its Brent minimizer.
# @return [Class] Minimization::Brent
# @raise [ForChrisLibError] when the minimization gem cannot be loaded
def default_minimizer_class
  begin
    require 'minimization'
  rescue LoadError
    raise ForChrisLibError, 'minimization gem is required to estimate bias or supply minimizer_class'
  end
  Minimization::Brent
end
582
+ end
583
+
584
String.class_eval do
  # Return the shortest text found between the first occurrence of marker1
  # and the next occurrence of marker2; the match may span several lines.
  # Both markers are matched literally.
  # @param marker1 [String] opening marker
  # @param marker2 [String] closing marker
  # @return [String, nil] nil when the markers are not found in that order
  def string_between_markers(marker1, marker2)
    opening = Regexp.escape(marker1)
    closing = Regexp.escape(marker2)
    slice(/#{opening}(.*?)#{closing}/m, 1)
  end
end
593
+
594
Integer.class_eval do
  # Convert to Float and delegate to Float#sigmoid.
  # @return [Integer] whatever Float#sigmoid returns for the converted value
  def sigmoid
    as_float = to_f
    as_float.sigmoid
  end
end
601
+
602
Float.class_eval do
  # Sign of the receiver: 1 when above zero, -1 when below zero, otherwise 0
  # (which includes NaN, since it compares as neither).
  # @return [Integer] -1, 0, or 1
  def sigmoid
    return 1 if self > 0
    return -1 if self < 0

    0
  end
end
615
+
616
Array.class_eval do
  # Pad nested arrays with nil, in place, so all sub-arrays share the length
  # of the longest one.
  # @return [Array<Array>] a new outer array holding the same (now padded)
  #   sub-arrays
  # @raise [ForChrisLibError] when an element is not an Array
  def pad_sub_arrays!
    raise ForChrisLibError, 'pad_sub_arrays! requires an array of arrays' unless all? { |a| a.is_a?(Array) }

    max_len = map(&:length).max
    map do |a|
      len = a.length
      len == max_len ? a : a.fill(nil, len...max_len)
    end
  end

  # Remove nil entries from every sub-array in place, undoing the padding
  # added by {#pad_sub_arrays!}. Any other nil entries are removed as well.
  # @return [Array<Array>] self, with compacted sub-arrays
  # @raise [ForChrisLibError] when an element is not an Array
  def unpad_sub_arrays!
    raise ForChrisLibError, 'unpad_sub_arrays! requires an array of arrays' unless all? { |a| a.is_a?(Array) }

    # compact! mutates each sub-array, matching the bang in the method name
    # and the in-place behaviour of pad_sub_arrays!.
    each(&:compact!)
  end

  # Histogram of the values after rounding each to the nearest integer.
  # @return [Hash{Integer=>Integer}] rounded value => count, sorted by key
  # @raise [ForChrisLibError] when an element does not respond to #round
  def histogram_int
    raise ForChrisLibError, 'histogram_int requires numeric values' unless all? { |x| x.respond_to?(:round) }

    hsh = Hash.new(0)
    each { |x| hsh[x.round] += 1 }
    hsh.sort_by { |k, _| k }.to_h
  end

  # Shift histogram bins by a possibly fractional amount. The whole part of
  # the shift is applied with {#bin_int_shift}; the fractional part then
  # moves that fraction of every bin into its neighbour in the direction of
  # the shift, except for the end bin, which keeps its content.
  # @param x [Numeric] shift in bins; positive moves towards higher indices
  # @return [Array<Numeric>] self when x is zero or the array is empty,
  #   otherwise a new array
  # @raise [ForChrisLibError] when a bin or the shift is not Numeric
  def bin_shift(x)
    raise ForChrisLibError, 'bin_shift requires numeric bins' unless all? { |v| v.is_a?(Numeric) }
    raise ForChrisLibError, 'shift must be numeric' unless x.is_a?(Numeric)
    # Nothing to shift; an empty array would otherwise fail on the
    # end-bin assignment below.
    return self if x.zero? || empty?

    i_max = length - 1
    if x.positive?
      j = x.floor
      dx = x - j
      b = bin_int_shift(j)
      delta = b.map { |e| e * dx }
      delta[-1] = 0.0 # the last bin has no higher neighbour to spill into
      b_1 = b.zip(delta).map { |e_b, e_d| e_b - e_d }
      (1..i_max).map { |i| b_1[i] + delta[i - 1] }.insert(0, b_1[0])
    else
      j = x.ceil
      dx = (x - j).abs
      b = bin_int_shift(j)
      delta = b.map { |e| e * dx }
      delta[0] = 0.0 # the first bin has no lower neighbour to spill into
      b_1 = b.zip(delta).map { |e_b, e_d| e_b - e_d }
      (0..(i_max - 1)).map { |i| b_1[i] + delta[i + 1] }.insert(i_max, b_1[i_max])
    end
  end

  # Shift histogram bins by a whole number of bins, one step at a time and
  # at most length - 1 steps; content reaching an end accumulates there.
  # @param j [Integer] number of bins; positive moves towards higher indices
  # @return [Array<Numeric>] self when j is zero, otherwise a new array
  # @raise [ForChrisLibError] when a bin is not Numeric or j is not an Integer
  def bin_int_shift(j)
    raise ForChrisLibError, 'bin_int_shift requires numeric bins' unless all? { |v| v.is_a?(Numeric) }
    raise ForChrisLibError, 'shift must be an Integer' unless j.is_a?(Integer)
    return self if j.zero?

    i_max = length - 1
    steps = [j.abs, i_max].min
    a_s = self
    steps.times do
      a_s = j.positive? ? a_s.bin_int_shift_right(i_max) : a_s.bin_int_shift_left(i_max)
    end
    a_s
  end

  # Helper used by {#bin_int_shift} for a single leftward step: bin 0
  # absorbs bin 1, the other bins take their right neighbour, and the last
  # bin becomes 0.
  # @param i_max [Integer] index of the last bin
  # @return [Array<Numeric>]
  def bin_int_shift_left(i_max)
    each_with_index.map do |_, i|
      if i == i_max
        0
      elsif i.positive?
        self[i + 1]
      else
        self[0] + self[1]
      end
    end
  end

  # Helper used by {#bin_int_shift} for a single rightward step: bin 0
  # becomes 0, the other bins take their left neighbour, and the last bin
  # also keeps its own content.
  # @param i_max [Integer] index of the last bin
  # @return [Array<Numeric>]
  def bin_int_shift_right(i_max)
    each_with_index.map do |_, i|
      if i.zero?
        0
      elsif i < i_max
        self[i - 1]
      else
        self[i - 1] + self[i]
      end
    end
  end

  # Element-wise sign: 1 for values above zero, -1 for values below zero,
  # 0 otherwise.
  # @return [Array<Integer>]
  # @raise [ForChrisLibError] when an element is not Numeric
  def sigmoid
    raise ForChrisLibError, 'sigmoid requires numeric values' unless all? { |v| v.is_a?(Numeric) }

    map do |v|
      if v > 0
        1
      elsif v < 0
        -1
      else
        0
      end
    end
  end

  # Linear interpolation over [x, y] pairs; presumably sorted by increasing
  # x, since the first pair whose x is >= the query is used. Queries at or
  # beyond the first/last x return the first/last y.
  # @param x [Numeric]
  # @return [Numeric]
  # @raise [ForChrisLibError] when the array is empty or an element is not
  #   an Array of at least two entries
  def interpolate(x)
    unless !empty? && all? { |pair| pair.is_a?(Array) && pair.size >= 2 }
      raise ForChrisLibError, 'interpolate requires a two-column array'
    end

    x_a = transpose[0]
    return self[0][1] if x <= x_a[0]
    return self[-1][1] if x >= x_a[-1]

    i = x_a.find_index { |v| v >= x }
    return self[i][1] if x == self[i][0]

    m = (self[i][1] - self[i - 1][1]).to_f / (self[i][0] - self[i - 1][0])
    (x - self[i - 1][0]) * m + self[i - 1][1]
  end

  # Nesting depth of the array: 1 for a flat (or empty) array, otherwise
  # one more than the deepest nested array element.
  # @return [Integer]
  def dimension
    1 + grep(Array).map(&:dimension).max.to_i
  end

  # Sum values across 1D, 2D, or 3D arrays.
  # @return [Numeric]
  # @raise [RuntimeError] when {#dimension} is greater than 3
  def total
    # Computed once: each call walks the whole nested structure.
    depth = dimension
    case depth
    when 1
      sum
    when 2
      sum { |row| row.sum }
    when 3
      sum { |plane| plane.sum { |row| row.sum } }
    else
      raise "not implemented for #{depth} dimensions"
    end
  end

  # Relative frequency of each distinct element.
  # @return [Hash{Object=>Float}] element => share of all elements, sorted
  #   by element
  def pdf
    n = size
    tally.map { |k, v| [k, v.to_f / n] }.sort_by { |k, _| k }.to_h
  end

  # Cumulative relative frequency built from {#pdf}.
  # @return [Hash{Object=>Float}] element => cumulative share, in key order
  def cdf
    pdf_temp = pdf
    probabilities = pdf_temp.values
    pdf_temp.keys.each_with_index.map { |k, i| [k, probabilities[0..i].sum] }.to_h
  end
end
808
+
809
Hash.class_eval do
  # Turn a PDF hash into a CDF hash: each key maps to the sum of its own
  # value and the values of every key before it, in the hash's own order.
  # @return [Hash{Object=>Float}]
  def cdf_from_pdf
    probabilities = values
    keys.each_with_index.map { |key, idx| [key, probabilities[0..idx].sum] }.to_h
  end

  # Collect counts by integer handicap for handicaps -5 through 37 (the
  # original comment describes this as the male golfer histogram). Keys are
  # converted with #to_i; handicaps with no matching key are omitted and the
  # result is ordered by increasing handicap.
  # @return [Hash{Integer=>Integer}] defaults to 0 for absent keys
  def male_ga_hist
    (-5..37).each_with_object(Hash.new(0)) do |handicap, hist|
      each { |key, count| hist[handicap] += count if key.to_i == handicap }
    end
  end

  # Same as {#male_ga_hist} but for handicaps -5 through 46 (the original
  # comment describes this as the female golfer histogram).
  # @return [Hash{Integer=>Integer}] defaults to 0 for absent keys
  def female_male_ga_hist
    (-5..46).each_with_object(Hash.new(0)) do |handicap, hist|
      each { |key, count| hist[handicap] += count if key.to_i == handicap }
    end
  end
end