statistics2 0.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,617 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'ext')
3
+
4
+ require 'statistics2/version'
5
+
6
+ # distributions of statistics
7
+ # by Shin-ichiro HARA
8
+ #
9
+ # Ref:
10
+ # [1] http://www.matsusaka-u.ac.jp/~okumura/algo/
11
+ # [2] http://www5.airnet.ne.jp/tomy/cpro/sslib11.htm
12
+
13
+ module Statistics2
14
+ SQ2PI = Math.sqrt(2 * Math::PI)
15
+
16
# Returns the singleton class of the receiver, so callers can inspect or
# modify the methods defined directly on it.
def self.metaclass
  singleton_class
end
18
+
19
# Remove any existing definitions of the public API names, both as private
# instance methods of this module and as methods of its singleton class,
# before they are (re)defined further down.
%w[
  normaldist normalxXX_ normal__X_ normal___x normalx__x
  pnormaldist pnormalxXX_ pnormal__X_ pnormal___x pnormalx__x
  chi2dist chi2X_ chi2_x pchi2dist pchi2X_ pchi2_x
  tdist txXX_ t__X_ t___x tx__x ptdist ptxXX_ pt__X_ pt___x ptx__x
  fdist fX_ f_x pfdist pfX_ pf_x
  bindens bindist binX_ bin_x
  poissondens poissondist poissonX_ poisson_x
].each do |name|
  # Instance-side copy (module_function leaves it private).
  undef_method(name) if private_method_defined?(name)
  # Singleton-side copy (callable as Statistics2.name).
  metaclass.instance_eval do
    undef_method(name) if method_defined?(name)
  end
end
32
+
33
# Newton approximation: solves f(x) = y for x.
#
# y       - target value of the function.
# ini     - starting point of the iteration.
# epsilon - the iteration stops once two successive estimates differ by
#           less than this amount.
# limit   - maximum number of iterations.
#
# The block receives the current estimate and must return the pair
# [f(estimate), f'(estimate)].
#
# Returns the converged estimate; if +limit+ iterations pass without
# convergence a warning is written to $stderr and the last estimate is
# returned.
def newton_a(y, ini, epsilon = 1.0e-6, limit = 30)
  estimate = ini
  limit.times do
    previous = estimate
    value, slope = yield(previous)
    estimate = (y - value)/slope + previous
    return estimate if (estimate - previous).abs < epsilon
  end
  $stderr.puts("Warning(newton approximation): over limit")
  estimate
end
47
+
48
+ module_function :newton_a
49
+ private :newton_a
50
+ private_class_method :newton_a
51
+
52
# Gamma function
LOG_2PI = Math.log(2 * Math::PI) # log(2PI)
# Arguments below N are shifted upward (x -> x + 1) before the asymptotic
# series is evaluated.
N = 8
# Bernoulli numbers used as coefficients of the asymptotic series.
B0 = 1.0
B1 = -1.0 / 2.0
B2 = 1.0 / 6.0
B4 = -1.0 / 30.0
B6 = 1.0 / 42.0
B8 = -1.0 / 30.0
B10 = 5.0 / 66.0
B12 = -691.0 / 2730.0
B14 = 7.0 / 6.0
B16 = -3617.0 / 510.0

# Natural logarithm of the gamma function.
#
# x - a positive number.
#
# The argument is first raised to at least N using gamma(x + 1) = x * gamma(x)
# (the accumulated product is kept in +v+ and its logarithm subtracted at the
# end); the series in 1/x**2 is then evaluated by Horner's scheme.
#
# Returns log(gamma(x)) as a Float.
def loggamma(x)
  v = 1.0
  while (x < N)
    v *= x
    x += 1.0
  end
  w = 1.0 / (x * x)
  ret = B16 / (16 * 15)
  ret = ret * w + B14 / (14 * 13)
  ret = ret * w + B12 / (12 * 11)
  ret = ret * w + B10 / (10 * 9)
  ret = ret * w + B8 / ( 8 * 7)
  ret = ret * w + B6 / ( 6 * 5)
  ret = ret * w + B4 / ( 4 * 3)
  ret = ret * w + B2 / ( 2 * 1)
  ret = ret / x + 0.5 * LOG_2PI - Math.log(v) - x + (x - 0.5) * Math.log(x)
  ret
end

# Gamma function.
#
# x - a number; negative arguments are handled with the reflection formula
#     gamma(x) = PI / (sin(PI * x) * gamma(1 - x)).
#
# Returns gamma(x) as a Float.
def gamma(x)
  if (x < 0.0)
    # Math::PI is a constant; it must be referenced with `::`, not called
    # as a method.
    return Math::PI / (Math.sin(Math::PI * x) * Math.exp(loggamma(1 - x)))
  end
  Math.exp(loggamma(x))
end
91
+
92
+ module_function :loggamma, :gamma
93
+ private :loggamma, :gamma
94
+ private_class_method :loggamma, :gamma
95
+
96
# Normal distribution: probability of (-\infty, z].
#
# z - the upper limit of integration.
#
# Values beyond +/-12 are clamped to 0.0 / 1.0. Otherwise a series in odd
# powers of |z| is summed until adding a term no longer increases the
# partial sum, and the result is mirrored around 0.5 for negative z.
#
# Returns the probability as a Float.
def p_nor(z)
  return 0.0 if z < -12
  return 1.0 if z > 12
  return 0.5 if z == 0.0

  upper = z > 0.0
  mag = (upper ? z : -z).to_f
  mag_sq = mag * mag
  term = mag * Math.exp(-0.5 * mag_sq) / SQ2PI
  acc = term

  odd = 3
  while odd <= 199
    before = acc
    term *= mag_sq / odd
    acc += term
    # The sum has stopped growing: it has converged to working precision.
    return(upper ? 0.5 + acc : 0.5 - acc) if acc <= before
    odd += 2
  end
  upper ? 1.0 : 0.0
end
123
+
124
# Inverse of the normal distribution ([2]):
# Pr( (-\infty, x] ) = qn -> x
#
# qn - a probability. Values outside [0, 1] print an error to $stderr and
#      yield 0.0.
#
# The result is sqrt(P(t) * t), where t = -log(4 * p * (1 - p)), p is the
# smaller of qn and 1 - qn, and P is a degree-10 polynomial; the sign is
# negative for qn below 0.5.
#
# Returns x as a Float.
def pnorm(qn)
  coeffs = [1.570796288, 0.03706987906, -0.8364353589e-3,
            -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5,
            -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8,
            0.3657763036e-10, 0.6936233982e-12]

  if qn < 0.0 || qn > 1.0
    $stderr.printf("Error : qn <= 0 or qn >= 1 in pnorm()!\n")
    return 0.0
  end
  return 0.0 if qn == 0.5

  upper = qn > 0.5
  tail = upper ? 1.0 - qn : qn
  t = -Math.log(4.0 * tail * (1.0 - tail))
  poly = (1..10).inject(coeffs[0]) { |acc, k| acc + coeffs[k] * t**k }
  magnitude = Math.sqrt(poly * t)
  upper ? magnitude : -magnitude
end
148
+
149
+ private :p_nor, :pnorm
150
+ module_function :p_nor, :pnorm
151
+ private_class_method :p_nor, :pnorm
152
+
153
# Returns the integral of normal distribution over (-Infty, x].
# Delegates to p_nor.
def normaldist(z); p_nor(z); end

# Inverse of normaldist: returns the x for which the integral over
# (-Infty, x] equals y. Delegates to pnorm.
def pnormaldist(y); pnorm(y); end
162
+
163
# Chi-square distribution ([1]): probability of [x, \infty).
#
# df   - degrees of freedom (an Integer).
# chi2 - the lower limit x of the integration.
#
# Even df uses a finite sum starting from exp(-x/2); odd df adds the
# corresponding sum to the normal upper tail at sqrt(x) and doubles it.
#
# Returns the upper-tail probability as a Float.
def q_chi2(df, chi2)
  x = chi2.to_f

  # Sum first + first*x/k0 + first*x/k0*x/(k0+2) + ... for k < df.
  series = lambda do |first, k0|
    term = total = first
    k0.step(df - 1, 2) do |k|
      term *= x / k
      total += term
    end
    total
  end

  return series.call(Math.exp(-0.5 * x), 2) unless df.odd?

  root = Math.sqrt(x)
  return 2 * normal___x(root) if df == 1

  partial = series.call(root * Math.exp(-0.5 * x) / SQ2PI, 3)
  2 * (normal___x(root) + partial)
end
187
+
188
# Density of the chi-square distribution.
#
# n - degrees of freedom.
# x - point at which the density is evaluated.
#
# n == 1 and n == 2 use closed forms; every other n goes through gamma(n/2).
#
# Returns the density as a Float.
def chi2dens(n, x)
  return 1.0/Math.sqrt(2 * Math::PI * x) * Math::E**(-x/2.0) if n == 1
  return 0.5 * Math::E**(-x/2.0) if n == 2

  half_n = n.to_f/2
  point = x.to_f
  1.0 / 2**half_n / gamma(half_n) * point**(half_n - 1.0) * Math.exp(-point/2.0)
end
200
+
201
# Inverse of the chi-square upper tail:
# Pr([x, \infty)) = y -> x
#
# n - degrees of freedom.
# y - upper-tail probability.
#
# n == 1 squares a normal quantile, n == 2 uses the closed form
# -2 * log(y); any other n is solved by a decimal step search on q_chi2
# that stops once the step size has shrunk to 1.0e-5 or less.
#
# Returns x as a Float.
def pchi2(n, y)
  if n == 1
    z = pnorm(1 - y/2) # = pnormal___x(y/2)
    return z * z
  end
  return -2.0 * Math.log(y) if n == 2

  tolerance = 1.0e-5
  x = 0.0
  step = 10.0
  loop do
    x += step
    break if step <= tolerance
    diff = q_chi2(n, x) - y
    break if diff == 0.0
    next unless diff < 0.0
    # Overshot the root: step back and refine with a ten times smaller step.
    x -= step
    step /= 10.0
  end
  x
end
227
+
228
+ private :q_chi2, :pchi2, :chi2dens
229
+ module_function :q_chi2, :pchi2, :chi2dens
230
+ private_class_method :q_chi2, :pchi2, :chi2dens
231
+
232
# Returns the integral of Chi-squared distribution with n degrees of
# freedom over [0, x], computed as the complement of q_chi2.
def chi2dist(n, x)
  1.0 - q_chi2(n, x)
end

# Inverse of chi2dist(): returns the x whose integral over [0, x] is y,
# by asking pchi2 for the complementary upper-tail probability.
def pchi2dist(n, y)
  pchi2(n, 1.0 - y)
end
237
+
238
+
239
# t-distribution ([1]): probability of (-\infty, t].
#
# df - degrees of freedom (an Integer).
# t  - the upper limit of integration.
#
# Returns the probability as a Float.
def p_t(df, t)
  c2 = df.to_f / (df + t * t)
  term = Math.sqrt(1.0 - c2)
  term = -term if t < 0.0

  acc = 0.0
  # Starts at 2 for even df and at 3 for odd df.
  (df % 2 + 2).step(df, 2) do |i|
    acc += term
    term *= (i - 1) * c2 / i
  end

  return (1.0 + acc) / 2.0 unless df.odd?

  0.5+(acc*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df)))/Math::PI
end
258
+
259
# Inverse of the t-distribution ([2]) by step search:
# finds w such that the two tails (-\infty, -w] + [w, \infty) have total
# probability q.
#
# q - two-sided tail probability.
# n - degrees of freedom.
#
# The search starts with a step of 10000 and divides it by ten each time
# the target is overshot; it stops when the step has shrunk to the
# tolerance, which is coarser for n == 1 or n == 2 with very small q.
#
# Returns w as a Float.
def ptsub(q, n)
  q = q.to_f

  tolerance = 1.0e-5
  if n == 1
    if q > 0.001 && q < 0.01
      tolerance = 1.0e-4
    elsif q < 0.001
      tolerance = 1.0e-2
    end
  elsif n == 2 && q < 0.0001
    tolerance = 1.0e-4
  end

  step = 10000.0
  w = 0.0
  loop do
    w += step
    return w if step <= tolerance
    diff = 2.0 - p_t(n, w)*2.0 - q
    return w if diff == 0.0
    next unless diff < 0.0
    # Overshot: go back and continue with a ten times finer step.
    w -= step
    step /= 10.0
  end
end
284
+
285
# Two-sided quantile of the t-distribution.
#
# q - two-sided tail probability; must lie in [1.0e-5, 1.0].
# n - degrees of freedom; must be at least 1.
#
# Invalid parameters print an error to $stderr and yield 0.0. Small n
# (n <= 5, or n <= 13 together with q <= 5.0e-3) are delegated to ptsub;
# otherwise the normal quantile u is corrected by a series in 1/n.
#
# Returns the quantile as a Float.
def pt(q, n)
  q = q.to_f
  if q < 1.0e-5 || q > 1.0 || n < 1
    $stderr.printf("Error : Illigal parameter in pt()!\n")
    return 0.0
  end

  return ptsub(q, n) if n <= 5
  return ptsub(q, n) if q <= 5.0e-3 && n <= 13

  # Denominators: successive powers of n with their series coefficients.
  f = n.to_f
  f_2 = f * f
  f_3 = f_2 * f
  f_4 = f_3 * f
  f_5 = f_4 * f
  den1 = 4.0 * f
  den2 = f_2 * 96.0
  den3 = f_3 * 384.0
  den4 = f_4 * 92160.0
  den5 = f_5 * 368640.0

  # Odd powers of the normal quantile.
  u = pnormaldist(1.0 - q / 2.0)
  u_sq = u * u
  u3 = u_sq * u
  u5 = u3 * u_sq
  u7 = u5 * u_sq
  u9 = u7 * u_sq
  u11 = u9 * u_sq

  correction = (u3 + u) / den1
  correction += (5.0 * u5 + 16.0 * u3 + 3.0 * u) / den2
  correction += (3.0 * u7 + 19.0 * u5 + 17.0 * u3 - 15.0 * u) / den3
  correction += (79.0 * u9 + 776.0 * u7 + 1482.0 * u5 - 1920.0 * u3 - 9450.0 * u) / den4
  correction += (27.0 * u11 + 339.0 * u9 + 930.0 * u7 - 1782.0 * u5 - 765.0 * u3 + 17955.0 * u) / den5
  u + correction
end
315
+
316
+ private :p_t, :pt, :ptsub
317
+ module_function :p_t, :pt, :ptsub
318
+ private_class_method :p_t, :pt, :ptsub
319
+
320
# Returns the integral of t-distribution with n degrees of freedom over
# (-Infty, x]. Delegates to p_t.
def tdist(n, t)
  p_t(n, t)
end

# Inverse of tdist(): returns the x whose integral over (-Infty, x] is y.
# pt works with two-sided tail probabilities, so y is converted to one and
# the sign is chosen from the side of 0.5 that y lies on.
def ptdist(n, y)
  return pt(2.0 - y*2.0, n) if y > 0.5

  - pt(y*2.0, n)
end
331
+
332
# F-distribution ([1]): probability of [x, \infty).
#
# df1 - numerator degrees of freedom.
# df2 - denominator degrees of freedom.
# f   - the lower limit x of the integration; anything <= 0 yields 1.0.
#
# Odd df1 with even df2 is reduced to the even-df1 case by swapping the
# degrees of freedom and inverting f.
#
# Returns the upper-tail probability as a Float.
def q_f(df1, df2, f)
  return 1.0 if f <= 0.0
  return 1.0 - q_f(df2, df1, 1.0 / f) if df1 % 2 != 0 && df2 % 2 == 0

  cos2 = 1.0 / (1.0 + df1.to_f * f / df2.to_f)
  sin2 = 1.0 - cos2

  if df1 % 2 == 0
    term = cos2 ** (df2.to_f / 2.0)
    total = term
    2.step(df1 - 1, 2) do |i|
      term *= (df2.to_f + i - 2) * sin2 / i
      total += term
    end
    return total
  end

  # Both degrees of freedom are odd from here on.
  total = Math.atan(Math.sqrt(df2.to_f / (df1.to_f * f)))
  term = Math.sqrt(sin2 * cos2)
  3.step(df1, 2) do |i|
    total += term
    term *= (i - 1).to_f * sin2 / i.to_f
  end
  term *= df1.to_f
  3.step(df2, 2) do |i|
    total -= term
    term *= (df1.to_f + i - 2) * cos2 / i.to_f
  end
  total * 2.0 / Math::PI
end
370
+
371
# Helper for the inverse of the F-distribution ([2]):
# computes (sqrt(z) - y) / (2 * x), dividing by x first and then by 2.
def pfsub(x, y, z)
  numerator = Math.sqrt(z) - y
  numerator / x / 2.0
end
375
+
376
# Inverse of the F-distribution upper tail ([2]):
# finds x such that Pr([x, \infty)) = q.
#
# q  - upper-tail probability; must lie in [0, 1].
# n1 - numerator degrees of freedom; must be at least 1.
# n2 - denominator degrees of freedom; must be at least 1.
#
# Invalid parameters print an error to $stderr and yield 0.0.
#
# When either degree of freedom is at most 240 the quantile is found by a
# decimal step search on q_f. When both exceed 240 a normal approximation
# is used: the cube root of the quantile is a root of a quadratic
# a*x**2 + b*x + c = 0 built from the normal quantile u.
#
# Returns the quantile as a Float for every valid q.
def pf(q, n1, n2)
  if q < 0.0 || q > 1.0 || n1 < 1 || n2 < 1
    $stderr.printf("Error : Illegal parameter in pf()!\n")
    return 0.0
  end

  if n1 <= 240 || n2 <= 240
    eps = 1.0e-5
    eps = 1.0e-4 if n2 == 1
    fw = 0.0
    s = 1000.0
    loop do
      fw += s
      return fw if s <= eps
      qe = q_f(n1, n2, fw) - q
      return fw if qe == 0.0
      if qe < 0.0
        # Overshot: go back and continue with a ten times finer step.
        fw -= s
        s /= 10.0
      end
    end
  end

  eps = 1.0e-6
  # Only the tail-side assignment is conditional; the approximation below
  # must run for every q so that q >= 0.5 also produces a number.
  qn = q < 0.5 ? 1.0 - q : q
  u = pnorm(qn)
  w1 = 2.0 / n1 / 9.0
  w2 = 2.0 / n2 / 9.0
  w3 = 1.0 - w1
  w4 = 1.0 - w2
  u2 = u * u
  a = w4 * w4 - u2 * w2
  b = -2.0 * w3 * w4
  c = w3 * w3 - u2 * w1
  d = b * b - 4 * a * c

  if d < 0.0
    fw = pfsub(a, b, 0.0)
  elsif a.abs > eps
    # u enters only through u**2, so both tails share one quadratic: the
    # larger root belongs to q < 0.5, the smaller one to q > 0.5.
    fw = q > 0.5 ? (-Math.sqrt(d) - b) / a / 2.0 : pfsub(a, b, d)
  else
    return -c / b if b.abs > eps
    fw = pfsub(a, b, 0.0)
  end
  fw * fw * fw
end
425
+
426
+ private :q_f, :pf, :pfsub
427
+ module_function :q_f, :pf, :pfsub
428
+ private_class_method :q_f, :pf, :pfsub
429
+
430
# Returns the integral of F-distribution with n1 and n2 degrees of freedom
# over [0, x], computed as the complement of q_f.
def fdist(n1, n2, f)
  1.0 - q_f(n1, n2, f)
end

# Inverse of fdist(): returns the x whose integral over [0, x] is y, by
# asking pf for the complementary upper-tail probability.
def pfdist(n1, n2, y)
  pf(1.0 - y, n1, n2)
end
435
+
436
+ ############################################################################
437
+ # discrete distributions
438
+
439
# Number of permutations: n * (n - 1) * ... taken over x factors.
# With x omitted this is n factorial.
#
# Raises RangeError when n or x is negative.
def perm(n, x = n)
  raise RangeError if n < 0 || x < 0

  product = 1
  factor = n
  remaining = x
  while remaining >= 1
    product *= factor
    factor -= 1
    remaining -= 1
  end
  product
end
449
+
450
# Number of combinations of x items chosen from n.
# The smaller of x and n - x is used as the number of factors.
#
# Raises RangeError when n or x is negative.
def combi(n, x)
  raise RangeError if n < 0 || x < 0

  k = x*2 > n ? n - x : x
  perm(n, k) / perm(k, k)
end
455
+
456
+ module_function :perm, :combi
457
+ private_class_method :perm, :combi
458
+
459
# Binomial probability of exactly x successes in n trials with success
# probability p.
def bindens(n, p, x)
  success = p.to_f
  failure = 1.0 - success
  combi(n, x) * success**x * failure**(n - x)
end

# Binomial probability of at most x successes in n trials: the sum of
# bindens over 0..x.
def bindist(n, p, x)
  total = 0.0
  (0..x).each { |k| total += bindens(n, p, k) }
  total
end
470
+
471
# Poisson probability of exactly x events with mean m.
# Negative x yields 0.0.
def poissondens(m, x)
  return 0.0 if x < 0

  mean = m.to_f
  mean ** x * Math::E ** (-mean) / perm(x)
end

# Poisson probability of at most x events with mean m: the sum of
# poissondens over 0..x.
def poissondist(m, x)
  total = 0.0
  (0..x).each { |k| total += poissondens(m, k) }
  total
end
482
+
483
+ ############################################################################
484
+ # normal-distribution
485
+
486
# Returns the integral of normal distribution over (-Infty, x].
def normalxXX_(z)
  normaldist(z)
end

# Returns the integral of normal distribution over [0, x].
def normal__X_(z)
  normaldist(z) - 0.5
end

# Returns the integral of normal distribution over [x, Infty).
def normal___x(z)
  1.0 - normaldist(z)
end

# Returns the integral of normal distribution over
# (-Infty, -x] + [x, Infty).
def normalx__x(z)
  2.0 - normaldist(z) * 2.0
end
497
+
498
+ module_function :normaldist, :normalxXX_, :normal__X_, :normal___x, :normalx__x
499
+
500
+
501
+ # inverse of normal-distribution
502
+
503
# Inverse of normalxXX_: the x whose integral over (-Infty, x] is z.
def pnormalxXX_(z)
  pnormaldist(z)
end

# Inverse of normal__X_: the x whose integral over [0, x] is y.
def pnormal__X_(y)
  pnormalxXX_(y + 0.5)
end

# Inverse of normal___x: the x whose integral over [x, Infty) is y.
def pnormal___x(y)
  pnormalxXX_(1.0 - y)
end

# Inverse of normalx__x: the x whose integral over
# (-Infty, -x] + [x, Infty) is y.
def pnormalx__x(y)
  pnormalxXX_(1.0 - y/2.0)
end
514
+
515
+ module_function :pnormaldist, :pnormalxXX_, :pnormal__X_, :pnormal___x, :pnormalx__x
516
+
517
+
518
+ # chi2-distribution
519
+
520
# Returns the integral of Chi-squared distribution with n degrees of
# freedom over [x, Infty).
def chi2_x(n, x)
  1.0 - chi2dist(n, x)
end

# Returns the integral of Chi-squared distribution with n degrees of
# freedom over [0, x].
def chi2X_(n, x)
  chi2dist(n, x)
end
525
+
526
+ module_function :chi2dist, :chi2X_, :chi2_x
527
+
528
+
529
+ # inverse of chi2-distribution
530
+
531
# Inverse of chi2_x: the x whose integral over [x, Infty) is y.
def pchi2_x(n, y)
  pchi2dist(n, 1.0 - y)
end

# Inverse of chi2X_: the x whose integral over [0, x] is y.
def pchi2X_(n, y)
  pchi2dist(n, y)
end
536
+
537
+ module_function :pchi2dist, :pchi2X_, :pchi2_x
538
+
539
+
540
+ # t-distribution
541
+
542
# Returns the integral of t-distribution with n degrees of freedom over
# (-Infty, -x] + [x, Infty).
def tx__x(n, x)
  2.0 - tdist(n, x) * 2.0
end

# Returns the integral of t-distribution with n degrees of freedom over
# (-Infty, x].
def txXX_(n, x)
  tdist(n, x)
end

# Returns the integral of t-distribution with n degrees of freedom over
# [0, x].
def t__X_(n, x)
  tdist(n, x) - 0.5
end

# Returns the integral of t-distribution with n degrees of freedom over
# [x, Infty).
def t___x(n, x)
  1.0 - tdist(n, x)
end
553
+
554
+ module_function :tdist, :txXX_, :t__X_, :t___x, :tx__x
555
+
556
+
557
+ # inverse of t-distribution
558
+
559
# Inverse of tx__x: the x whose integral over
# (-Infty, -x] + [x, Infty) is y.
def ptx__x(n, y)
  ptdist(n, 1.0 - y / 2.0)
end

# Inverse of txXX_: the x whose integral over (-Infty, x] is y.
def ptxXX_(n, y)
  ptdist(n, y)
end

# Inverse of t__X_: the x whose integral over [0, x] is y.
def pt__X_(n, y)
  ptdist(n, 0.5 + y)
end

# Inverse of t___x: the x whose integral over [x, Infty) is y.
def pt___x(n, y)
  ptdist(n, 1.0 - y)
end
570
+
571
+ module_function :ptdist, :ptxXX_, :pt__X_, :pt___x, :ptx__x
572
+
573
+
574
+ # F-distribution
575
+
576
# Returns the integral of F-distribution with n1 and n2 degrees of freedom
# over [x, Infty).
def f_x(n1, n2, x)
  1.0 - fdist(n1, n2, x)
end

# Returns the integral of F-distribution with n1 and n2 degrees of freedom
# over [0, x].
def fX_(n1, n2, x)
  fdist(n1, n2, x)
end
581
+ module_function :fdist, :fX_, :f_x
582
+
583
+
584
+ # inverse of F-distribution
585
+
586
# Inverse of f_x: the value whose integral over [value, Infty) is x.
def pf_x(n1, n2, x)
  pfdist(n1, n2, 1.0 - x)
end

# Inverse of fX_: the value whose integral over [0, value] is x.
def pfX_(n1, n2, x)
  pfdist(n1, n2, x)
end
591
+
592
+ module_function :pfdist, :pfX_, :pf_x
593
+
594
+
595
# discrete distributions

# Binomial probability of at most x successes; same as bindist.
def binX_(n, p, x)
  bindist(n, p, x)
end

# Binomial probability of at least x successes, computed as the
# probability of at most n - x failures.
def bin_x(n, p, x)
  bindist(n, 1.0 - p, n - x)
end
598
+ module_function :bindens, :bindist, :binX_, :bin_x
599
+
600
# Poisson probability of at most x events; same as poissondist.
def poissonX_(m, x)
  poissondist(m, x)
end

# Poisson probability of at least x events: the complement of at most
# x - 1 events.
def poisson_x(m, x)
  1.0 - poissondist(m, x-1)
end
602
+ module_function :poissondens, :poissondist, :poissonX_, :poisson_x
603
+ end
604
+
605
# Load the compiled extension unless Statistics2::NO_EXT is defined and
# set to a truthy value.
unless defined?(Statistics2::NO_EXT) && Statistics2::NO_EXT
  require 'statistics2.so'
end
608
+
609
# Command-line entry point: the first argument names a Statistics2 function
# and the remaining arguments are its parameters. With no arguments a short
# usage example is printed.
#
# NOTE(review): every parameter is passed through eval and the function
# name through send, so this must only be run with trusted command lines.
if $0 == __FILE__
  if ARGV.empty?
    puts "Example:", " #$0 normaldist 0.01", " #$0 pf_x 2 3 0.01"
    exit
  end
  p Statistics2.send(ARGV[0], *ARGV[1..-1].map { |x| eval(x) })
end