statistics2 0.54

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,617 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'ext')
3
+
4
+ require 'statistics2/version'
5
+
6
+ # distributions of statistics
7
+ # by Shin-ichiro HARA
8
+ #
9
+ # Ref:
10
+ # [1] http://www.matsusaka-u.ac.jp/~okumura/algo/
11
+ # [2] http://www5.airnet.ne.jp/tomy/cpro/sslib11.htm
12
+
13
module Statistics2
  # sqrt(2 * PI); divisor used by the normal and chi-square series below.
  SQ2PI = Math.sqrt(2 * Math::PI)

  # Easy access to our singleton
  def self.metaclass; class << self; self; end; end

  # Remove existing methods: for each public API name, undefine the private
  # instance method and the singleton method if one is already defined.
  ["normaldist", "normalxXX_", "normal__X_", "normal___x", "normalx__x",
   "pnormaldist", "pnormalxXX_", "pnormal__X_", "pnormal___x", "pnormalx__x",
   "chi2dist", "chi2X_", "chi2_x", "pchi2dist", "pchi2X_", "pchi2_x",
   "tdist", "txXX_", "t__X_", "t___x", "tx__x", "ptdist", "ptxXX_", "pt__X_", "pt___x", "ptx__x",
   "fdist", "fX_", "f_x", "pfdist", "pfX_", "pf_x",
   "bindens", "bindist", "binX_", "bin_x",
   "poissondens", "poissondist", "poissonX_", "poisson_x"].each do |m|
    undef_method(m) if self.private_method_defined?(m)
    self.metaclass.instance_eval do
      undef_method(m) if self.method_defined?(m)
    end
  end

  # Newton approximation.
  #
  # Iterates x <- (y - f) / df + x, where the block receives the current x
  # and returns the pair [f, df]. Stops as soon as two successive values
  # differ by less than +epsilon+. After +limit+ iterations without that, a
  # warning is written to $stderr and the last value is returned.
  def newton_a(y, ini, epsilon = 1.0e-6, limit = 30)
    x = ini
    limit.times do
      prev = x
      f, df = yield(prev)
      x = (y - f)/df + prev
      if (x - prev).abs < epsilon
        return x
      end
    end
    $stderr.puts("Warning(newton approximation): over limit")
    x
  end

  module_function :newton_a
  private :newton_a
  private_class_method :newton_a

  # Gamma function
  LOG_2PI = Math.log(2 * Math::PI)# log(2PI)
  # loggamma shifts its argument upwards until it is at least N.
  N = 8
  # Coefficients used by the series in loggamma.
  B0 = 1.0
  B1 = -1.0 / 2.0
  B2 = 1.0 / 6.0
  B4 = -1.0 / 30.0
  B6 = 1.0 / 42.0
  B8 = -1.0 / 30.0
  B10 = 5.0 / 66.0
  B12 = -691.0 / 2730.0
  B14 = 7.0 / 6.0
  B16 = -3617.0 / 510.0

  # Logarithm of the gamma function at x.
  #
  # While x < N the argument is raised by 1.0 and the skipped factors are
  # accumulated in v; log(v) is subtracted again at the end. The series in
  # w = 1 / x**2 is evaluated in nested (Horner) form.
  def loggamma(x)
    v = 1.0
    while (x < N)
      v *= x
      x += 1.0
    end
    w = 1.0 / (x * x)
    ret = B16 / (16 * 15)
    ret = ret * w + B14 / (14 * 13)
    ret = ret * w + B12 / (12 * 11)
    ret = ret * w + B10 / (10 * 9)
    ret = ret * w + B8 / ( 8 * 7)
    ret = ret * w + B6 / ( 6 * 5)
    ret = ret * w + B4 / ( 4 * 3)
    ret = ret * w + B2 / ( 2 * 1)
    ret = ret / x + 0.5 * LOG_2PI - Math.log(v) - x + (x - 0.5) * Math.log(x)
    ret
  end

  # Gamma function at x, computed as exp(loggamma(x)).
  #
  # For x < 0 the value is PI / (sin(PI * x) * gamma(1 - x)), so loggamma is
  # only ever called with a positive argument.
  def gamma(x)
    if (x < 0.0)
      # Math::PI is a constant; the previous Math.PI call raised NoMethodError.
      return Math::PI / (Math.sin(Math::PI * x) * Math.exp(loggamma(1 - x))) #/
    end
    Math.exp(loggamma(x))
  end

  module_function :loggamma, :gamma
  private :loggamma, :gamma
  private_class_method :loggamma, :gamma

  #normal-distribution
  # (-\infty, z]
  #
  # Inputs beyond +/-12 are clamped to 0.0 / 1.0. Otherwise a series in
  # z**2 is summed over |z| until the partial sum stops growing, and the
  # result is placed above or below 0.5 depending on the sign of z.
  def p_nor(z)
    if z < -12 then return 0.0 end
    if z > 12 then return 1.0 end
    if z == 0.0 then return 0.5 end

    if z > 0.0
      e = true
    else
      e = false
      z = -z
    end
    z = z.to_f
    z2 = z * z
    t = q = z * Math.exp(-0.5 * z2) / SQ2PI

    3.step(199, 2) do |i|
      prev = q
      t *= z2 / i
      q += t
      if q <= prev
        return(e ? 0.5 + q : 0.5 - q)
      end
    end
    # The series did not settle within the allowed number of terms.
    e ? 1.0 : 0.0
  end

  # inverse of normal distribution ([2])
  # Pr( (-\infty, x] ) = qn -> x
  #
  # A qn outside [0, 1] prints an error to $stderr and yields 0.0.
  def pnorm(qn)
    b = [1.570796288, 0.03706987906, -0.8364353589e-3,
         -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5,
         -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8,
         0.3657763036e-10, 0.6936233982e-12]

    if(qn < 0.0 || 1.0 < qn)
      $stderr.printf("Error : qn <= 0 or qn >= 1 in pnorm()!\n")
      return 0.0;
    end
    qn == 0.5 and return 0.0

    # Work on the tail probability that is at most 0.5; the sign of the
    # result is restored at the end.
    w1 = qn
    qn > 0.5 and w1 = 1.0 - w1
    w3 = -Math.log(4.0 * w1 * (1.0 - w1))
    w1 = b[0]
    1.upto 10 do |i|
      w1 += b[i] * w3**i;
    end
    qn > 0.5 and return Math.sqrt(w1 * w3)
    -Math.sqrt(w1 * w3)
  end

  private :p_nor, :pnorm
  module_function :p_nor, :pnorm
  private_class_method :p_nor, :pnorm

  # Returns the integral of normal distribution over (-Infty, x].
  def normaldist(z)
    p_nor(z)
  end

  # Returns the P-value of normaldist(x).
  def pnormaldist(y)
    pnorm(y)
  end

  #chi-square distribution ([1])
  #[x, \infty)
  #
  # df is the degrees of freedom (an Integer: its lowest bit selects the odd
  # or even branch); chi2 is the lower bound of the integral.
  def q_chi2(df, chi2)
    chi2 = chi2.to_f
    if (df & 1) != 0
      chi = Math.sqrt(chi2)
      if (df == 1) then return 2 * normal___x(chi); end
      s = t = chi * Math.exp(-0.5 * chi2) / SQ2PI
      k = 3
      while k < df
        t *= chi2 / k; s += t;
        k += 2
      end
      2 * (normal___x(chi) + s)
    else
      s = t = Math.exp(-0.5 * chi2)
      k = 2
      while k < df
        t *= chi2 / k; s += t;
        k += 2
      end
      s
    end
  end

  # Density of the chi-square distribution with n degrees of freedom at x.
  # n == 1 and n == 2 use closed forms; other n go through gamma(n / 2).
  def chi2dens(n, x)
    if n == 1
      1.0/Math.sqrt(2 * Math::PI * x) * Math::E**(-x/2.0)
    elsif n == 2
      0.5 * Math::E**(-x/2.0)
    else
      n = n.to_f
      n2 = n/2
      x = x.to_f
      1.0 / 2**n2 / gamma(n2) * x**(n2 - 1.0) * Math.exp(-x/2.0)
    end
  end

  # [x, \infty)
  # Pr([x, \infty)) = y -> x
  #
  # n == 1 and n == 2 are solved in closed form. Other n are found by a
  # digit-by-digit search: v advances by s while q_chi2(n, v) stays at or
  # above y, and s shrinks by a factor of ten each time it overshoots.
  def pchi2(n, y)
    if n == 1
      w = pnorm(1 - y/2) # = pnormal___x(y/2)
      w * w
    elsif n == 2
      # v = (1.0 / y - 1.0) / 33.0
      # newton_a(y, v) {|x| [q_chi2(n, x), -chi2dens(n, x)] }
      -2.0 * Math.log(y)
    else
      eps = 1.0e-5
      v = 0.0
      s = 10.0
      loop do
        v += s
        if s <= eps then break end
        if (qe = q_chi2(n, v) - y) == 0.0 then break end
        if qe < 0.0
          v -= s
          s /= 10.0 #/
        end
      end
      v
    end
  end

  private :q_chi2, :pchi2, :chi2dens
  module_function :q_chi2, :pchi2, :chi2dens
  private_class_method :q_chi2, :pchi2, :chi2dens

  # Returns the integral of Chi-squared distribution with n degrees of freedom over [0, x].
  def chi2dist(n, x); 1.0 - q_chi2(n, x); end

  # Returns the P-value of chi2dist().
  def pchi2dist(n, y); pchi2(n, 1.0 - y); end


  # t-distribution ([1])
  # (-\infty, x]
  #
  # df is the degrees of freedom (an Integer: the parity test below uses a
  # bitwise AND); t is the upper bound of the integral.
  def p_t(df, t)
    c2 = df.to_f / (df + t * t);
    s = Math.sqrt(1.0 - c2)
    s = -s if t < 0.0
    p = 0.0;
    i = df % 2 + 2
    while i <= df
      p += s
      s *= (i - 1) * c2 / i
      i += 2
    end
    if (df & 1) != 0
      0.5+(p*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df)))/Math::PI
    else
      (1.0 + p) / 2.0
    end
  end

  # inverse of t-distribution ([2])
  # (-\infty, -q/2] + [q/2, \infty)
  #
  # Digit-by-digit search on the two-sided tail 2 - 2 * p_t(n, w), starting
  # with a step of 10000 and stopping once the step is no larger than eps.
  # eps is loosened for a few small-n, small-q combinations.
  def ptsub(q, n)
    q = q.to_f
    if(n == 1 && 0.001 < q && q < 0.01)
      eps = 1.0e-4
    elsif (n == 2 && q < 0.0001)
      eps = 1.0e-4
    elsif (n == 1 && q < 0.001)
      eps = 1.0e-2
    else
      eps = 1.0e-5
    end
    s = 10000.0
    w = 0.0
    loop do
      w += s
      if(s <= eps) then return w end
      if((qe = 2.0 - p_t(n, w)*2.0 - q) == 0.0) then return w end
      if(qe < 0.0)
        w -= s
        s /= 10.0 #/
      end
    end
  end

  # Inverse of the two-sided t tail probability q for n degrees of freedom.
  #
  # Invalid parameters print an error to $stderr and yield 0.0. Small n (and
  # small q with n <= 13) go through the search in ptsub; everything else
  # uses a series in the normal quantile u with powers of 1 / n.
  def pt(q, n)
    q = q.to_f
    if(q < 1.0e-5 || q > 1.0 || n < 1)
      $stderr.printf("Error : Illigal parameter in pt()!\n")
      return 0.0
    end

    if(n <= 5) then return ptsub(q, n) end
    if(q <= 5.0e-3 && n <= 13) then return ptsub(q, n) end

    f1 = 4.0 * (f = n.to_f)
    f5 = (f4 = (f3 = (f2 = f * f) * f) * f) * f
    f2 *= 96.0
    f3 *= 384.0
    f4 *= 92160.0
    f5 *= 368640.0
    u = pnormaldist(1.0 - q / 2.0)

    # w0..w4 are u**3, u**5, u**7, u**9 and u**11.
    w0 = (u2 = u * u) * u
    w1 = w0 * u2
    w2 = w1 * u2
    w3 = w2 * u2
    w4 = w3 * u2
    w = (w0 + u) / f1
    w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
    w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
    w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
    w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17955.0 * u) / f5
    u + w
  end

  private :p_t, :pt, :ptsub
  module_function :p_t, :pt, :ptsub
  private_class_method :p_t, :pt, :ptsub

  # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
  def tdist(n, t); p_t(n, t); end

  # Returns the P-value of tdist().
  def ptdist(n, y)
    if y > 0.5
      pt(2.0 - y*2.0, n)
    else
      - pt(y*2.0, n)
    end
  end

  # F-distribution ([1])
  # [x, \infty)
  #
  # df1 and df2 are the degrees of freedom; f is the lower bound of the
  # integral. The odd/even combination of df1 is swapped into the even/odd
  # case by exchanging the degrees of freedom and inverting f.
  def q_f(df1, df2, f)
    if (f <= 0.0) then return 1.0; end
    if (df1 % 2 != 0 && df2 % 2 == 0)
      return 1.0 - q_f(df2, df1, 1.0 / f)
    end
    cos2 = 1.0 / (1.0 + df1.to_f * f / df2.to_f)
    sin2 = 1.0 - cos2

    if (df1 % 2 == 0)
      prob = cos2 ** (df2.to_f / 2.0)
      temp = prob
      i = 2
      while i < df1
        temp *= (df2.to_f + i - 2) * sin2 / i
        prob += temp
        i += 2
      end
      return prob
    end
    # Both degrees of freedom are odd from here on.
    prob = Math.atan(Math.sqrt(df2.to_f / (df1.to_f * f)))
    temp = Math.sqrt(sin2 * cos2)
    i = 3
    while i <= df1
      prob += temp
      temp *= (i - 1).to_f * sin2 / i.to_f;
      i += 2
    end
    temp *= df1.to_f
    i = 3
    while i <= df2
      prob -= temp
      temp *= (df1.to_f + i - 2) * cos2 / i.to_f
      i += 2
    end
    prob * 2.0 / Math::PI
  end

  # inverse of F-distribution ([2])
  #
  # Helper for pf: (sqrt(z) - y) / (2 * x), i.e. the "+" root of
  # x * t**2 + y * t + c = 0 when z is its discriminant.
  def pfsub(x, y, z)
    (Math.sqrt(z) - y) / x / 2.0
  end

  # [x, \infty)
  #
  # Inverse of q_f: returns the f whose upper-tail probability is q for
  # n1 and n2 degrees of freedom. Invalid parameters print an error to
  # $stderr and yield 0.0. When either degree of freedom is at most 240 a
  # digit-by-digit search on q_f is used (that loop always returns);
  # otherwise a closed-form approximation based on the normal quantile is
  # evaluated.
  def pf(q, n1, n2)
    if(q < 0.0 || q > 1.0 || n1 < 1 || n2 < 1)
      $stderr.printf("Error : Illegal parameter in pf()!\n")
      return 0.0
    end

    if n1 <= 240 || n2 <= 240
      eps = 1.0e-5
      if(n2 == 1) then eps = 1.0e-4 end
      fw = 0.0
      s = 1000.0
      loop do
        fw += s
        if s <= eps then return fw end
        if (qe = q_f(n1, n2, fw) - q) == 0.0 then return fw end
        if qe < 0.0
          fw -= s
          s /= 10.0 #/
        end
      end
    end

    eps = 1.0e-6
    qn = q
    # This used to be an unterminated `if ... then`, which swallowed the
    # rest of the method and made pf return nil whenever q >= 0.5.
    qn = 1.0 - q if q < 0.5
    # NOTE(review): qn is always >= 0.5 here, so u is never negative and q
    # and 1 - q give the same result -- confirm against reference [2]
    # whether q > 0.5 should use the other root.
    u = pnorm(qn)
    w1 = 2.0 / n1 / 9.0
    w2 = 2.0 / n2 / 9.0
    w3 = 1.0 - w1
    w4 = 1.0 - w2
    u2 = u * u
    a = w4 * w4 - u2 * w2
    b = -2.0 * w3 * w4
    c = w3 * w3 - u2 * w1
    d = b * b - 4 * a * c
    if d < 0.0
      fw = pfsub(a, b, 0.0)
    elsif a.abs > eps
      fw = pfsub(a, b, d)
    elsif b.abs > eps
      return -c / b
    else
      fw = pfsub(a, b, 0.0)
    end
    fw * fw * fw
  end

  private :q_f, :pf, :pfsub
  module_function :q_f, :pf, :pfsub
  private_class_method :q_f, :pf, :pfsub

  # Returns the integral of F-distribution with n1 and n2 degrees of freedom over [0, x].
  def fdist(n1, n2, f); 1.0 - q_f(n1, n2, f); end

  # Returns the P-value of fdist().
  def pfdist(n1, n2, y); pf(1.0 - y, n1, n2); end

  ############################################################################
  # discrete distributions

  # Falling product n * (n - 1) * ... over x factors; perm(n) is n!.
  # Raises RangeError when n or x is negative.
  def perm(n, x = n)
    raise RangeError if n < 0 || x < 0
    r = 1
    while x >= 1
      r *= n
      n -= 1
      x -= 1
    end
    r
  end

  # Number of combinations of x items out of n.
  # Raises RangeError when n or x is negative.
  def combi(n, x)
    raise RangeError if n < 0 || x < 0
    # Use the smaller of x and n - x to keep the products short.
    x = n - x if x*2 > n
    perm(n, x) / perm(x, x)
  end

  module_function :perm, :combi
  private_class_method :perm, :combi

  # Probability of exactly x successes in n trials with success rate p.
  def bindens(n, p, x)
    p = p.to_f
    q = 1.0 - p
    combi(n, x) * p**x * q**(n - x)
  end

  # Sum of bindens(n, p, k) for k in 0..x.
  def bindist(n, p, x)
    (0..x).inject(0.0) do |s, k|
      s + bindens(n, p, k)
    end
  end

  # Poisson probability of the count x for mean m; 0.0 for negative x.
  def poissondens(m, x)
    return 0.0 if x < 0
    m = m.to_f
    m ** x * Math::E ** (-m) / perm(x)
  end

  # Sum of poissondens(m, k) for k in 0..x.
  def poissondist(m, x)
    (0..x).inject(0.0) do |s, k|
      s + poissondens(m, k)
    end
  end

  ############################################################################
  # normal-distribution

  # Returns the integral of normal distribution over (-Infty, x].
  def normalxXX_(z); normaldist(z); end

  # Returns the integral of normal distribution over [0, x].
  def normal__X_(z); normaldist(z) - 0.5; end

  # Returns the integral of normal distribution over [x, Infty).
  def normal___x(z); 1.0 - normaldist(z); end

  # Returns the integral of normal distribution over (-Infty, -x] + [x, Infty).
  def normalx__x(z); 2.0 - normaldist(z) * 2.0; end

  module_function :normaldist, :normalxXX_, :normal__X_, :normal___x, :normalx__x


  # inverse of normal-distribution

  # Return the P-value of the corresponding integral.
  def pnormalxXX_(z); pnormaldist(z); end

  # Return the P-value of the corresponding integral.
  def pnormal__X_(y); pnormalxXX_(y + 0.5); end

  # Return the P-value of the corresponding integral.
  def pnormal___x(y); pnormalxXX_(1.0 - y); end

  # Return the P-value of the corresponding integral.
  def pnormalx__x(y); pnormalxXX_(1.0 - y/2.0); end

  module_function :pnormaldist, :pnormalxXX_, :pnormal__X_, :pnormal___x, :pnormalx__x


  # chi2-distribution

  # Returns the integral of Chi-squared distribution with n degrees of freedom over [x, Infty).
  def chi2_x(n, x); 1.0 - chi2dist(n, x); end

  # Returns the integral of Chi-squared distribution with n degrees of freedom over [0, x].
  def chi2X_(n, x); chi2dist(n, x); end

  module_function :chi2dist, :chi2X_, :chi2_x


  # inverse of chi2-distribution

  # Return the P-value of the corresponding integral.
  def pchi2_x(n, y); pchi2dist(n, 1.0 - y); end

  # Return the P-value of the corresponding integral.
  def pchi2X_(n, y); pchi2dist(n, y); end

  module_function :pchi2dist, :pchi2X_, :pchi2_x


  # t-distribution

  # Returns the integral of t-distribution with n degrees of freedom over (-Infty, -x] + [x, Infty).
  def tx__x(n, x); 2.0 - tdist(n, x) * 2.0; end

  # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
  def txXX_(n, x); tdist(n, x); end

  # Returns the integral of t-distribution with n degrees of freedom over [0, x].
  def t__X_(n, x); tdist(n, x) - 0.5; end

  # Returns the integral of t-distribution with n degrees of freedom over [x, Infty).
  def t___x(n, x); 1.0 - tdist(n, x); end

  module_function :tdist, :txXX_, :t__X_, :t___x, :tx__x


  # inverse of t-distribution

  # Return the P-value of the corresponding integral.
  def ptx__x(n, y); ptdist(n, 1.0 - y / 2.0); end

  # Return the P-value of the corresponding integral.
  def ptxXX_(n, y); ptdist(n, y); end

  # Return the P-value of the corresponding integral.
  def pt__X_(n, y); ptdist(n, 0.5 + y); end

  # Return the P-value of the corresponding integral.
  def pt___x(n, y); ptdist(n, 1.0 - y); end

  module_function :ptdist, :ptxXX_, :pt__X_, :pt___x, :ptx__x


  # F-distribution

  # Returns the integral of F-distribution with n1 and n2 degrees of freedom over [x, Infty).
  def f_x(n1, n2, x); 1.0 - fdist(n1, n2, x); end

  # Returns the integral of F-distribution with n1 and n2 degrees of freedom over [0, x].
  def fX_(n1, n2, x); fdist(n1, n2, x); end
  module_function :fdist, :fX_, :f_x


  # inverse of F-distribution

  # Return the P-value of the corresponding integral.
  def pf_x(n1, n2, x); pfdist(n1, n2, 1.0 - x); end

  # Return the P-value of the corresponding integral.
  def pfX_(n1, n2, x); pfdist(n1, n2, x); end

  module_function :pfdist, :pfX_, :pf_x


  # discrete distributions

  # Binomial probability of at most x successes (delegates to bindist).
  def binX_(n, p, x); bindist(n, p, x); end
  # Binomial probability of at least x successes, computed as at most
  # n - x failures with failure rate 1 - p.
  def bin_x(n, p, x); bindist(n, 1.0 - p, n - x); end
  module_function :bindens, :bindist, :binX_, :bin_x

  # Poisson probability of a count of at most x (delegates to poissondist).
  def poissonX_(m, x); poissondist(m, x); end
  # Poisson probability of a count of at least x.
  def poisson_x(m, x); 1.0 - poissondist(m, x-1); end
  module_function :poissondens, :poissondist, :poissonX_, :poisson_x
end
604
+
605
# Load the native extension on top of the pure-Ruby definitions, unless
# Statistics2::NO_EXT is defined and truthy.
require 'statistics2.so' unless defined?(Statistics2::NO_EXT) && Statistics2::NO_EXT
608
+
609
# Command-line entry point: the first argument names a Statistics2 function
# and the remaining arguments are its parameters; the result is printed.
if __FILE__ == $0
  if ARGV.empty?
    puts "Example:", " #$0 normaldist 0.01", " #$0 pf_x 2 3 0.01"
    exit
  end
  # NOTE(review): every argument is passed through eval and the function
  # name through send, so this executes arbitrary Ruby taken from the
  # command line. Do not feed it untrusted input.
  p Statistics2.send(ARGV.first, *ARGV.drop(1).map { |x| eval(x) })
end