fselector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +21 -0
- data/README.md +195 -0
- data/lib/fselector.rb +41 -0
- data/lib/fselector/algo_continuous/PMetric.rb +51 -0
- data/lib/fselector/algo_continuous/ReliefF_c.rb +190 -0
- data/lib/fselector/algo_continuous/Relief_c.rb +150 -0
- data/lib/fselector/algo_continuous/TScore.rb +52 -0
- data/lib/fselector/algo_continuous/discretizer.rb +219 -0
- data/lib/fselector/algo_continuous/normalizer.rb +59 -0
- data/lib/fselector/algo_discrete/Accuracy.rb +35 -0
- data/lib/fselector/algo_discrete/AccuracyBalanced.rb +37 -0
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +45 -0
- data/lib/fselector/algo_discrete/ChiSquaredTest.rb +69 -0
- data/lib/fselector/algo_discrete/CorrelationCoefficient.rb +42 -0
- data/lib/fselector/algo_discrete/DocumentFrequency.rb +36 -0
- data/lib/fselector/algo_discrete/F1Measure.rb +41 -0
- data/lib/fselector/algo_discrete/FishersExactTest.rb +47 -0
- data/lib/fselector/algo_discrete/GMean.rb +37 -0
- data/lib/fselector/algo_discrete/GSSCoefficient.rb +43 -0
- data/lib/fselector/algo_discrete/GiniIndex.rb +44 -0
- data/lib/fselector/algo_discrete/InformationGain.rb +96 -0
- data/lib/fselector/algo_discrete/MatthewsCorrelationCoefficient.rb +45 -0
- data/lib/fselector/algo_discrete/McNemarsTest.rb +57 -0
- data/lib/fselector/algo_discrete/MutualInformation.rb +42 -0
- data/lib/fselector/algo_discrete/OddsRatio.rb +46 -0
- data/lib/fselector/algo_discrete/OddsRatioNumerator.rb +41 -0
- data/lib/fselector/algo_discrete/Power.rb +46 -0
- data/lib/fselector/algo_discrete/Precision.rb +31 -0
- data/lib/fselector/algo_discrete/ProbabilityRatio.rb +41 -0
- data/lib/fselector/algo_discrete/Random.rb +40 -0
- data/lib/fselector/algo_discrete/ReliefF_d.rb +173 -0
- data/lib/fselector/algo_discrete/Relief_d.rb +135 -0
- data/lib/fselector/algo_discrete/Sensitivity.rb +38 -0
- data/lib/fselector/algo_discrete/Specificity.rb +35 -0
- data/lib/fselector/base.rb +322 -0
- data/lib/fselector/base_continuous.rb +25 -0
- data/lib/fselector/base_discrete.rb +355 -0
- data/lib/fselector/ensemble.rb +181 -0
- data/lib/fselector/fileio.rb +455 -0
- data/lib/fselector/util.rb +707 -0
- metadata +86 -0
@@ -0,0 +1,707 @@
|
|
1
|
+
#
|
2
|
+
# add functions to Array class
|
3
|
+
#
|
4
|
+
class Array
  # Summation.
  #
  # Accepts the same optional start value and block as the Array#sum that
  # ships with Ruby 2.4+, so reopening Array here does not break other code
  # that calls +sum(init)+ or +sum { ... }+. Called without arguments it
  # behaves as before: elements are added onto 0.0.
  #
  # @param [Numeric] init start value of the accumulation (default 0.0)
  # @yield [element] optional block whose result is added instead of the element
  # @return [Float] sum (a Float whenever the default start value is used)
  def sum(init = 0.0)
    if block_given?
      inject(init) { |acc, item| acc + yield(item) }
    else
      inject(init) { |acc, item| acc + item }
    end
  end


  # average (mean)
  #
  # For an empty array this evaluates 0.0 / 0 and therefore yields NaN.
  #
  # @return [Float] average (mean)
  def ave
    sum / size
  end
  alias :mean :ave # make mean as an alias of ave


  # variance, using the (size - 1) denominator
  #
  # For a single-element array the denominator is 0, so the result is NaN.
  #
  # @return [Float] variance
  def var
    mu = ave
    squared_dev = inject(0.0) { |acc, item| acc + (item - mu) * (item - mu) }

    squared_dev / (size - 1)
  end


  # standard deviation (square root of #var)
  #
  # @return [Float] standard deviation
  def sd
    Math.sqrt(var)
  end


  # scale to [min, max]
  #
  # Every element is mapped linearly so that the smallest element becomes
  # +min+ and the largest becomes +max+. If all elements are equal, every
  # element is mapped to +max+. The process is aborted when min >= max.
  #
  # @param [Numeric] min lower bound of the target range
  # @param [Numeric] max upper bound of the target range
  # @return [Array<Numeric>] scaled copy; self is not modified
  def to_scale(min=0.0, max=1.0)
    if min >= max
      abort "[#{__FILE__}@#{__LINE__}]: "+
            "min must be smaller than max"
    end

    # +min+ / +max+ are locals here, so the receiver must be explicit
    old_min = self.min
    old_max = self.max

    return collect { max } if old_min == old_max

    # Float divisor: with Integer elements and Integer bounds the original
    # expression truncated intermediate values (e.g. 1/3 => 0).
    span = (old_max - old_min).to_f
    collect { |v| min + (v - old_min) * (max - min) / span }
  end


  # convert to zscore, i.e. (value - mean) / standard deviation
  #
  # No special handling for a zero standard deviation: a constant array
  # produces NaN entries (0.0 / 0.0).
  #
  # ref: [Wikipedia](http://en.wikipedia.org/wiki/Standard_score)
  #
  # @return [Array<Float>] z-scores
  def to_zscore
    mu = ave
    sigma = sd

    collect { |v| (v - mu) / sigma }
  end


  # to symbol
  # @return [Array<Symbol>] converted symbols
  def to_sym
    collect(&:to_sym)
  end


end
|
76
|
+
|
77
|
+
|
78
|
+
#
|
79
|
+
# add functions to String class
|
80
|
+
#
|
81
|
+
class String
  # comment line?
  #
  # The marker is matched literally (it is regex-escaped), so markers such
  # as '.', '*' or '%' work as expected.
  #
  # @param [String] char line beginning char
  # @return [Integer, nil] match offset if a line starts with +char+, otherwise nil
  def comment?(char='#')
    self =~ /^#{Regexp.escape(char)}/
  end


  # blank line?
  #
  # @return [Integer, nil] match offset if a line is empty or whitespace-only,
  #   otherwise nil
  def blank?
    self =~ /^\s*$/
  end


  #
  # Enhanced String.split with escape char, which means
  # string included in a pair of escape char is considered as a whole
  # even if it matches the split regular expression. this is especially
  # useful to parse CSV file that contains comma in a double-quoted string
  # e.g. 'a,"b, c",d'.split_me(/,/, '"') => ['a', '"b, c"', 'd']
  # (the quote chars themselves are kept in the returned fields)
  #
  # @param [Regexp] delim_regex regular expression for split
  # @param [String] quote_char quote char such as ' and "
  # @return [Array<String>, nil] fields, or nil (with a warning on stderr)
  #   if the quote char occurs an odd number of times
  #
  def split_me(delim_regex, quote_char="'")
    unless count(quote_char).even?
      $stderr.puts "unpaired char of #{quote_char} found, return nil"
      return nil
    end

    # escape so that chars like ] ^ - \ stay literal inside [^...]
    q = Regexp.escape(quote_char)

    # Interpolating the Regexp itself (not its #source) embeds it as a
    # self-contained (?flags:...) group: its own options and literal spaces
    # survive the /x flag, and a top-level alternation such as /,|;/ stays
    # fully covered by the lookahead. The lookahead accepts a delimiter only
    # if an even number of quote chars follows, i.e. it is outside quotes.
    split(/#{delim_regex} (?=(?:[^#{q}]* #{q} [^#{q}]* #{q})* [^#{q}]*$) /x)
  end


end
|
118
|
+
|
119
|
+
#puts "a, 'b,c, d' ,'e'".split_me(/,\s*/, "'")
|
120
|
+
#=>a
|
121
|
+
#=>_'b,c, d'_
|
122
|
+
#=>'e'
|
123
|
+
|
124
|
+
|
125
|
+
#
|
126
|
+
# adapted from the Ruby statistics libraries --
|
127
|
+
# [Rubystats](http://rubystats.rubyforge.org)
|
128
|
+
#
|
129
|
+
# - for Fisher's exact test (Rubystats::FishersExactTest.calculate())
|
130
|
+
# used by algo\_discrete/FishersExactTest.rb
|
131
|
+
# - for inverse cumulative normal distribution function (Rubystats::NormalDistribution.get\_icdf())
|
132
|
+
# used by algo\_discrete/BiNormalSeparation.rb. Note the original get\_icdf() function is a private
|
133
|
+
# one, so we have to open it up and that's why the codes here.
|
134
|
+
#
|
135
|
+
#
|
136
|
+
module Rubystats
  # value returned (with sign) by NormalDistribution#get_icdf for p = 0 or 1
  MAX_VALUE = 1.2e290
  SQRT2PI = 2.5066282746310005024157652848110452530069867406099
  SQRT2 = 1.4142135623730950488016887242096980785696718753769
  TWO_PI = 6.2831853071795864769252867665590057683943387987502

  #
  # Fisher's exact test calculator
  #
  # An instance keeps running state (the @s* variables) that lets successive
  # hypergeometric probabilities be derived from the previous one; #calculate
  # re-seeds that state on every call.
  #
  class FishersExactTest
    # new()
    def initialize
      @sn11 = 0.0
      @sn1_ = 0.0
      @sn_1 = 0.0
      @sn = 0.0
      @sprob = 0.0

      @sleft = 0.0
      @sright = 0.0
      @sless = 0.0
      @slarg = 0.0

      @left = 0.0
      @right = 0.0
      @twotail = 0.0
    end


    # Fisher's exact test on a 2x2 contingency table
    #
    # Negative counts are replaced by their absolute value.
    #
    # @param [Numeric] n11_ count in row 1, column 1
    # @param [Numeric] n12_ count in row 1, column 2
    # @param [Numeric] n21_ count in row 2, column 1
    # @param [Numeric] n22_ count in row 2, column 2
    # @return [Hash] p-values under the keys :left, :right and :twotail
    def calculate(n11_,n12_,n21_,n22_)
      n11_ *= -1 if n11_ < 0
      n12_ *= -1 if n12_ < 0
      n21_ *= -1 if n21_ < 0
      n22_ *= -1 if n22_ < 0
      n1_ = n11_ + n12_                 # row 1 total
      n_1 = n11_ + n21_                 # column 1 total
      n = n11_ + n12_ + n21_ + n22_     # grand total
      # called for its side effects: fills @sless, @slarg, @sleft, @sright
      exact(n11_, n1_, n_1, n)
      twotail = @sleft + @sright
      twotail = 1 if twotail > 1
      { :left => @sless, :right => @slarg, :twotail => twotail }
    end

    private

    # Log of the gamma function.
    #
    # Reference: "Lanczos, C. 'A precision approximation
    # of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
    # Translation of Alan Miller's FORTRAN-implementation
    # See http://lib.stat.cmu.edu/apstat/245
    def lngamm(z)
      x = 0
      x += 0.0000001659470187408462/(z+7)
      x += 0.000009934937113930748 /(z+6)
      x -= 0.1385710331296526 /(z+5)
      x += 12.50734324009056 /(z+4)
      x -= 176.6150291498386 /(z+3)
      x += 771.3234287757674 /(z+2)
      x -= 1259.139216722289 /(z+1)
      x += 676.5203681218835 /(z)
      x += 0.9999999999995183

      Math.log(x) - 5.58106146679532777 - z + (z - 0.5) * Math.log(z + 6.5)
    end

    # log(n!), computed as lngamm(n+1); 0 for n <= 1
    def lnfact(n)
      return 0 if n <= 1

      lngamm(n + 1)
    end

    # log of the binomial coefficient C(n, k)
    def lnbico(n,k)
      lnfact(n) - lnfact(k) - lnfact(n - k)
    end

    # hypergeometric probability, evaluated directly from log binomials
    def hyper_323(n11, n1_, n_1, n)
      Math.exp(lnbico(n1_, n11) + lnbico(n - n1_, n_1 - n11) - lnbico(n, n_1))
    end

    # probability for n11 using the margins remembered from the last full
    # hyper0 call
    def hyper(n11)
      hyper0(n11, 0, 0, 0)
    end

    # Hypergeometric probability with cached state.
    #
    # With all-zero margins the stored margins are reused; if n11i is one
    # step away from the stored @sn11 the probability is updated by a ratio
    # instead of being recomputed. When n11i is a multiple of 10 the ratio
    # shortcut is skipped and the value is recomputed via hyper_323.
    # With non-zero margins the stored state is replaced first.
    def hyper0(n11i,n1_i,n_1i,ni)
      if n1_i == 0 && n_1i == 0 && ni == 0
        unless n11i % 10 == 0
          if n11i == @sn11+1
            @sprob *= ((@sn1_ - @sn11)/(n11i.to_f))*((@sn_1 - @sn11)/(n11i.to_f + @sn - @sn1_ - @sn_1))
            @sn11 = n11i
            return @sprob
          end
          if n11i == @sn11-1
            @sprob *= ((@sn11)/(@sn1_-n11i.to_f))*((@sn11+@sn-@sn1_-@sn_1)/(@sn_1-n11i.to_f))
            @sn11 = n11i
            return @sprob
          end
        end
        @sn11 = n11i
      else
        @sn11 = n11i
        @sn1_ = n1_i
        @sn_1 = n_1i
        @sn = ni
      end
      @sprob = hyper_323(@sn11,@sn1_,@sn_1,@sn)
      @sprob
    end

    # Accumulates the tail sums for the observed table.
    #
    # Sets @sleft / @sright (sums, from either end, of the probabilities not
    # exceeding that of the observed table) and @sless / @slarg (the values
    # reported as :left / :right by #calculate).
    #
    # @return [Numeric] probability of the observed table (1 if only one
    #   table is possible for the given margins)
    def exact(n11,n1_,n_1,n)
      max = n1_
      max = n_1 if n_1 < max
      min = n1_ + n_1 - n
      min = 0 if min < 0

      if min == max
        @sless = 1
        @sright = 1
        @sleft = 1
        @slarg = 1
        return 1
      end

      prob = hyper0(n11,n1_,n_1,n)
      @sleft = 0

      # walk up from the smallest possible n11
      p = hyper(min)
      i = min + 1
      while p < (0.99999999 * prob)
        @sleft += p
        p = hyper(i)
        i += 1
      end

      i -= 1

      if p < (1.00000001*prob)
        @sleft += p
      else
        i -= 1
      end

      @sright = 0

      # walk down from the largest possible n11
      p = hyper(max)
      j = max - 1
      while p < (0.99999999 * prob)
        @sright += p
        p = hyper(j)
        j -= 1
      end
      j += 1

      if p < (1.00000001*prob)
        @sright += p
      else
        j += 1
      end

      if (i - n11).abs < (j - n11).abs
        @sless = @sleft
        @slarg = 1 - @sleft + prob
      else
        @sless = 1 - @sright + prob
        @slarg = @sright
      end
      prob
    end


  end # class

  #
  # Normal distribution
  #
  class NormalDistribution
    # Constructs a normal distribution (defaults to zero mean and
    # unity variance)
    #
    # @param [Numeric] mu mean
    # @param [Numeric] sigma standard deviation, must be > 0
    # @raise [ArgumentError] if sigma is not positive (previously the
    #   constructor silently returned a half-initialised object)
    def initialize(mu=0.0, sigma=1.0)
      if sigma <= 0.0
        raise ArgumentError.new("sigma must be greater than 0 (got #{sigma})")
      end
      @mean = mu
      @stdev = sigma
      @variance = sigma**2
      @pdf_denominator = SQRT2PI * Math.sqrt(@variance)
      @cdf_denominator = SQRT2 * Math.sqrt(@variance)
    end


    # Obtain single PDF value
    # Returns the probability that a stochastic variable x has the value X,
    # i.e. P(x=X)
    def get_pdf(x)
      Math.exp( -((x-@mean)**2) / (2 * @variance)) / @pdf_denominator
    end


    # Obtain single CDF value
    # Returns the probability that a stochastic variable x is less than X,
    # i.e. P(x<X)
    def get_cdf(x)
      complementary_error( -(x - @mean) / @cdf_denominator) / 2
    end


    # Obtain single inverse CDF value.
    # returns the value X for which P(x<X).
    #
    # @param [Numeric] p probability in [0, 1]
    # @return [Numeric] -MAX_VALUE for p == 0, MAX_VALUE for p == 1, the mean
    #   for p == 0.5, otherwise the root found numerically
    # @raise [ArgumentError] if p is nil or outside [0, 1]
    def get_icdf(p)
      check_range(p)
      return -MAX_VALUE if p == 0.0
      return MAX_VALUE if p == 1.0
      return @mean if p == 0.5

      # The root is searched on the standard normal, so the instance is
      # switched to N(0,1) for the duration of the search; `ensure` restores
      # the real parameters even if the search raises.
      saved = [@mean, @variance, @pdf_denominator, @cdf_denominator]
      begin
        @mean = 0.0
        @variance = 1.0
        @pdf_denominator = Math.sqrt(TWO_PI)
        @cdf_denominator = SQRT2
        x = find_root(p, 0.0, -100.0, 100.0)
      ensure
        @mean, @variance, @pdf_denominator, @cdf_denominator = saved
      end
      #scale back
      x * Math.sqrt(@variance) + @mean
    end

    private

    #check that variable is between lo and hi limits.
    #lo default is 0.0 and hi default is 1.0
    def check_range(x, lo=0.0, hi=1.0)
      raise ArgumentError.new("x cannot be nil") if x.nil?
      if x < lo || x > hi
        raise ArgumentError.new("x must be between lo (#{lo}) and hi (#{hi})")
      end
    end


    # Solves cdf(x) == prob for x, starting from +guess+ inside [x_lo, x_hi].
    # Stops when the step size drops to 1.0e-10 or after the iteration limit.
    def find_root(prob, guess, x_lo, x_hi)
      accuracy = 1.0e-10
      max_iteration = 150
      x = guess
      x_new = guess
      dx = 1000.0
      i = 0
      while ( dx.abs > accuracy && (i += 1) < max_iteration )
        #Apply Newton-Raphson step
        err = cdf(x) - prob
        # keep the bracket [x_lo, x_hi] around the root
        if err < 0.0
          x_lo = x
        else
          x_hi = x
        end
        density = pdf(x)
        if density != 0.0
          dx = err / density
          x_new = x - dx
        end
        # If the NR fails to converge (which for example may be the
        # case if the initial guess is too rough) we apply a bisection
        # step to determine a more narrow interval around the root.
        if x_new < x_lo || x_new > x_hi || density == 0.0
          x_new = (x_lo + x_hi) / 2.0
          dx = x_new - x
        end
        x = x_new
      end
      x
    end


    #Probability density function
    #accepts a single value or an Array of values
    def pdf(x)
      return x.map { |v| get_pdf(v) } if x.is_a?(Array)

      get_pdf(x)
    end


    #Cumulative distribution function
    #accepts a single value or an Array of values
    def cdf(x)
      return x.map { |v| get_cdf(v) } if x.is_a?(Array)

      get_cdf(x)
    end



    # Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    #
    # Developed at SunSoft, a Sun Microsystems, Inc. business.
    # Permission to use, copy, modify, and distribute this
    # software is freely granted, provided that this notice
    # is preserved.
    #
    #                           x
    #                    2      |\
    #     erf(x) = ---------  | exp(-t*t)dt
    #               sqrt(pi)  \|
    #                           0
    #
    #     erfc(x) = 1-erf(x)
    #  Note that
    #     erf(-x) = -erf(x)
    #     erfc(-x) = 2 - erfc(x)
    #
    # Method:
    #   1. For |x| in [0, 0.84375]
    #      erf(x) = x + x*R(x^2)
    #      erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
    #              = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
    #      where R = P/Q where P is an odd poly of degree 8 and
    #      Q is an odd poly of degree 10.
    #                                       -57.90
    #               | R - (erf(x)-x)/x | <= 2
    #
    #
    #      Remark. The formula is derived by noting
    #      erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
    #      and that
    #      2/sqrt(pi) = 1.128379167095512573896158903121545171688
    #      is close to one. The interval is chosen because the fix
    #      point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
    #      near 0.6174), and by some experiment, 0.84375 is chosen to
    #      guarantee the error is less than one ulp for erf.
    #
    #   2. For |x| in [0.84375,1.25], let s = |x| - 1, and
    #      c = 0.84506291151 rounded to single (24 bits)
    #      erf(x) = sign(x) * (c + P1(s)/Q1(s))
    #      erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
    #                1+(c+P1(s)/Q1(s)) if x < 0
    #      |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
    #      Remark: here we use the taylor series expansion at x=1.
    #      erf(1+s) = erf(1) + s*Poly(s)
    #               = 0.845.. + P1(s)/Q1(s)
    #      That is, we use rational approximation to approximate
    #      erf(1+s) - (c = (single)0.84506291151)
    #      Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
    #      where
    #      P1(s) = degree 6 poly in s
    #      Q1(s) = degree 6 poly in s
    #
    #   3. For x in [1.25,1/0.35(~2.857143)],
    #      erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
    #      erf(x) = 1 - erfc(x)
    #      where
    #      R1(z) = degree 7 poly in z, (z=1/x^2)
    #      S1(z) = degree 8 poly in z
    #
    #   4. For x in [1/0.35,28]
    #      erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
    #              = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
    #              = 2.0 - tiny (if x <= -6)
    #      erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
    #      erf(x) = sign(x)*(1.0 - tiny)
    #      where
    #      R2(z) = degree 6 poly in z, (z=1/x^2)
    #      S2(z) = degree 7 poly in z
    #
    #   Note1:
    #      To compute exp(-x*x-0.5625+R/S), let s be a single
    #      PRECISION number and s := x then
    #      -x*x = -s*s + (s-x)*(s+x)
    #      exp(-x*x-0.5626+R/S) =
    #      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S)
    #   Note2:
    #      Here 4 and 5 make use of the asymptotic series
    #                    exp(-x*x)
    #      erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
    #                    x*sqrt(pi)
    #      We use rational approximation to approximate
    #      g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
    #      Here is the error bound for R1/S1 and R2/S2
    #      |R1/S1 - f(x)| < 2**(-62.57)
    #      |R2/S2 - f(x)| < 2**(-61.52)
    #
    #   5. For inf > x >= 28
    #      erf(x) = sign(x) *(1 - tiny) (raise inexact)
    #      erfc(x) = tiny*tiny (raise underflow) if x > 0
    #              = 2 - tiny if x<0
    #
    #   7. Special case:
    #      erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
    #      erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
    #      erfc/erf(NaN) is NaN
    #
    # $efx8 = 1.02703333676410069053e00
    #
    # The coefficient tables below are built once at load time instead of on
    # every call; find_root evaluates them repeatedly via cdf.

    # factor used by #error for |x| < 2**-28
    ERF_EFX = 1.28379167095512586316e-01

    # Coefficients for approximation to erf on [0,0.84375]
    ERF_PP = [ 1.28379167095512558561e-01,
              -3.25042107247001499370e-01,
              -2.84817495755985104766e-02,
              -5.77027029648944159157e-03,
              -2.37630166566501626084e-05 ].freeze

    ERF_QQ = [ 3.97917223959155352819e-01,
               6.50222499887672944485e-02,
               5.08130628187576562776e-03,
               1.32494738004321644526e-04,
              -3.96022827877536812320e-06 ].freeze

    # Coefficients for approximation to erf in [0.84375,1.25]
    ERF_PA = [-2.36211856075265944077e-03,
               4.14856118683748331666e-01,
              -3.72207876035701323847e-01,
               3.18346619901161753674e-01,
              -1.10894694282396677476e-01,
               3.54783043256182359371e-02,
              -2.16637559486879084300e-03 ].freeze

    ERF_QA = [ 1.06420880400844228286e-01,
               5.40397917702171048937e-01,
               7.18286544141962662868e-02,
               1.26171219808761642112e-01,
               1.36370839120290507362e-02,
               1.19844998467991074170e-02 ].freeze

    # the constant c of step 2 above
    ERF_ERX = 8.45062911510467529297e-01

    # Coefficients for approximation of erfc in [1.25,1/.35]
    ERFC_RA = [-9.86494403484714822705e-03,
               -6.93858572707181764372e-01,
               -1.05586262253232909814e01,
               -6.23753324503260060396e01,
               -1.62396669462573470355e02,
               -1.84605092906711035994e02,
               -8.12874355063065934246e01,
               -9.81432934416914548592e00 ].freeze

    ERFC_SA = [ 1.96512716674392571292e01,
                1.37657754143519042600e02,
                4.34565877475229228821e02,
                6.45387271733267880336e02,
                4.29008140027567833386e02,
                1.08635005541779435134e02,
                6.57024977031928170135e00,
               -6.04244152148580987438e-02 ].freeze

    # Coefficients for approximation to erfc in [1/.35,28]
    ERFC_RB = [-9.86494292470009928597e-03,
               -7.99283237680523006574e-01,
               -1.77579549177547519889e01,
               -1.60636384855821916062e02,
               -6.37566443368389627722e02,
               -1.02509513161107724954e03,
               -4.83519191608651397019e02 ].freeze

    ERFC_SB = [ 3.03380607434824582924e01,
                3.25792512996573918826e02,
                1.53672958608443695994e03,
                3.19985821950859553908e03,
                2.55305040643316442583e03,
                4.74528541206955367215e02,
               -2.24409524465858183362e01 ].freeze

    # Evaluates c[0] + z*(c[1] + z*(c[2] + ...)) (Horner's scheme), i.e. the
    # same nesting the polynomials were originally written out with.
    def horner(coeffs, z)
      coeffs.reverse_each.inject(0.0) { |acc, c| acc * z + c }
    end

    # Error function.
    # Based on C-code for the error function developed at Sun Microsystems.
    # Author:: Jaco van Kooten

    def error(x)
      abs_x = (x >= 0.0 ? x : -x)
      retval =
        if abs_x < 0.84375
          # 0 < |x| < 0.84375
          if abs_x < 3.7252902984619141e-9
            #|x| < 2**-28
            abs_x + abs_x * ERF_EFX
          else
            s = x * x
            p = horner(ERF_PP, s)
            q = 1.0 + s * horner(ERF_QQ, s)
            abs_x + abs_x * (p / q)
          end
        elsif abs_x < 1.25
          s = abs_x - 1.0
          p = horner(ERF_PA, s)
          q = 1.0 + s * horner(ERF_QA, s)
          ERF_ERX + p / q
        elsif abs_x >= 6.0
          1.0
        else
          1.0 - complementary_error(abs_x)
        end
      # erf is odd: erf(-x) = -erf(x)
      x >= 0.0 ? retval : -retval
    end

    # Complementary error function.
    # Based on C-code for the error function developed at Sun Microsystems.
    # author Jaco van Kooten

    def complementary_error(x)
      abs_x = (x >= 0.0 ? x : -x)
      retval =
        if abs_x < 1.25
          1.0 - error(abs_x)
        elsif abs_x > 28.0
          0.0
        else
          # 1.25 < |x| < 28
          z = 1.0 / (abs_x * abs_x)
          if abs_x < 2.8571428
            r = horner(ERFC_RA, z)
            s = 1.0 + z * horner(ERFC_SA, z)
          else
            r = horner(ERFC_RB, z)
            s = 1.0 + z * horner(ERFC_SB, z)
          end
          Math.exp(-x * x - 0.5625 + r / s) / abs_x
        end
      # erfc(-x) = 2 - erfc(x)
      x >= 0.0 ? retval : 2.0 - retval
    end

  end # class

end # module
|