fselector 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +7 -0
- data/README.md +18 -7
- data/lib/fselector.rb +4 -3
- data/lib/fselector/algo_base/base.rb +7 -0
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +3 -4
- data/lib/fselector/algo_discrete/FishersExactTest.rb +5 -7
- data/lib/fselector/discretizer.rb +15 -2
- data/lib/fselector/fileio.rb +19 -4
- data/lib/fselector/util.rb +0 -585
- metadata +17 -6
- data/lib/fselector/chisq_calc.rb +0 -189
data/ChangeLog
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
2012-04-18 Tiejun Cheng <need47@gmail.com>
|
2
|
+
|
3
|
+
* require the RinRuby gem (http://rinruby.ddahl.org) to access the
|
4
|
+
statistical routines in the R package (http://www.r-project.org/)
|
5
|
+
|
6
|
+
* because of RinRuby (and thus R), removed the following modules or implementations:
|
7
|
+
RubyStats (FishersExactTest.calculate, get_icdf) and ChiSquareCalculator
|
data/README.md
CHANGED
@@ -8,8 +8,8 @@ FSelector: a Ruby gem for feature selection and ranking
|
|
8
8
|
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
|
9
9
|
**Copyright**: 2012
|
10
10
|
**License**: MIT License
|
11
|
-
**Latest Version**: 0.5.0
|
12
|
-
**Release Date**: April
|
11
|
+
**Latest Version**: 0.6.0
|
12
|
+
**Release Date**: April 19 2012
|
13
13
|
|
14
14
|
Synopsis
|
15
15
|
--------
|
@@ -25,9 +25,9 @@ missing feature values with certain criterion. FSelector acts on a
|
|
25
25
|
full-feature data set in either CSV, LibSVM or WEKA file format and
|
26
26
|
outputs a reduced data set with only selected subset of features, which
|
27
27
|
can later be used as the input for various machine learning software
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
such as LibSVM and WEKA. FSelector, as a collection of filter methods,
|
29
|
+
does not implement any classifier like support vector machines or
|
30
|
+
random forest. See below for a list of FSelector's features.
|
31
31
|
|
32
32
|
Feature List
|
33
33
|
------------
|
@@ -78,7 +78,7 @@ Feature List
|
|
78
78
|
ReliefF_c ReliefF_c continuous
|
79
79
|
TScore TS continuous
|
80
80
|
|
81
|
-
**feature selection interface:**
|
81
|
+
**note for feature selection interface:**
|
82
82
|
- for the algorithms of CFS\_d, FCBF and CFS\_c, use select\_feature!
|
83
83
|
- for other algorithms, use either select\_feature\_by\_rank! or select\_feature\_by\_score!
|
84
84
|
|
@@ -115,7 +115,13 @@ Installing
|
|
115
115
|
To install FSelector, use the following command:
|
116
116
|
|
117
117
|
$ gem install fselector
|
118
|
-
|
118
|
+
|
119
|
+
**note:** Starting from version 0.5.0, FSelector uses the RinRuby gem (http://rinruby.ddahl.org)
|
120
|
+
as a seamless bridge to access the statistical routines in the R package (http://www.r-project.org),
|
121
|
+
which will greatly expand the inclusion of algorithms to FSelector, especially for those relying
|
122
|
+
on statistical tests. To this end, please pre-install the R package. RinRuby should have been
|
123
|
+
auto-installed with FSelector.
|
124
|
+
|
119
125
|
Usage
|
120
126
|
-----
|
121
127
|
|
@@ -223,6 +229,11 @@ Usage
|
|
223
229
|
|
224
230
|
**4. see more examples test_*.rb under the test/ directory**
|
225
231
|
|
232
|
+
Change Log
|
233
|
+
----------
|
234
|
+
A {file:ChangeLog} is available from version 0.5.0 and upward to reflect
|
235
|
+
what's new and what's changed
|
236
|
+
|
226
237
|
Copyright
|
227
238
|
---------
|
228
239
|
FSelector © 2012 by [Tiejun Cheng](mailto:need47@gmail.com).
|
data/lib/fselector.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
# access to the statistical routines in R package
|
2
|
+
require 'rinruby'
|
3
|
+
|
1
4
|
#
|
2
5
|
# FSelector: a Ruby gem for feature selection and ranking
|
3
6
|
#
|
4
7
|
module FSelector
|
5
8
|
# module version
|
6
|
-
VERSION = '0.
|
9
|
+
VERSION = '0.6.0'
|
7
10
|
end
|
8
11
|
|
9
12
|
ROOT = File.expand_path(File.dirname(__FILE__))
|
@@ -17,8 +20,6 @@ require "#{ROOT}/fselector/fileio.rb"
|
|
17
20
|
require "#{ROOT}/fselector/util.rb"
|
18
21
|
# entropy-related functions
|
19
22
|
require "#{ROOT}/fselector/entropy.rb"
|
20
|
-
# chi-square calculator
|
21
|
-
require "#{ROOT}/fselector/chisq_calc.rb"
|
22
23
|
# normalization for continuous data
|
23
24
|
require "#{ROOT}/fselector/normalizer.rb"
|
24
25
|
# discretization for continuous data
|
@@ -165,6 +165,13 @@ module FSelector
|
|
165
165
|
end
|
166
166
|
|
167
167
|
|
168
|
+
# get a copy of data,
|
169
|
+
# by use of the standard Marshal library
|
170
|
+
def get_data_copy
|
171
|
+
Marshal.load(Marshal.dump(@data)) if @data
|
172
|
+
end
|
173
|
+
|
174
|
+
|
168
175
|
# set data
|
169
176
|
def set_data(data)
|
170
177
|
if data and data.class == Hash
|
@@ -13,14 +13,11 @@ module FSelector
|
|
13
13
|
# ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974) and [Rubystats](http://rubystats.rubyforge.org)
|
14
14
|
#
|
15
15
|
class BiNormalSeparation < BaseDiscrete
|
16
|
-
# include Ruby statistics libraries
|
17
|
-
include Rubystats
|
18
16
|
|
19
17
|
private
|
20
18
|
|
21
19
|
# calculate contribution of each feature (f) for each class (k)
|
22
20
|
def calc_contribution(f)
|
23
|
-
@nd ||= Rubystats::NormalDistribution.new
|
24
21
|
|
25
22
|
each_class do |k|
|
26
23
|
a, b, c, d = get_A(f, k), get_B(f, k), get_C(f, k), get_D(f, k)
|
@@ -28,7 +25,9 @@ module FSelector
|
|
28
25
|
s = 0.0
|
29
26
|
if not (a+c).zero? and not (b+d).zero?
|
30
27
|
tpr, fpr = a/(a+c), b/(b+d)
|
31
|
-
|
28
|
+
|
29
|
+
R.eval "rv <- qnorm(#{tpr}) - qnorm(#{fpr})"
|
30
|
+
s = R.rv.abs
|
32
31
|
end
|
33
32
|
|
34
33
|
set_feature_score(f, k, s)
|
@@ -11,24 +11,22 @@ module FSelector
|
|
11
11
|
#
|
12
12
|
# for FET, the smaller, the better, but we intentionally negate it
|
13
13
|
# so that the larger is always the better (consistent with other algorithms)
|
14
|
+
# R equivalent: fisher.test
|
14
15
|
#
|
15
16
|
# ref: [Wikipedia](http://en.wikipedia.org/wiki/Fisher's_exact_test) and [Rubystats](http://rubystats.rubyforge.org)
|
16
17
|
#
|
17
18
|
class FishersExactTest < BaseDiscrete
|
18
|
-
# include Ruby statistics libraries
|
19
|
-
include Rubystats
|
20
19
|
|
21
20
|
private
|
22
21
|
|
23
22
|
# calculate contribution of each feature (f) for each class (k)
|
24
|
-
def calc_contribution(f)
|
25
|
-
@fet ||= Rubystats::FishersExactTest.new
|
26
|
-
|
23
|
+
def calc_contribution(f)
|
27
24
|
each_class do |k|
|
28
25
|
a, b, c, d = get_A(f, k), get_B(f, k), get_C(f, k), get_D(f, k)
|
29
26
|
|
30
|
-
# note:
|
31
|
-
|
27
|
+
# note: intentionally negated it
|
28
|
+
R.eval "rv <- fisher.test(matrix(c(#{a}, #{b}, #{c}, #{d}), nrow=2))$p.value"
|
29
|
+
s = -1.0 * R.rv
|
32
30
|
|
33
31
|
set_feature_score(f, k, s)
|
34
32
|
end
|
@@ -4,8 +4,6 @@
|
|
4
4
|
module Discretizer
|
5
5
|
# include Entropy module
|
6
6
|
include Entropy
|
7
|
-
# include ChiSquareCalculator module
|
8
|
-
include ChiSquareCalculator
|
9
7
|
|
10
8
|
# discretize by equal-width intervals
|
11
9
|
#
|
@@ -334,6 +332,19 @@ module Discretizer
|
|
334
332
|
|
335
333
|
private
|
336
334
|
|
335
|
+
#
|
336
|
+
# get the Chi-square value from p-value
|
337
|
+
#
|
338
|
+
# @param [Float] pval p-value
|
339
|
+
# @param [Integer] df degree of freedom
|
340
|
+
# @return [Float] Chi-square value
|
341
|
+
#
|
342
|
+
def pval2chisq(pval, df)
|
343
|
+
R.eval "chisq <- qchisq(#{1-pval}, #{df})"
|
344
|
+
R.chisq
|
345
|
+
end
|
346
|
+
|
347
|
+
|
337
348
|
#
|
338
349
|
# get index from sorted cut points
|
339
350
|
#
|
@@ -388,6 +399,7 @@ module Discretizer
|
|
388
399
|
clear_vars
|
389
400
|
end
|
390
401
|
|
402
|
+
|
391
403
|
#
|
392
404
|
# Chi2: initialization
|
393
405
|
#
|
@@ -423,6 +435,7 @@ module Discretizer
|
|
423
435
|
[bs, cs, qs]
|
424
436
|
end
|
425
437
|
|
438
|
+
|
426
439
|
#
|
427
440
|
# Chi2: merge two adjacent intervals
|
428
441
|
#
|
data/lib/fselector/fileio.rb
CHANGED
@@ -1,8 +1,23 @@
|
|
1
1
|
#
|
2
|
-
# read and write various file formats
|
2
|
+
# read and write various file formats,
|
3
|
+
# the internal data structure looks like:
|
4
|
+
#
|
5
|
+
# data = {
|
6
|
+
#
|
7
|
+
# :c1 => [ # class c1
|
8
|
+
# {:f1=>1, :f2=>2} # sample 2
|
9
|
+
# ],
|
10
|
+
#
|
11
|
+
# :c2 => [ # class c2
|
12
|
+
# {:f1=>1, :f3=>3}, # sample 1
|
13
|
+
# {:f2=>2} # sample 3
|
14
|
+
# ]
|
15
|
+
#
|
16
|
+
# }
|
17
|
+
#
|
18
|
+
# where :c1 and :c2 are class labels; :f1, :f2, and :f3 are features
|
3
19
|
#
|
4
|
-
# @note class labels and features are treated as symbols
|
5
|
-
# e.g. length => :length
|
20
|
+
# @note class labels and features are treated as symbols
|
6
21
|
#
|
7
22
|
module FileIO
|
8
23
|
#
|
@@ -40,7 +55,7 @@ module FileIO
|
|
40
55
|
if ncategory == 1
|
41
56
|
feats[f] = 1
|
42
57
|
elsif ncategory > 1
|
43
|
-
feats[f] = rand(ncategory)
|
58
|
+
feats[f] = rand(ncategory)+1
|
44
59
|
else
|
45
60
|
feats[f] = rand
|
46
61
|
end
|
data/lib/fselector/util.rb
CHANGED
@@ -149,588 +149,3 @@ end # String
|
|
149
149
|
#=>a
|
150
150
|
#=>_'b,c, d'_
|
151
151
|
#=>'e'
|
152
|
-
|
153
|
-
|
154
|
-
#
|
155
|
-
# adapted from the Ruby statistics libraries --
|
156
|
-
# [Rubystats](http://rubystats.rubyforge.org)
|
157
|
-
#
|
158
|
-
# - for Fisher's exact test (Rubystats::FishersExactTest.calculate())
|
159
|
-
# used by algo\_binary/FishersExactTest.rb
|
160
|
-
# - for inverse cumulative normal distribution function (Rubystats::NormalDistribution.get\_icdf())
|
161
|
-
# used by algo\_binary/BiNormalSeparation.rb. note the original get\_icdf() function is a private
|
162
|
-
# one, so we have to open it up and that's why the codes here.
|
163
|
-
#
|
164
|
-
#
|
165
|
-
module Rubystats
|
166
|
-
MAX_VALUE = 1.2e290
|
167
|
-
SQRT2PI = 2.5066282746310005024157652848110452530069867406099
|
168
|
-
SQRT2 = 1.4142135623730950488016887242096980785696718753769
|
169
|
-
TWO_PI = 6.2831853071795864769252867665590057683943387987502
|
170
|
-
|
171
|
-
#
|
172
|
-
# Fisher's exact test calculator
|
173
|
-
#
|
174
|
-
class FishersExactTest
|
175
|
-
# new()
|
176
|
-
def initialize
|
177
|
-
@sn11 = 0.0
|
178
|
-
@sn1_ = 0.0
|
179
|
-
@sn_1 = 0.0
|
180
|
-
@sn = 0.0
|
181
|
-
@sprob = 0.0
|
182
|
-
|
183
|
-
@sleft = 0.0
|
184
|
-
@sright = 0.0
|
185
|
-
@sless = 0.0
|
186
|
-
@slarg = 0.0
|
187
|
-
|
188
|
-
@left = 0.0
|
189
|
-
@right = 0.0
|
190
|
-
@twotail = 0.0
|
191
|
-
end
|
192
|
-
|
193
|
-
|
194
|
-
# Fisher's exact test
|
195
|
-
def calculate(n11_,n12_,n21_,n22_)
|
196
|
-
n11_ *= -1 if n11_ < 0
|
197
|
-
n12_ *= -1 if n12_ < 0
|
198
|
-
n21_ *= -1 if n21_ < 0
|
199
|
-
n22_ *= -1 if n22_ < 0
|
200
|
-
n1_ = n11_ + n12_
|
201
|
-
n_1 = n11_ + n21_
|
202
|
-
n = n11_ + n12_ + n21_ + n22_
|
203
|
-
prob = exact(n11_,n1_,n_1,n)
|
204
|
-
left = @sless
|
205
|
-
right = @slarg
|
206
|
-
twotail = @sleft + @sright
|
207
|
-
twotail = 1 if twotail > 1
|
208
|
-
values_hash = { :left =>left, :right =>right, :twotail =>twotail }
|
209
|
-
return values_hash
|
210
|
-
end
|
211
|
-
|
212
|
-
private
|
213
|
-
|
214
|
-
# Reference: "Lanczos, C. 'A precision approximation
|
215
|
-
# of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
|
216
|
-
# Translation of Alan Miller's FORTRAN-implementation
|
217
|
-
# See http://lib.stat.cmu.edu/apstat/245
|
218
|
-
def lngamm(z)
|
219
|
-
x = 0
|
220
|
-
x += 0.0000001659470187408462/(z+7)
|
221
|
-
x += 0.000009934937113930748 /(z+6)
|
222
|
-
x -= 0.1385710331296526 /(z+5)
|
223
|
-
x += 12.50734324009056 /(z+4)
|
224
|
-
x -= 176.6150291498386 /(z+3)
|
225
|
-
x += 771.3234287757674 /(z+2)
|
226
|
-
x -= 1259.139216722289 /(z+1)
|
227
|
-
x += 676.5203681218835 /(z)
|
228
|
-
x += 0.9999999999995183
|
229
|
-
|
230
|
-
return(Math.log(x)-5.58106146679532777-z+(z-0.5) * Math.log(z+6.5))
|
231
|
-
end
|
232
|
-
|
233
|
-
def lnfact(n)
|
234
|
-
if n <= 1
|
235
|
-
return 0
|
236
|
-
else
|
237
|
-
return lngamm(n+1)
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
|
-
def lnbico(n,k)
|
242
|
-
return lnfact(n) - lnfact(k) - lnfact(n-k)
|
243
|
-
end
|
244
|
-
|
245
|
-
def hyper_323(n11, n1_, n_1, n)
|
246
|
-
return Math.exp(lnbico(n1_, n11) + lnbico(n-n1_, n_1-n11) - lnbico(n, n_1))
|
247
|
-
end
|
248
|
-
|
249
|
-
def hyper(n11)
|
250
|
-
return hyper0(n11, 0, 0, 0)
|
251
|
-
end
|
252
|
-
|
253
|
-
def hyper0(n11i,n1_i,n_1i,ni)
|
254
|
-
if n1_i == 0 and n_1i ==0 and ni == 0
|
255
|
-
unless n11i % 10 == 0
|
256
|
-
if n11i == @sn11+1
|
257
|
-
@sprob *= ((@sn1_ - @sn11)/(n11i.to_f))*((@sn_1 - @sn11)/(n11i.to_f + @sn - @sn1_ - @sn_1))
|
258
|
-
@sn11 = n11i
|
259
|
-
return @sprob
|
260
|
-
end
|
261
|
-
if n11i == @sn11-1
|
262
|
-
@sprob *= ((@sn11)/(@sn1_-n11i.to_f))*((@sn11+@sn-@sn1_-@sn_1)/(@sn_1-n11i.to_f))
|
263
|
-
@sn11 = n11i
|
264
|
-
return @sprob
|
265
|
-
end
|
266
|
-
end
|
267
|
-
@sn11 = n11i
|
268
|
-
else
|
269
|
-
@sn11 = n11i
|
270
|
-
@sn1_ = n1_i
|
271
|
-
@sn_1 = n_1i
|
272
|
-
@sn = ni
|
273
|
-
end
|
274
|
-
@sprob = hyper_323(@sn11,@sn1_,@sn_1,@sn)
|
275
|
-
return @sprob
|
276
|
-
end
|
277
|
-
|
278
|
-
def exact(n11,n1_,n_1,n)
|
279
|
-
|
280
|
-
p = i = j = prob = 0.0
|
281
|
-
|
282
|
-
max = n1_
|
283
|
-
max = n_1 if n_1 < max
|
284
|
-
min = n1_ + n_1 - n
|
285
|
-
min = 0 if min < 0
|
286
|
-
|
287
|
-
if min == max
|
288
|
-
@sless = 1
|
289
|
-
@sright = 1
|
290
|
-
@sleft = 1
|
291
|
-
@slarg = 1
|
292
|
-
return 1
|
293
|
-
end
|
294
|
-
|
295
|
-
prob = hyper0(n11,n1_,n_1,n)
|
296
|
-
@sleft = 0
|
297
|
-
|
298
|
-
p = hyper(min)
|
299
|
-
i = min + 1
|
300
|
-
while p < (0.99999999 * prob)
|
301
|
-
@sleft += p
|
302
|
-
p = hyper(i)
|
303
|
-
i += 1
|
304
|
-
end
|
305
|
-
|
306
|
-
i -= 1
|
307
|
-
|
308
|
-
if p < (1.00000001*prob)
|
309
|
-
@sleft += p
|
310
|
-
else
|
311
|
-
i -= 1
|
312
|
-
end
|
313
|
-
|
314
|
-
@sright = 0
|
315
|
-
|
316
|
-
p = hyper(max)
|
317
|
-
j = max - 1
|
318
|
-
while p < (0.99999999 * prob)
|
319
|
-
@sright += p
|
320
|
-
p = hyper(j)
|
321
|
-
j -= 1
|
322
|
-
end
|
323
|
-
j += 1
|
324
|
-
|
325
|
-
if p < (1.00000001*prob)
|
326
|
-
@sright += p
|
327
|
-
else
|
328
|
-
j += 1
|
329
|
-
end
|
330
|
-
|
331
|
-
if (i - n11).abs < (j - n11).abs
|
332
|
-
@sless = @sleft
|
333
|
-
@slarg = 1 - @sleft + prob
|
334
|
-
else
|
335
|
-
@sless = 1 - @sright + prob
|
336
|
-
@slarg = @sright
|
337
|
-
end
|
338
|
-
return prob
|
339
|
-
end
|
340
|
-
|
341
|
-
|
342
|
-
end # class
|
343
|
-
|
344
|
-
#
|
345
|
-
# Normal distribution
|
346
|
-
#
|
347
|
-
class NormalDistribution
|
348
|
-
# Constructs a normal distribution (defaults to zero mean and
|
349
|
-
# unity variance)
|
350
|
-
def initialize(mu=0.0, sigma=1.0)
|
351
|
-
@mean = mu
|
352
|
-
if sigma <= 0.0
|
353
|
-
return "error"
|
354
|
-
end
|
355
|
-
@stdev = sigma
|
356
|
-
@variance = sigma**2
|
357
|
-
@pdf_denominator = SQRT2PI * Math.sqrt(@variance)
|
358
|
-
@cdf_denominator = SQRT2 * Math.sqrt(@variance)
|
359
|
-
end
|
360
|
-
|
361
|
-
|
362
|
-
# Obtain single PDF value
|
363
|
-
# Returns the probability that a stochastic variable x has the value X,
|
364
|
-
# i.e. P(x=X)
|
365
|
-
def get_pdf(x)
|
366
|
-
Math.exp( -((x-@mean)**2) / (2 * @variance)) / @pdf_denominator
|
367
|
-
end
|
368
|
-
|
369
|
-
|
370
|
-
# Obtain single CDF value
|
371
|
-
# Returns the probability that a stochastic variable x is less than X,
|
372
|
-
# i.e. P(x<X)
|
373
|
-
def get_cdf(x)
|
374
|
-
complementary_error( -(x - @mean) / @cdf_denominator) / 2
|
375
|
-
end
|
376
|
-
|
377
|
-
|
378
|
-
# Obtain single inverse CDF value.
|
379
|
-
# returns the value X for which P(x<X).
|
380
|
-
def get_icdf(p)
|
381
|
-
check_range(p)
|
382
|
-
if p == 0.0
|
383
|
-
return -MAX_VALUE
|
384
|
-
end
|
385
|
-
if p == 1.0
|
386
|
-
return MAX_VALUE
|
387
|
-
end
|
388
|
-
if p == 0.5
|
389
|
-
return @mean
|
390
|
-
end
|
391
|
-
|
392
|
-
mean_save = @mean
|
393
|
-
var_save = @variance
|
394
|
-
pdf_D_save = @pdf_denominator
|
395
|
-
cdf_D_save = @cdf_denominator
|
396
|
-
@mean = 0.0
|
397
|
-
@variance = 1.0
|
398
|
-
@pdf_denominator = Math.sqrt(TWO_PI)
|
399
|
-
@cdf_denominator = SQRT2
|
400
|
-
x = find_root(p, 0.0, -100.0, 100.0)
|
401
|
-
#scale back
|
402
|
-
@mean = mean_save
|
403
|
-
@variance = var_save
|
404
|
-
@pdf_denominator = pdf_D_save
|
405
|
-
@cdf_denominator = cdf_D_save
|
406
|
-
return x * Math.sqrt(@variance) + @mean
|
407
|
-
end
|
408
|
-
|
409
|
-
private
|
410
|
-
|
411
|
-
#check that variable is between lo and hi limits.
|
412
|
-
#lo default is 0.0 and hi default is 1.0
|
413
|
-
def check_range(x, lo=0.0, hi=1.0)
|
414
|
-
raise ArgumentError.new("x cannot be nil") if x.nil?
|
415
|
-
if x < lo or x > hi
|
416
|
-
raise ArgumentError.new("x must be less than lo (#{lo}) and greater than hi (#{hi})")
|
417
|
-
end
|
418
|
-
end
|
419
|
-
|
420
|
-
|
421
|
-
def find_root(prob, guess, x_lo, x_hi)
|
422
|
-
accuracy = 1.0e-10
|
423
|
-
max_iteration = 150
|
424
|
-
x = guess
|
425
|
-
x_new = guess
|
426
|
-
error = 0.0
|
427
|
-
_pdf = 0.0
|
428
|
-
dx = 1000.0
|
429
|
-
i = 0
|
430
|
-
while ( dx.abs > accuracy && (i += 1) < max_iteration )
|
431
|
-
#Apply Newton-Raphson step
|
432
|
-
error = cdf(x) - prob
|
433
|
-
if error < 0.0
|
434
|
-
x_lo = x
|
435
|
-
else
|
436
|
-
x_hi = x
|
437
|
-
end
|
438
|
-
_pdf = pdf(x)
|
439
|
-
if _pdf != 0.0
|
440
|
-
dx = error / _pdf
|
441
|
-
x_new = x -dx
|
442
|
-
end
|
443
|
-
# If the NR fails to converge (which for example may be the
|
444
|
-
# case if the initial guess is too rough) we apply a bisection
|
445
|
-
# step to determine a more narrow interval around the root.
|
446
|
-
if x_new < x_lo || x_new > x_hi || _pdf == 0.0
|
447
|
-
x_new = (x_lo + x_hi) / 2.0
|
448
|
-
dx = x_new - x
|
449
|
-
end
|
450
|
-
x = x_new
|
451
|
-
end
|
452
|
-
return x
|
453
|
-
end
|
454
|
-
|
455
|
-
|
456
|
-
#Probability density function
|
457
|
-
def pdf(x)
|
458
|
-
if x.class == Array
|
459
|
-
pdf_vals = []
|
460
|
-
for i in (0 ... x.length)
|
461
|
-
pdf_vals[i] = get_pdf(x[i])
|
462
|
-
end
|
463
|
-
return pdf_vals
|
464
|
-
else
|
465
|
-
return get_pdf(x)
|
466
|
-
end
|
467
|
-
end
|
468
|
-
|
469
|
-
|
470
|
-
#Cumulative distribution function
|
471
|
-
def cdf(x)
|
472
|
-
if x.class == Array
|
473
|
-
cdf_vals = []
|
474
|
-
for i in (0...x.size)
|
475
|
-
cdf_vals[i] = get_cdf(x[i])
|
476
|
-
end
|
477
|
-
return cdf_vals
|
478
|
-
else
|
479
|
-
return get_cdf(x)
|
480
|
-
end
|
481
|
-
end
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
# Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
486
|
-
#
|
487
|
-
# Developed at SunSoft, a Sun Microsystems, Inc. business.
|
488
|
-
# Permission to use, copy, modify, and distribute this
|
489
|
-
# software is freely granted, provided that this notice
|
490
|
-
# is preserved.
|
491
|
-
#
|
492
|
-
# x
|
493
|
-
# 2 |\
|
494
|
-
# erf(x) = --------- | exp(-t*t)dt
|
495
|
-
# sqrt(pi) \|
|
496
|
-
# 0
|
497
|
-
#
|
498
|
-
# erfc(x) = 1-erf(x)
|
499
|
-
# Note that
|
500
|
-
# erf(-x) = -erf(x)
|
501
|
-
# erfc(-x) = 2 - erfc(x)
|
502
|
-
#
|
503
|
-
# Method:
|
504
|
-
# 1. For |x| in [0, 0.84375]
|
505
|
-
# erf(x) = x + x*R(x^2)
|
506
|
-
# erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
|
507
|
-
# = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
|
508
|
-
# where R = P/Q where P is an odd poly of degree 8 and
|
509
|
-
# Q is an odd poly of degree 10.
|
510
|
-
# -57.90
|
511
|
-
# | R - (erf(x)-x)/x | <= 2
|
512
|
-
#
|
513
|
-
#
|
514
|
-
# Remark. The formula is derived by noting
|
515
|
-
# erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
|
516
|
-
# and that
|
517
|
-
# 2/sqrt(pi) = 1.128379167095512573896158903121545171688
|
518
|
-
# is close to one. The interval is chosen because the fix
|
519
|
-
# point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
|
520
|
-
# near 0.6174), and by some experiment, 0.84375 is chosen to
|
521
|
-
# guarantee the error is less than one ulp for erf.
|
522
|
-
#
|
523
|
-
# 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
|
524
|
-
# c = 0.84506291151 rounded to single (24 bits)
|
525
|
-
# erf(x) = sign(x) * (c + P1(s)/Q1(s))
|
526
|
-
# erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
|
527
|
-
# 1+(c+P1(s)/Q1(s)) if x < 0
|
528
|
-
# |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
|
529
|
-
# Remark: here we use the taylor series expansion at x=1.
|
530
|
-
# erf(1+s) = erf(1) + s*Poly(s)
|
531
|
-
# = 0.845.. + P1(s)/Q1(s)
|
532
|
-
# That is, we use rational approximation to approximate
|
533
|
-
# erf(1+s) - (c = (single)0.84506291151)
|
534
|
-
# Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
|
535
|
-
# where
|
536
|
-
# P1(s) = degree 6 poly in s
|
537
|
-
# Q1(s) = degree 6 poly in s
|
538
|
-
#
|
539
|
-
# 3. For x in [1.25,1/0.35(~2.857143)],
|
540
|
-
# erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
|
541
|
-
# erf(x) = 1 - erfc(x)
|
542
|
-
# where
|
543
|
-
# R1(z) = degree 7 poly in z, (z=1/x^2)
|
544
|
-
# S1(z) = degree 8 poly in z
|
545
|
-
#
|
546
|
-
# 4. For x in [1/0.35,28]
|
547
|
-
# erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
|
548
|
-
# = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
|
549
|
-
# = 2.0 - tiny (if x <= -6)
|
550
|
-
# erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
|
551
|
-
# erf(x) = sign(x)*(1.0 - tiny)
|
552
|
-
# where
|
553
|
-
# R2(z) = degree 6 poly in z, (z=1/x^2)
|
554
|
-
# S2(z) = degree 7 poly in z
|
555
|
-
#
|
556
|
-
# Note1:
|
557
|
-
# To compute exp(-x*x-0.5625+R/S), let s be a single
|
558
|
-
# PRECISION number and s := x then
|
559
|
-
# -x*x = -s*s + (s-x)*(s+x)
|
560
|
-
# exp(-x*x-0.5626+R/S) =
|
561
|
-
# exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S)
|
562
|
-
# Note2:
|
563
|
-
# Here 4 and 5 make use of the asymptotic series
|
564
|
-
# exp(-x*x)
|
565
|
-
# erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
|
566
|
-
# x*sqrt(pi)
|
567
|
-
# We use rational approximation to approximate
|
568
|
-
# g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
|
569
|
-
# Here is the error bound for R1/S1 and R2/S2
|
570
|
-
# |R1/S1 - f(x)| < 2**(-62.57)
|
571
|
-
# |R2/S2 - f(x)| < 2**(-61.52)
|
572
|
-
#
|
573
|
-
# 5. For inf > x >= 28
|
574
|
-
# erf(x) = sign(x) *(1 - tiny) (raise inexact)
|
575
|
-
# erfc(x) = tiny*tiny (raise underflow) if x > 0
|
576
|
-
# = 2 - tiny if x<0
|
577
|
-
#
|
578
|
-
# 7. Special case:
|
579
|
-
# erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
|
580
|
-
# erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
|
581
|
-
# erfc/erf(NaN) is NaN
|
582
|
-
#
|
583
|
-
# $efx8 = 1.02703333676410069053e00
|
584
|
-
#
|
585
|
-
# Coefficients for approximation to erf on [0,0.84375]
|
586
|
-
#
|
587
|
-
|
588
|
-
# Error function.
|
589
|
-
# Based on C-code for the error function developed at Sun Microsystems.
|
590
|
-
# Author:: Jaco van Kooten
|
591
|
-
|
592
|
-
def error(x)
|
593
|
-
e_efx = 1.28379167095512586316e-01
|
594
|
-
|
595
|
-
ePp = [ 1.28379167095512558561e-01,
|
596
|
-
-3.25042107247001499370e-01,
|
597
|
-
-2.84817495755985104766e-02,
|
598
|
-
-5.77027029648944159157e-03,
|
599
|
-
-2.37630166566501626084e-05 ]
|
600
|
-
|
601
|
-
eQq = [ 3.97917223959155352819e-01,
|
602
|
-
6.50222499887672944485e-02,
|
603
|
-
5.08130628187576562776e-03,
|
604
|
-
1.32494738004321644526e-04,
|
605
|
-
-3.96022827877536812320e-06 ]
|
606
|
-
|
607
|
-
# Coefficients for approximation to erf in [0.84375,1.25]
|
608
|
-
ePa = [-2.36211856075265944077e-03,
|
609
|
-
4.14856118683748331666e-01,
|
610
|
-
-3.72207876035701323847e-01,
|
611
|
-
3.18346619901161753674e-01,
|
612
|
-
-1.10894694282396677476e-01,
|
613
|
-
3.54783043256182359371e-02,
|
614
|
-
-2.16637559486879084300e-03 ]
|
615
|
-
|
616
|
-
eQa = [ 1.06420880400844228286e-01,
|
617
|
-
5.40397917702171048937e-01,
|
618
|
-
7.18286544141962662868e-02,
|
619
|
-
1.26171219808761642112e-01,
|
620
|
-
1.36370839120290507362e-02,
|
621
|
-
1.19844998467991074170e-02 ]
|
622
|
-
|
623
|
-
e_erx = 8.45062911510467529297e-01
|
624
|
-
|
625
|
-
abs_x = (if x >= 0.0 then x else -x end)
|
626
|
-
# 0 < |x| < 0.84375
|
627
|
-
if abs_x < 0.84375
|
628
|
-
#|x| < 2**-28
|
629
|
-
if abs_x < 3.7252902984619141e-9
|
630
|
-
retval = abs_x + abs_x * e_efx
|
631
|
-
else
|
632
|
-
s = x * x
|
633
|
-
p = ePp[0] + s * (ePp[1] + s * (ePp[2] + s * (ePp[3] + s * ePp[4])))
|
634
|
-
|
635
|
-
q = 1.0 + s * (eQq[0] + s * (eQq[1] + s *
|
636
|
-
( eQq[2] + s * (eQq[3] + s * eQq[4]))))
|
637
|
-
retval = abs_x + abs_x * (p / q)
|
638
|
-
end
|
639
|
-
elsif abs_x < 1.25
|
640
|
-
s = abs_x - 1.0
|
641
|
-
p = ePa[0] + s * (ePa[1] + s *
|
642
|
-
(ePa[2] + s * (ePa[3] + s *
|
643
|
-
(ePa[4] + s * (ePa[5] + s * ePa[6])))))
|
644
|
-
|
645
|
-
q = 1.0 + s * (eQa[0] + s *
|
646
|
-
(eQa[1] + s * (eQa[2] + s *
|
647
|
-
(eQa[3] + s * (eQa[4] + s * eQa[5])))))
|
648
|
-
retval = e_erx + p / q
|
649
|
-
|
650
|
-
elsif abs_x >= 6.0
|
651
|
-
retval = 1.0
|
652
|
-
else
|
653
|
-
retval = 1.0 - complementary_error(abs_x)
|
654
|
-
end
|
655
|
-
return (if x >= 0.0 then retval else -retval end)
|
656
|
-
end
|
657
|
-
|
658
|
-
# Complementary error function.
|
659
|
-
# Based on C-code for the error function developed at Sun Microsystems.
|
660
|
-
# author Jaco van Kooten
|
661
|
-
|
662
|
-
def complementary_error(x)
|
663
|
-
# Coefficients for approximation of erfc in [1.25,1/.35]
|
664
|
-
|
665
|
-
eRa = [-9.86494403484714822705e-03,
|
666
|
-
-6.93858572707181764372e-01,
|
667
|
-
-1.05586262253232909814e01,
|
668
|
-
-6.23753324503260060396e01,
|
669
|
-
-1.62396669462573470355e02,
|
670
|
-
-1.84605092906711035994e02,
|
671
|
-
-8.12874355063065934246e01,
|
672
|
-
-9.81432934416914548592e00 ]
|
673
|
-
|
674
|
-
eSa = [ 1.96512716674392571292e01,
|
675
|
-
1.37657754143519042600e02,
|
676
|
-
4.34565877475229228821e02,
|
677
|
-
6.45387271733267880336e02,
|
678
|
-
4.29008140027567833386e02,
|
679
|
-
1.08635005541779435134e02,
|
680
|
-
6.57024977031928170135e00,
|
681
|
-
-6.04244152148580987438e-02 ]
|
682
|
-
|
683
|
-
# Coefficients for approximation to erfc in [1/.35,28]
|
684
|
-
|
685
|
-
eRb = [-9.86494292470009928597e-03,
|
686
|
-
-7.99283237680523006574e-01,
|
687
|
-
-1.77579549177547519889e01,
|
688
|
-
-1.60636384855821916062e02,
|
689
|
-
-6.37566443368389627722e02,
|
690
|
-
-1.02509513161107724954e03,
|
691
|
-
-4.83519191608651397019e02 ]
|
692
|
-
|
693
|
-
eSb = [ 3.03380607434824582924e01,
|
694
|
-
3.25792512996573918826e02,
|
695
|
-
1.53672958608443695994e03,
|
696
|
-
3.19985821950859553908e03,
|
697
|
-
2.55305040643316442583e03,
|
698
|
-
4.74528541206955367215e02,
|
699
|
-
-2.24409524465858183362e01 ]
|
700
|
-
|
701
|
-
abs_x = (if x >= 0.0 then x else -x end)
|
702
|
-
if abs_x < 1.25
|
703
|
-
retval = 1.0 - error(abs_x)
|
704
|
-
elsif abs_x > 28.0
|
705
|
-
retval = 0.0
|
706
|
-
|
707
|
-
# 1.25 < |x| < 28
|
708
|
-
else
|
709
|
-
s = 1.0/(abs_x * abs_x)
|
710
|
-
if abs_x < 2.8571428
|
711
|
-
r = eRa[0] + s * (eRa[1] + s *
|
712
|
-
(eRa[2] + s * (eRa[3] + s * (eRa[4] + s *
|
713
|
-
(eRa[5] + s *(eRa[6] + s * eRa[7])
|
714
|
-
)))))
|
715
|
-
|
716
|
-
s = 1.0 + s * (eSa[0] + s * (eSa[1] + s *
|
717
|
-
(eSa[2] + s * (eSa[3] + s * (eSa[4] + s *
|
718
|
-
(eSa[5] + s * (eSa[6] + s * eSa[7])))))))
|
719
|
-
|
720
|
-
else
|
721
|
-
r = eRb[0] + s * (eRb[1] + s *
|
722
|
-
(eRb[2] + s * (eRb[3] + s * (eRb[4] + s *
|
723
|
-
(eRb[5] + s * eRb[6])))))
|
724
|
-
|
725
|
-
s = 1.0 + s * (eSb[0] + s *
|
726
|
-
(eSb[1] + s * (eSb[2] + s * (eSb[3] + s *
|
727
|
-
(eSb[4] + s * (eSb[5] + s * eSb[6]))))))
|
728
|
-
end
|
729
|
-
retval = Math.exp(-x * x - 0.5625 + r/s) / abs_x
|
730
|
-
end
|
731
|
-
return ( if x >= 0.0 then retval else 2.0 - retval end )
|
732
|
-
end
|
733
|
-
|
734
|
-
end # class
|
735
|
-
|
736
|
-
end # module
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fselector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,8 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
13
|
-
dependencies:
|
12
|
+
date: 2012-04-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rinruby
|
16
|
+
requirement: &22515480 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.0.2
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *22515480
|
14
25
|
description: FSelector is a Ruby gem that aims to integrate various feature selection/ranking
|
15
26
|
algorithms and related functions into one single package. Welcome to contact me
|
16
27
|
(need47@gmail.com) if you'd like to contribute your own algorithms or report a bug.
|
@@ -20,8 +31,8 @@ description: FSelector is a Ruby gem that aims to integrate various feature sele
|
|
20
31
|
with certain criterion. FSelector acts on a full-feature data set in either CSV,
|
21
32
|
LibSVM or WEKA file format and outputs a reduced data set with only selected subset
|
22
33
|
of features, which can later be used as the input for various machine learning software
|
23
|
-
|
24
|
-
|
34
|
+
such as LibSVM and WEKA. FSelector, as a collection of filter methods, does not
|
35
|
+
implement any classifier like support vector machines or random forest.
|
25
36
|
email: need47@gmail.com
|
26
37
|
executables: []
|
27
38
|
extensions: []
|
@@ -30,6 +41,7 @@ extra_rdoc_files:
|
|
30
41
|
- LICENSE
|
31
42
|
files:
|
32
43
|
- README.md
|
44
|
+
- ChangeLog
|
33
45
|
- LICENSE
|
34
46
|
- lib/fselector/algo_base/base.rb
|
35
47
|
- lib/fselector/algo_base/base_CFS.rb
|
@@ -70,7 +82,6 @@ files:
|
|
70
82
|
- lib/fselector/algo_discrete/Sensitivity.rb
|
71
83
|
- lib/fselector/algo_discrete/Specificity.rb
|
72
84
|
- lib/fselector/algo_discrete/SymmetricalUncertainty.rb
|
73
|
-
- lib/fselector/chisq_calc.rb
|
74
85
|
- lib/fselector/discretizer.rb
|
75
86
|
- lib/fselector/ensemble.rb
|
76
87
|
- lib/fselector/entropy.rb
|
data/lib/fselector/chisq_calc.rb
DELETED
@@ -1,189 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Chi-Square Calculator
|
3
|
-
#
|
4
|
-
# This module is adapted from the on-line [Chi-square Calculator](http://www.swogstat.org/stat/public/chisq_calculator.htm)
|
5
|
-
#
|
6
|
-
# The functions for calculating normal and chi-square probabilities
|
7
|
-
# and critical values were adapted by John Walker from C implementations
|
8
|
-
# written by Gary Perlman of Wang Institute, Tyngsboro, MA 01879. The
|
9
|
-
# original C code is in the public domain.
|
10
|
-
#
|
11
|
-
# chisq2pval(chisq, df) -- calculate p-value from given
|
12
|
-
# chi-square value (chisq) and degree of freedom (df)
|
13
|
-
# pval2chisq(pval, df) -- chi-square value from given
|
14
|
-
# p-value (pvalue) and degree of freedom (df)
|
15
|
-
#
|
16
|
-
module ChiSquareCalculator
|
17
|
-
BIGX = 20.0 # max value to represent exp(x)
|
18
|
-
LOG_SQRT_PI = 0.5723649429247000870717135 # log(sqrt(pi))
|
19
|
-
I_SQRT_PI = 0.5641895835477562869480795 # 1 / sqrt(pi)
|
20
|
-
Z_MAX = 6.0 # Maximum meaningful z value
|
21
|
-
CHI_EPSILON = 0.000001 # Accuracy of critchi approximation
|
22
|
-
CHI_MAX = 99999.0 # Maximum chi-square value
|
23
|
-
|
24
|
-
#
|
25
|
-
# POCHISQ -- probability of chi-square value
|
26
|
-
#
|
27
|
-
# Adapted from:
|
28
|
-
#
|
29
|
-
# Hill, I. D. and Pike, M. C. Algorithm 299
|
30
|
-
#
|
31
|
-
# Collected Algorithms for the CACM 1967 p. 243
|
32
|
-
#
|
33
|
-
# Updated for rounding errors based on remark in
|
34
|
-
#
|
35
|
-
# ACM TOMS June 1985, page 185
|
36
|
-
#
|
37
|
-
# @param [Float] x chi-square value
|
38
|
-
# @param [Integer] df degree of freedom
|
39
|
-
# @return [Float] p-value
|
40
|
-
def pochisq(x, df)
|
41
|
-
a, y, s = nil, nil, nil
|
42
|
-
e, c, z = nil, nil, nil
|
43
|
-
|
44
|
-
even = nil # True if df is an even number
|
45
|
-
|
46
|
-
if x <= 0.0 or df < 1
|
47
|
-
return 1.0
|
48
|
-
end
|
49
|
-
|
50
|
-
a = 0.5 * x
|
51
|
-
even = ((df & 1) == 0)
|
52
|
-
|
53
|
-
if df > 1
|
54
|
-
y = ex(-a)
|
55
|
-
end
|
56
|
-
|
57
|
-
s = even ? y : (2.0 * poz(-Math.sqrt(x)))
|
58
|
-
|
59
|
-
if df > 2
|
60
|
-
x = 0.5 * (df - 1.0)
|
61
|
-
z = even ? 1.0 : 0.5
|
62
|
-
|
63
|
-
if a > BIGX
|
64
|
-
e = even ? 0.0 : LOG_SQRT_PI
|
65
|
-
c = Math.log(a)
|
66
|
-
|
67
|
-
while z <= x
|
68
|
-
e = Math.log(z) + e
|
69
|
-
s += ex(c * z - a - e)
|
70
|
-
z += 1.0
|
71
|
-
end
|
72
|
-
|
73
|
-
return s
|
74
|
-
else
|
75
|
-
e = even ? 1.0 : (I_SQRT_PI / Math.sqrt(a))
|
76
|
-
c = 0.0
|
77
|
-
|
78
|
-
while (z <= x)
|
79
|
-
e = e * (a / z)
|
80
|
-
c = c + e
|
81
|
-
z += 1.0
|
82
|
-
end
|
83
|
-
|
84
|
-
return c * y + s
|
85
|
-
end
|
86
|
-
else
|
87
|
-
return s
|
88
|
-
end
|
89
|
-
|
90
|
-
end # pochisq
|
91
|
-
|
92
|
-
# function alias
|
93
|
-
alias :chisq2pval :pochisq
|
94
|
-
|
95
|
-
|
96
|
-
#
|
97
|
-
# CRITCHI -- Compute critical chi-square value to
|
98
|
-
# produce given p. We just do a bisection
|
99
|
-
# search for a value within CHI_EPSILON,
|
100
|
-
# relying on the monotonicity of pochisq()
|
101
|
-
#
|
102
|
-
# @param [Float] p p-value
|
103
|
-
# @param [Integer] df degree of freedom
|
104
|
-
# @return [Float] chi-square value
|
105
|
-
def critchi(p, df)
|
106
|
-
minchisq = 0.0
|
107
|
-
maxchisq = CHI_MAX
|
108
|
-
|
109
|
-
chisqval = nil
|
110
|
-
|
111
|
-
if p <= 0.0
|
112
|
-
return maxchisq
|
113
|
-
else
|
114
|
-
if p >= 1.0
|
115
|
-
return 0.0
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
chisqval = df / Math.sqrt(p); # fair first value
|
120
|
-
|
121
|
-
while (maxchisq - minchisq) > CHI_EPSILON
|
122
|
-
if pochisq(chisqval, df) < p
|
123
|
-
maxchisq = chisqval
|
124
|
-
else
|
125
|
-
minchisq = chisqval
|
126
|
-
end
|
127
|
-
|
128
|
-
chisqval = (maxchisq + minchisq) * 0.5
|
129
|
-
end
|
130
|
-
|
131
|
-
return chisqval
|
132
|
-
end # critchi
|
133
|
-
|
134
|
-
# function alias
|
135
|
-
alias :pval2chisq :critchi
|
136
|
-
|
137
|
-
private
|
138
|
-
|
139
|
-
def ex(x)
|
140
|
-
return (x < -BIGX) ? 0.0 : Math.exp(x)
|
141
|
-
end # ex
|
142
|
-
|
143
|
-
|
144
|
-
#
|
145
|
-
# POZ -- probability of normal z value
|
146
|
-
#
|
147
|
-
# Adapted from a polynomial approximation in:
|
148
|
-
# Ibbetson D, Algorithm 209
|
149
|
-
# Collected Algorithms of the CACM 1963 p. 616
|
150
|
-
#
|
151
|
-
# Note:
|
152
|
-
# This routine has six digit accuracy, so it is only useful for absolute
|
153
|
-
# z values < 6. For z values >= to 6.0, poz() returns 0.0
|
154
|
-
#
|
155
|
-
def poz(z)
|
156
|
-
y, x, w = nil, nil, nil
|
157
|
-
|
158
|
-
if (z == 0.0)
|
159
|
-
x = 0.0
|
160
|
-
else
|
161
|
-
y = 0.5 * z.abs # Math.abs(z)
|
162
|
-
|
163
|
-
if (y >= (Z_MAX * 0.5))
|
164
|
-
x = 1.0
|
165
|
-
elsif (y < 1.0)
|
166
|
-
w = y * y
|
167
|
-
x = ((((((((0.000124818987 * w - 0.001075204047) * w +
|
168
|
-
0.005198775019) * w - 0.019198292004) * w +
|
169
|
-
0.059054035642) * w - 0.151968751364) * w +
|
170
|
-
0.319152932694) * w - 0.531923007300) * w +
|
171
|
-
0.797884560593) * y * 2.0
|
172
|
-
else
|
173
|
-
y -= 2.0
|
174
|
-
x = (((((((((((((-0.000045255659 * y +
|
175
|
-
0.000152529290) * y - 0.000019538132) * y -
|
176
|
-
0.000676904986) * y + 0.001390604284) * y -
|
177
|
-
0.000794620820) * y - 0.002034254874) * y +
|
178
|
-
0.006549791214) * y - 0.010557625006) * y +
|
179
|
-
0.011630447319) * y - 0.009279453341) * y +
|
180
|
-
0.005353579108) * y - 0.002141268741) * y +
|
181
|
-
0.000535310849) * y + 0.999936657524
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
return z > 0.0 ? ((x + 1.0) * 0.5) : ((1.0 - x) * 0.5)
|
186
|
-
end # poz
|
187
|
-
|
188
|
-
|
189
|
-
end # module
|