fselector 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +7 -0
- data/README.md +18 -7
- data/lib/fselector.rb +4 -3
- data/lib/fselector/algo_base/base.rb +7 -0
- data/lib/fselector/algo_discrete/BiNormalSeparation.rb +3 -4
- data/lib/fselector/algo_discrete/FishersExactTest.rb +5 -7
- data/lib/fselector/discretizer.rb +15 -2
- data/lib/fselector/fileio.rb +19 -4
- data/lib/fselector/util.rb +0 -585
- metadata +17 -6
- data/lib/fselector/chisq_calc.rb +0 -189
data/ChangeLog
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
2012-04-18 Tiejun Cheng <need47@gmail.com>
|
2
|
+
|
3
|
+
* require the RinRuby gem (http://rinruby.ddahl.org) to access the
|
4
|
+
statistical routines in the R package (http://www.r-project.org/)
|
5
|
+
|
6
|
+
* because of RinRuby (and thus R), removed the following modules or implementations:
|
7
|
+
RubyStats (FishersExactTest.calculate, get_icdf) and ChiSquareCalculator
|
data/README.md
CHANGED
@@ -8,8 +8,8 @@ FSelector: a Ruby gem for feature selection and ranking
|
|
8
8
|
**Email**: [need47@gmail.com](mailto:need47@gmail.com)
|
9
9
|
**Copyright**: 2012
|
10
10
|
**License**: MIT License
|
11
|
-
**Latest Version**: 0.5.0
|
12
|
-
**Release Date**: April
|
11
|
+
**Latest Version**: 0.6.0
|
12
|
+
**Release Date**: April 19 2012
|
13
13
|
|
14
14
|
Synopsis
|
15
15
|
--------
|
@@ -25,9 +25,9 @@ missing feature values with certain criterion. FSelector acts on a
|
|
25
25
|
full-feature data set in either CSV, LibSVM or WEKA file format and
|
26
26
|
outputs a reduced data set with only selected subset of features, which
|
27
27
|
can later be used as the input for various machine learning softwares
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
such as LibSVM and WEKA. FSelector, as a collection of filter methods,
|
29
|
+
does not implement any classifier like support vector machines or
|
30
|
+
random forest. See below for a list of FSelector's features.
|
31
31
|
|
32
32
|
Feature List
|
33
33
|
------------
|
@@ -78,7 +78,7 @@ Feature List
|
|
78
78
|
ReliefF_c ReliefF_c continuous
|
79
79
|
TScore TS continuous
|
80
80
|
|
81
|
-
**feature selection interface:**
|
81
|
+
**note for feature selection interface:**
|
82
82
|
- for the algorithms of CFS\_d, FCBF and CFS\_c, use select\_feature!
|
83
83
|
- for other algorithms, use either select\_feature\_by\_rank! or select\_feature\_by\_score!
|
84
84
|
|
@@ -115,7 +115,13 @@ Installing
|
|
115
115
|
To install FSelector, use the following command:
|
116
116
|
|
117
117
|
$ gem install fselector
|
118
|
-
|
118
|
+
|
119
|
+
**note:** Starting from version 0.5.0, FSelector uses the RinRuby gem (http://rinruby.ddahl.org)
|
120
|
+
as a seamless bridge to access the statistical routines in the R package (http://www.r-project.org),
|
121
|
+
which will greatly expand the inclusion of algorithms to FSelector, especially for those relying
|
122
|
+
on statistical tests. To this end, please pre-install the R package. RinRuby should have been
|
123
|
+
auto-installed with FSelector.
|
124
|
+
|
119
125
|
Usage
|
120
126
|
-----
|
121
127
|
|
@@ -223,6 +229,11 @@ Usage
|
|
223
229
|
|
224
230
|
**4. see more examples test_*.rb under the test/ directory**
|
225
231
|
|
232
|
+
Change Log
|
233
|
+
----------
|
234
|
+
A {file:ChangeLog} is available from version 0.5.0 and upward to reflect
|
235
|
+
what's new and what's changed
|
236
|
+
|
226
237
|
Copyright
|
227
238
|
---------
|
228
239
|
FSelector © 2012 by [Tiejun Cheng](mailto:need47@gmail.com).
|
data/lib/fselector.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
# access to the statistical routines in R package
|
2
|
+
require 'rinruby'
|
3
|
+
|
1
4
|
#
|
2
5
|
# FSelector: a Ruby gem for feature selection and ranking
|
3
6
|
#
|
4
7
|
module FSelector
|
5
8
|
# module version
|
6
|
-
VERSION = '0.
|
9
|
+
VERSION = '0.6.0'
|
7
10
|
end
|
8
11
|
|
9
12
|
ROOT = File.expand_path(File.dirname(__FILE__))
|
@@ -17,8 +20,6 @@ require "#{ROOT}/fselector/fileio.rb"
|
|
17
20
|
require "#{ROOT}/fselector/util.rb"
|
18
21
|
# entropy-related functions
|
19
22
|
require "#{ROOT}/fselector/entropy.rb"
|
20
|
-
# chi-square calculator
|
21
|
-
require "#{ROOT}/fselector/chisq_calc.rb"
|
22
23
|
# normalization for continuous data
|
23
24
|
require "#{ROOT}/fselector/normalizer.rb"
|
24
25
|
# discretization for continuous data
|
@@ -165,6 +165,13 @@ module FSelector
|
|
165
165
|
end
|
166
166
|
|
167
167
|
|
168
|
+
# get a copy of data,
|
169
|
+
# by use of the standard Marshal library
|
170
|
+
def get_data_copy
|
171
|
+
Marshal.load(Marshal.dump(@data)) if @data
|
172
|
+
end
|
173
|
+
|
174
|
+
|
168
175
|
# set data
|
169
176
|
def set_data(data)
|
170
177
|
if data and data.class == Hash
|
@@ -13,14 +13,11 @@ module FSelector
|
|
13
13
|
# ref: [An extensive empirical study of feature selection metrics for text classification](http://dl.acm.org/citation.cfm?id=944974) and [Rubystats](http://rubystats.rubyforge.org)
|
14
14
|
#
|
15
15
|
class BiNormalSeparation < BaseDiscrete
|
16
|
-
# include Ruby statistics libraries
|
17
|
-
include Rubystats
|
18
16
|
|
19
17
|
private
|
20
18
|
|
21
19
|
# calculate contribution of each feature (f) for each class (k)
|
22
20
|
def calc_contribution(f)
|
23
|
-
@nd ||= Rubystats::NormalDistribution.new
|
24
21
|
|
25
22
|
each_class do |k|
|
26
23
|
a, b, c, d = get_A(f, k), get_B(f, k), get_C(f, k), get_D(f, k)
|
@@ -28,7 +25,9 @@ module FSelector
|
|
28
25
|
s = 0.0
|
29
26
|
if not (a+c).zero? and not (b+d).zero?
|
30
27
|
tpr, fpr = a/(a+c), b/(b+d)
|
31
|
-
|
28
|
+
|
29
|
+
R.eval "rv <- qnorm(#{tpr}) - qnorm(#{fpr})"
|
30
|
+
s = R.rv.abs
|
32
31
|
end
|
33
32
|
|
34
33
|
set_feature_score(f, k, s)
|
@@ -11,24 +11,22 @@ module FSelector
|
|
11
11
|
#
|
12
12
|
# for FET, the smaller, the better, but we intentionally negate it
|
13
13
|
# so that the larger is always the better (consistent with other algorithms)
|
14
|
+
# R equivalent: fisher.test
|
14
15
|
#
|
15
16
|
# ref: [Wikipedia](http://en.wikipedia.org/wiki/Fisher's_exact_test) and [Rubystats](http://rubystats.rubyforge.org)
|
16
17
|
#
|
17
18
|
class FishersExactTest < BaseDiscrete
|
18
|
-
# include Ruby statistics libraries
|
19
|
-
include Rubystats
|
20
19
|
|
21
20
|
private
|
22
21
|
|
23
22
|
# calculate contribution of each feature (f) for each class (k)
|
24
|
-
def calc_contribution(f)
|
25
|
-
@fet ||= Rubystats::FishersExactTest.new
|
26
|
-
|
23
|
+
def calc_contribution(f)
|
27
24
|
each_class do |k|
|
28
25
|
a, b, c, d = get_A(f, k), get_B(f, k), get_C(f, k), get_D(f, k)
|
29
26
|
|
30
|
-
# note:
|
31
|
-
|
27
|
+
# note: intentionally negated it
|
28
|
+
R.eval "rv <- fisher.test(matrix(c(#{a}, #{b}, #{c}, #{d}), nrow=2))$p.value"
|
29
|
+
s = -1.0 * R.rv
|
32
30
|
|
33
31
|
set_feature_score(f, k, s)
|
34
32
|
end
|
@@ -4,8 +4,6 @@
|
|
4
4
|
module Discretizer
|
5
5
|
# include Entropy module
|
6
6
|
include Entropy
|
7
|
-
# include ChiSquareCalculator module
|
8
|
-
include ChiSquareCalculator
|
9
7
|
|
10
8
|
# discretize by equal-width intervals
|
11
9
|
#
|
@@ -334,6 +332,19 @@ module Discretizer
|
|
334
332
|
|
335
333
|
private
|
336
334
|
|
335
|
+
#
|
336
|
+
# get the Chi-square value from p-value
|
337
|
+
#
|
338
|
+
# @param [Float] pval p-value
|
339
|
+
# @param [Integer] df degree of freedom
|
340
|
+
# @return [Float] Chi-square value
|
341
|
+
#
|
342
|
+
def pval2chisq(pval, df)
|
343
|
+
R.eval "chisq <- qchisq(#{1-pval}, #{df})"
|
344
|
+
R.chisq
|
345
|
+
end
|
346
|
+
|
347
|
+
|
337
348
|
#
|
338
349
|
# get index from sorted cut points
|
339
350
|
#
|
@@ -388,6 +399,7 @@ module Discretizer
|
|
388
399
|
clear_vars
|
389
400
|
end
|
390
401
|
|
402
|
+
|
391
403
|
#
|
392
404
|
# Chi2: initialization
|
393
405
|
#
|
@@ -423,6 +435,7 @@ module Discretizer
|
|
423
435
|
[bs, cs, qs]
|
424
436
|
end
|
425
437
|
|
438
|
+
|
426
439
|
#
|
427
440
|
# Chi2: merge two adjacent intervals
|
428
441
|
#
|
data/lib/fselector/fileio.rb
CHANGED
@@ -1,8 +1,23 @@
|
|
1
1
|
#
|
2
|
-
# read and write various file formats
|
2
|
+
# read and write various file formats,
|
3
|
+
# the internal data structure looks like:
|
4
|
+
#
|
5
|
+
# data = {
|
6
|
+
#
|
7
|
+
# :c1 => [ # class c1
|
8
|
+
# {:f1=>1, :f2=>2} # sample 2
|
9
|
+
# ],
|
10
|
+
#
|
11
|
+
# :c2 => [ # class c2
|
12
|
+
# {:f1=>1, :f3=>3}, # sample 1
|
13
|
+
# {:f2=>2} # sample 3
|
14
|
+
# ]
|
15
|
+
#
|
16
|
+
# }
|
17
|
+
#
|
18
|
+
# where :c1 and :c2 are class labels; :f1, :f2, and :f3 are features
|
3
19
|
#
|
4
|
-
# @note class labels and features are treated as symbols
|
5
|
-
# e.g. length => :length
|
20
|
+
# @note class labels and features are treated as symbols
|
6
21
|
#
|
7
22
|
module FileIO
|
8
23
|
#
|
@@ -40,7 +55,7 @@ module FileIO
|
|
40
55
|
if ncategory == 1
|
41
56
|
feats[f] = 1
|
42
57
|
elsif ncategory > 1
|
43
|
-
feats[f] = rand(ncategory)
|
58
|
+
feats[f] = rand(ncategory)+1
|
44
59
|
else
|
45
60
|
feats[f] = rand
|
46
61
|
end
|
data/lib/fselector/util.rb
CHANGED
@@ -149,588 +149,3 @@ end # String
|
|
149
149
|
#=>a
|
150
150
|
#=>_'b,c, d'_
|
151
151
|
#=>'e'
|
152
|
-
|
153
|
-
|
154
|
-
#
|
155
|
-
# adapted from the Ruby statistics libraries --
|
156
|
-
# [Rubystats](http://rubystats.rubyforge.org)
|
157
|
-
#
|
158
|
-
# - for Fisher's exact test (Rubystats::FishersExactTest.calculate())
|
159
|
-
# used by algo\_binary/FishersExactText.rb
|
160
|
-
# - for inverse cumulative normal distribution function (Rubystats::NormalDistribution.get\_icdf())
|
161
|
-
# used by algo\_binary/BiNormalSeparation.rb. note the original get\_icdf() function is a private
|
162
|
-
# one, so we have to open it up and that's why the codes here.
|
163
|
-
#
|
164
|
-
#
|
165
|
-
module Rubystats
|
166
|
-
MAX_VALUE = 1.2e290
|
167
|
-
SQRT2PI = 2.5066282746310005024157652848110452530069867406099
|
168
|
-
SQRT2 = 1.4142135623730950488016887242096980785696718753769
|
169
|
-
TWO_PI = 6.2831853071795864769252867665590057683943387987502
|
170
|
-
|
171
|
-
#
|
172
|
-
# Fisher's exact test calculator
|
173
|
-
#
|
174
|
-
class FishersExactTest
|
175
|
-
# new()
|
176
|
-
def initialize
|
177
|
-
@sn11 = 0.0
|
178
|
-
@sn1_ = 0.0
|
179
|
-
@sn_1 = 0.0
|
180
|
-
@sn = 0.0
|
181
|
-
@sprob = 0.0
|
182
|
-
|
183
|
-
@sleft = 0.0
|
184
|
-
@sright = 0.0
|
185
|
-
@sless = 0.0
|
186
|
-
@slarg = 0.0
|
187
|
-
|
188
|
-
@left = 0.0
|
189
|
-
@right = 0.0
|
190
|
-
@twotail = 0.0
|
191
|
-
end
|
192
|
-
|
193
|
-
|
194
|
-
# Fisher's exact test
|
195
|
-
def calculate(n11_,n12_,n21_,n22_)
|
196
|
-
n11_ *= -1 if n11_ < 0
|
197
|
-
n12_ *= -1 if n12_ < 0
|
198
|
-
n21_ *= -1 if n21_ < 0
|
199
|
-
n22_ *= -1 if n22_ < 0
|
200
|
-
n1_ = n11_ + n12_
|
201
|
-
n_1 = n11_ + n21_
|
202
|
-
n = n11_ + n12_ + n21_ + n22_
|
203
|
-
prob = exact(n11_,n1_,n_1,n)
|
204
|
-
left = @sless
|
205
|
-
right = @slarg
|
206
|
-
twotail = @sleft + @sright
|
207
|
-
twotail = 1 if twotail > 1
|
208
|
-
values_hash = { :left =>left, :right =>right, :twotail =>twotail }
|
209
|
-
return values_hash
|
210
|
-
end
|
211
|
-
|
212
|
-
private
|
213
|
-
|
214
|
-
# Reference: "Lanczos, C. 'A precision approximation
|
215
|
-
# of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
|
216
|
-
# Translation of Alan Miller's FORTRAN-implementation
|
217
|
-
# See http://lib.stat.cmu.edu/apstat/245
|
218
|
-
def lngamm(z)
|
219
|
-
x = 0
|
220
|
-
x += 0.0000001659470187408462/(z+7)
|
221
|
-
x += 0.000009934937113930748 /(z+6)
|
222
|
-
x -= 0.1385710331296526 /(z+5)
|
223
|
-
x += 12.50734324009056 /(z+4)
|
224
|
-
x -= 176.6150291498386 /(z+3)
|
225
|
-
x += 771.3234287757674 /(z+2)
|
226
|
-
x -= 1259.139216722289 /(z+1)
|
227
|
-
x += 676.5203681218835 /(z)
|
228
|
-
x += 0.9999999999995183
|
229
|
-
|
230
|
-
return(Math.log(x)-5.58106146679532777-z+(z-0.5) * Math.log(z+6.5))
|
231
|
-
end
|
232
|
-
|
233
|
-
def lnfact(n)
|
234
|
-
if n <= 1
|
235
|
-
return 0
|
236
|
-
else
|
237
|
-
return lngamm(n+1)
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
|
-
def lnbico(n,k)
|
242
|
-
return lnfact(n) - lnfact(k) - lnfact(n-k)
|
243
|
-
end
|
244
|
-
|
245
|
-
def hyper_323(n11, n1_, n_1, n)
|
246
|
-
return Math.exp(lnbico(n1_, n11) + lnbico(n-n1_, n_1-n11) - lnbico(n, n_1))
|
247
|
-
end
|
248
|
-
|
249
|
-
def hyper(n11)
|
250
|
-
return hyper0(n11, 0, 0, 0)
|
251
|
-
end
|
252
|
-
|
253
|
-
def hyper0(n11i,n1_i,n_1i,ni)
|
254
|
-
if n1_i == 0 and n_1i ==0 and ni == 0
|
255
|
-
unless n11i % 10 == 0
|
256
|
-
if n11i == @sn11+1
|
257
|
-
@sprob *= ((@sn1_ - @sn11)/(n11i.to_f))*((@sn_1 - @sn11)/(n11i.to_f + @sn - @sn1_ - @sn_1))
|
258
|
-
@sn11 = n11i
|
259
|
-
return @sprob
|
260
|
-
end
|
261
|
-
if n11i == @sn11-1
|
262
|
-
@sprob *= ((@sn11)/(@sn1_-n11i.to_f))*((@sn11+@sn-@sn1_-@sn_1)/(@sn_1-n11i.to_f))
|
263
|
-
@sn11 = n11i
|
264
|
-
return @sprob
|
265
|
-
end
|
266
|
-
end
|
267
|
-
@sn11 = n11i
|
268
|
-
else
|
269
|
-
@sn11 = n11i
|
270
|
-
@sn1_ = n1_i
|
271
|
-
@sn_1 = n_1i
|
272
|
-
@sn = ni
|
273
|
-
end
|
274
|
-
@sprob = hyper_323(@sn11,@sn1_,@sn_1,@sn)
|
275
|
-
return @sprob
|
276
|
-
end
|
277
|
-
|
278
|
-
def exact(n11,n1_,n_1,n)
|
279
|
-
|
280
|
-
p = i = j = prob = 0.0
|
281
|
-
|
282
|
-
max = n1_
|
283
|
-
max = n_1 if n_1 < max
|
284
|
-
min = n1_ + n_1 - n
|
285
|
-
min = 0 if min < 0
|
286
|
-
|
287
|
-
if min == max
|
288
|
-
@sless = 1
|
289
|
-
@sright = 1
|
290
|
-
@sleft = 1
|
291
|
-
@slarg = 1
|
292
|
-
return 1
|
293
|
-
end
|
294
|
-
|
295
|
-
prob = hyper0(n11,n1_,n_1,n)
|
296
|
-
@sleft = 0
|
297
|
-
|
298
|
-
p = hyper(min)
|
299
|
-
i = min + 1
|
300
|
-
while p < (0.99999999 * prob)
|
301
|
-
@sleft += p
|
302
|
-
p = hyper(i)
|
303
|
-
i += 1
|
304
|
-
end
|
305
|
-
|
306
|
-
i -= 1
|
307
|
-
|
308
|
-
if p < (1.00000001*prob)
|
309
|
-
@sleft += p
|
310
|
-
else
|
311
|
-
i -= 1
|
312
|
-
end
|
313
|
-
|
314
|
-
@sright = 0
|
315
|
-
|
316
|
-
p = hyper(max)
|
317
|
-
j = max - 1
|
318
|
-
while p < (0.99999999 * prob)
|
319
|
-
@sright += p
|
320
|
-
p = hyper(j)
|
321
|
-
j -= 1
|
322
|
-
end
|
323
|
-
j += 1
|
324
|
-
|
325
|
-
if p < (1.00000001*prob)
|
326
|
-
@sright += p
|
327
|
-
else
|
328
|
-
j += 1
|
329
|
-
end
|
330
|
-
|
331
|
-
if (i - n11).abs < (j - n11).abs
|
332
|
-
@sless = @sleft
|
333
|
-
@slarg = 1 - @sleft + prob
|
334
|
-
else
|
335
|
-
@sless = 1 - @sright + prob
|
336
|
-
@slarg = @sright
|
337
|
-
end
|
338
|
-
return prob
|
339
|
-
end
|
340
|
-
|
341
|
-
|
342
|
-
end # class
|
343
|
-
|
344
|
-
#
|
345
|
-
# Normal distribution
|
346
|
-
#
|
347
|
-
class NormalDistribution
|
348
|
-
# Constructs a normal distribution (defaults to zero mean and
|
349
|
-
# unity variance)
|
350
|
-
def initialize(mu=0.0, sigma=1.0)
|
351
|
-
@mean = mu
|
352
|
-
if sigma <= 0.0
|
353
|
-
return "error"
|
354
|
-
end
|
355
|
-
@stdev = sigma
|
356
|
-
@variance = sigma**2
|
357
|
-
@pdf_denominator = SQRT2PI * Math.sqrt(@variance)
|
358
|
-
@cdf_denominator = SQRT2 * Math.sqrt(@variance)
|
359
|
-
end
|
360
|
-
|
361
|
-
|
362
|
-
# Obtain single PDF value
|
363
|
-
# Returns the probability that a stochastic variable x has the value X,
|
364
|
-
# i.e. P(x=X)
|
365
|
-
def get_pdf(x)
|
366
|
-
Math.exp( -((x-@mean)**2) / (2 * @variance)) / @pdf_denominator
|
367
|
-
end
|
368
|
-
|
369
|
-
|
370
|
-
# Obtain single CDF value
|
371
|
-
# Returns the probability that a stochastic variable x is less than X,
|
372
|
-
# i.e. P(x<X)
|
373
|
-
def get_cdf(x)
|
374
|
-
complementary_error( -(x - @mean) / @cdf_denominator) / 2
|
375
|
-
end
|
376
|
-
|
377
|
-
|
378
|
-
# Obtain single inverse CDF value.
|
379
|
-
# returns the value X for which P(x<X).
|
380
|
-
def get_icdf(p)
|
381
|
-
check_range(p)
|
382
|
-
if p == 0.0
|
383
|
-
return -MAX_VALUE
|
384
|
-
end
|
385
|
-
if p == 1.0
|
386
|
-
return MAX_VALUE
|
387
|
-
end
|
388
|
-
if p == 0.5
|
389
|
-
return @mean
|
390
|
-
end
|
391
|
-
|
392
|
-
mean_save = @mean
|
393
|
-
var_save = @variance
|
394
|
-
pdf_D_save = @pdf_denominator
|
395
|
-
cdf_D_save = @cdf_denominator
|
396
|
-
@mean = 0.0
|
397
|
-
@variance = 1.0
|
398
|
-
@pdf_denominator = Math.sqrt(TWO_PI)
|
399
|
-
@cdf_denominator = SQRT2
|
400
|
-
x = find_root(p, 0.0, -100.0, 100.0)
|
401
|
-
#scale back
|
402
|
-
@mean = mean_save
|
403
|
-
@variance = var_save
|
404
|
-
@pdf_denominator = pdf_D_save
|
405
|
-
@cdf_denominator = cdf_D_save
|
406
|
-
return x * Math.sqrt(@variance) + @mean
|
407
|
-
end
|
408
|
-
|
409
|
-
private
|
410
|
-
|
411
|
-
#check that variable is between lo and hi limits.
|
412
|
-
#lo default is 0.0 and hi default is 1.0
|
413
|
-
def check_range(x, lo=0.0, hi=1.0)
|
414
|
-
raise ArgumentError.new("x cannot be nil") if x.nil?
|
415
|
-
if x < lo or x > hi
|
416
|
-
raise ArgumentError.new("x must be less than lo (#{lo}) and greater than hi (#{hi})")
|
417
|
-
end
|
418
|
-
end
|
419
|
-
|
420
|
-
|
421
|
-
def find_root(prob, guess, x_lo, x_hi)
|
422
|
-
accuracy = 1.0e-10
|
423
|
-
max_iteration = 150
|
424
|
-
x = guess
|
425
|
-
x_new = guess
|
426
|
-
error = 0.0
|
427
|
-
_pdf = 0.0
|
428
|
-
dx = 1000.0
|
429
|
-
i = 0
|
430
|
-
while ( dx.abs > accuracy && (i += 1) < max_iteration )
|
431
|
-
#Apply Newton-Raphson step
|
432
|
-
error = cdf(x) - prob
|
433
|
-
if error < 0.0
|
434
|
-
x_lo = x
|
435
|
-
else
|
436
|
-
x_hi = x
|
437
|
-
end
|
438
|
-
_pdf = pdf(x)
|
439
|
-
if _pdf != 0.0
|
440
|
-
dx = error / _pdf
|
441
|
-
x_new = x -dx
|
442
|
-
end
|
443
|
-
# If the NR fails to converge (which for example may be the
|
444
|
-
# case if the initial guess is too rough) we apply a bisection
|
445
|
-
# step to determine a more narrow interval around the root.
|
446
|
-
if x_new < x_lo || x_new > x_hi || _pdf == 0.0
|
447
|
-
x_new = (x_lo + x_hi) / 2.0
|
448
|
-
dx = x_new - x
|
449
|
-
end
|
450
|
-
x = x_new
|
451
|
-
end
|
452
|
-
return x
|
453
|
-
end
|
454
|
-
|
455
|
-
|
456
|
-
#Probability density function
|
457
|
-
def pdf(x)
|
458
|
-
if x.class == Array
|
459
|
-
pdf_vals = []
|
460
|
-
for i in (0 ... x.length)
|
461
|
-
pdf_vals[i] = get_pdf(x[i])
|
462
|
-
end
|
463
|
-
return pdf_vals
|
464
|
-
else
|
465
|
-
return get_pdf(x)
|
466
|
-
end
|
467
|
-
end
|
468
|
-
|
469
|
-
|
470
|
-
#Cumulative distribution function
|
471
|
-
def cdf(x)
|
472
|
-
if x.class == Array
|
473
|
-
cdf_vals = []
|
474
|
-
for i in (0...x.size)
|
475
|
-
cdf_vals[i] = get_cdf(x[i])
|
476
|
-
end
|
477
|
-
return cdf_vals
|
478
|
-
else
|
479
|
-
return get_cdf(x)
|
480
|
-
end
|
481
|
-
end
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
# Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
486
|
-
#
|
487
|
-
# Developed at SunSoft, a Sun Microsystems, Inc. business.
|
488
|
-
# Permission to use, copy, modify, and distribute this
|
489
|
-
# software is freely granted, provided that this notice
|
490
|
-
# is preserved.
|
491
|
-
#
|
492
|
-
# x
|
493
|
-
# 2 |\
|
494
|
-
# erf(x) = --------- | exp(-t*t)dt
|
495
|
-
# sqrt(pi) \|
|
496
|
-
# 0
|
497
|
-
#
|
498
|
-
# erfc(x) = 1-erf(x)
|
499
|
-
# Note that
|
500
|
-
# erf(-x) = -erf(x)
|
501
|
-
# erfc(-x) = 2 - erfc(x)
|
502
|
-
#
|
503
|
-
# Method:
|
504
|
-
# 1. For |x| in [0, 0.84375]
|
505
|
-
# erf(x) = x + x*R(x^2)
|
506
|
-
# erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
|
507
|
-
# = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
|
508
|
-
# where R = P/Q where P is an odd poly of degree 8 and
|
509
|
-
# Q is an odd poly of degree 10.
|
510
|
-
# -57.90
|
511
|
-
# | R - (erf(x)-x)/x | <= 2
|
512
|
-
#
|
513
|
-
#
|
514
|
-
# Remark. The formula is derived by noting
|
515
|
-
# erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
|
516
|
-
# and that
|
517
|
-
# 2/sqrt(pi) = 1.128379167095512573896158903121545171688
|
518
|
-
# is close to one. The interval is chosen because the fix
|
519
|
-
# point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
|
520
|
-
# near 0.6174), and by some experiment, 0.84375 is chosen to
|
521
|
-
# guarantee the error is less than one ulp for erf.
|
522
|
-
#
|
523
|
-
# 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
|
524
|
-
# c = 0.84506291151 rounded to single (24 bits)
|
525
|
-
# erf(x) = sign(x) * (c + P1(s)/Q1(s))
|
526
|
-
# erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
|
527
|
-
# 1+(c+P1(s)/Q1(s)) if x < 0
|
528
|
-
# |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
|
529
|
-
# Remark: here we use the taylor series expansion at x=1.
|
530
|
-
# erf(1+s) = erf(1) + s*Poly(s)
|
531
|
-
# = 0.845.. + P1(s)/Q1(s)
|
532
|
-
# That is, we use rational approximation to approximate
|
533
|
-
# erf(1+s) - (c = (single)0.84506291151)
|
534
|
-
# Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
|
535
|
-
# where
|
536
|
-
# P1(s) = degree 6 poly in s
|
537
|
-
# Q1(s) = degree 6 poly in s
|
538
|
-
#
|
539
|
-
# 3. For x in [1.25,1/0.35(~2.857143)],
|
540
|
-
# erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
|
541
|
-
# erf(x) = 1 - erfc(x)
|
542
|
-
# where
|
543
|
-
# R1(z) = degree 7 poly in z, (z=1/x^2)
|
544
|
-
# S1(z) = degree 8 poly in z
|
545
|
-
#
|
546
|
-
# 4. For x in [1/0.35,28]
|
547
|
-
# erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
|
548
|
-
# = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
|
549
|
-
# = 2.0 - tiny (if x <= -6)
|
550
|
-
# erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
|
551
|
-
# erf(x) = sign(x)*(1.0 - tiny)
|
552
|
-
# where
|
553
|
-
# R2(z) = degree 6 poly in z, (z=1/x^2)
|
554
|
-
# S2(z) = degree 7 poly in z
|
555
|
-
#
|
556
|
-
# Note1:
|
557
|
-
# To compute exp(-x*x-0.5625+R/S), let s be a single
|
558
|
-
# PRECISION number and s := x then
|
559
|
-
# -x*x = -s*s + (s-x)*(s+x)
|
560
|
-
# exp(-x*x-0.5626+R/S) =
|
561
|
-
# exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S)
|
562
|
-
# Note2:
|
563
|
-
# Here 4 and 5 make use of the asymptotic series
|
564
|
-
# exp(-x*x)
|
565
|
-
# erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
|
566
|
-
# x*sqrt(pi)
|
567
|
-
# We use rational approximation to approximate
|
568
|
-
# g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
|
569
|
-
# Here is the error bound for R1/S1 and R2/S2
|
570
|
-
# |R1/S1 - f(x)| < 2**(-62.57)
|
571
|
-
# |R2/S2 - f(x)| < 2**(-61.52)
|
572
|
-
#
|
573
|
-
# 5. For inf > x >= 28
|
574
|
-
# erf(x) = sign(x) *(1 - tiny) (raise inexact)
|
575
|
-
# erfc(x) = tiny*tiny (raise underflow) if x > 0
|
576
|
-
# = 2 - tiny if x<0
|
577
|
-
#
|
578
|
-
# 7. Special case:
|
579
|
-
# erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
|
580
|
-
# erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
|
581
|
-
# erfc/erf(NaN) is NaN
|
582
|
-
#
|
583
|
-
# $efx8 = 1.02703333676410069053e00
|
584
|
-
#
|
585
|
-
# Coefficients for approximation to erf on [0,0.84375]
|
586
|
-
#
|
587
|
-
|
588
|
-
# Error function.
|
589
|
-
# Based on C-code for the error function developed at Sun Microsystems.
|
590
|
-
# Author:: Jaco van Kooten
|
591
|
-
|
592
|
-
def error(x)
|
593
|
-
e_efx = 1.28379167095512586316e-01
|
594
|
-
|
595
|
-
ePp = [ 1.28379167095512558561e-01,
|
596
|
-
-3.25042107247001499370e-01,
|
597
|
-
-2.84817495755985104766e-02,
|
598
|
-
-5.77027029648944159157e-03,
|
599
|
-
-2.37630166566501626084e-05 ]
|
600
|
-
|
601
|
-
eQq = [ 3.97917223959155352819e-01,
|
602
|
-
6.50222499887672944485e-02,
|
603
|
-
5.08130628187576562776e-03,
|
604
|
-
1.32494738004321644526e-04,
|
605
|
-
-3.96022827877536812320e-06 ]
|
606
|
-
|
607
|
-
# Coefficients for approximation to erf in [0.84375,1.25]
|
608
|
-
ePa = [-2.36211856075265944077e-03,
|
609
|
-
4.14856118683748331666e-01,
|
610
|
-
-3.72207876035701323847e-01,
|
611
|
-
3.18346619901161753674e-01,
|
612
|
-
-1.10894694282396677476e-01,
|
613
|
-
3.54783043256182359371e-02,
|
614
|
-
-2.16637559486879084300e-03 ]
|
615
|
-
|
616
|
-
eQa = [ 1.06420880400844228286e-01,
|
617
|
-
5.40397917702171048937e-01,
|
618
|
-
7.18286544141962662868e-02,
|
619
|
-
1.26171219808761642112e-01,
|
620
|
-
1.36370839120290507362e-02,
|
621
|
-
1.19844998467991074170e-02 ]
|
622
|
-
|
623
|
-
e_erx = 8.45062911510467529297e-01
|
624
|
-
|
625
|
-
abs_x = (if x >= 0.0 then x else -x end)
|
626
|
-
# 0 < |x| < 0.84375
|
627
|
-
if abs_x < 0.84375
|
628
|
-
#|x| < 2**-28
|
629
|
-
if abs_x < 3.7252902984619141e-9
|
630
|
-
retval = abs_x + abs_x * e_efx
|
631
|
-
else
|
632
|
-
s = x * x
|
633
|
-
p = ePp[0] + s * (ePp[1] + s * (ePp[2] + s * (ePp[3] + s * ePp[4])))
|
634
|
-
|
635
|
-
q = 1.0 + s * (eQq[0] + s * (eQq[1] + s *
|
636
|
-
( eQq[2] + s * (eQq[3] + s * eQq[4]))))
|
637
|
-
retval = abs_x + abs_x * (p / q)
|
638
|
-
end
|
639
|
-
elsif abs_x < 1.25
|
640
|
-
s = abs_x - 1.0
|
641
|
-
p = ePa[0] + s * (ePa[1] + s *
|
642
|
-
(ePa[2] + s * (ePa[3] + s *
|
643
|
-
(ePa[4] + s * (ePa[5] + s * ePa[6])))))
|
644
|
-
|
645
|
-
q = 1.0 + s * (eQa[0] + s *
|
646
|
-
(eQa[1] + s * (eQa[2] + s *
|
647
|
-
(eQa[3] + s * (eQa[4] + s * eQa[5])))))
|
648
|
-
retval = e_erx + p / q
|
649
|
-
|
650
|
-
elsif abs_x >= 6.0
|
651
|
-
retval = 1.0
|
652
|
-
else
|
653
|
-
retval = 1.0 - complementary_error(abs_x)
|
654
|
-
end
|
655
|
-
return (if x >= 0.0 then retval else -retval end)
|
656
|
-
end
|
657
|
-
|
658
|
-
# Complementary error function.
|
659
|
-
# Based on C-code for the error function developed at Sun Microsystems.
|
660
|
-
# author Jaco van Kooten
|
661
|
-
|
662
|
-
def complementary_error(x)
|
663
|
-
# Coefficients for approximation of erfc in [1.25,1/.35]
|
664
|
-
|
665
|
-
eRa = [-9.86494403484714822705e-03,
|
666
|
-
-6.93858572707181764372e-01,
|
667
|
-
-1.05586262253232909814e01,
|
668
|
-
-6.23753324503260060396e01,
|
669
|
-
-1.62396669462573470355e02,
|
670
|
-
-1.84605092906711035994e02,
|
671
|
-
-8.12874355063065934246e01,
|
672
|
-
-9.81432934416914548592e00 ]
|
673
|
-
|
674
|
-
eSa = [ 1.96512716674392571292e01,
|
675
|
-
1.37657754143519042600e02,
|
676
|
-
4.34565877475229228821e02,
|
677
|
-
6.45387271733267880336e02,
|
678
|
-
4.29008140027567833386e02,
|
679
|
-
1.08635005541779435134e02,
|
680
|
-
6.57024977031928170135e00,
|
681
|
-
-6.04244152148580987438e-02 ]
|
682
|
-
|
683
|
-
# Coefficients for approximation to erfc in [1/.35,28]
|
684
|
-
|
685
|
-
eRb = [-9.86494292470009928597e-03,
|
686
|
-
-7.99283237680523006574e-01,
|
687
|
-
-1.77579549177547519889e01,
|
688
|
-
-1.60636384855821916062e02,
|
689
|
-
-6.37566443368389627722e02,
|
690
|
-
-1.02509513161107724954e03,
|
691
|
-
-4.83519191608651397019e02 ]
|
692
|
-
|
693
|
-
eSb = [ 3.03380607434824582924e01,
|
694
|
-
3.25792512996573918826e02,
|
695
|
-
1.53672958608443695994e03,
|
696
|
-
3.19985821950859553908e03,
|
697
|
-
2.55305040643316442583e03,
|
698
|
-
4.74528541206955367215e02,
|
699
|
-
-2.24409524465858183362e01 ]
|
700
|
-
|
701
|
-
abs_x = (if x >= 0.0 then x else -x end)
|
702
|
-
if abs_x < 1.25
|
703
|
-
retval = 1.0 - error(abs_x)
|
704
|
-
elsif abs_x > 28.0
|
705
|
-
retval = 0.0
|
706
|
-
|
707
|
-
# 1.25 < |x| < 28
|
708
|
-
else
|
709
|
-
s = 1.0/(abs_x * abs_x)
|
710
|
-
if abs_x < 2.8571428
|
711
|
-
r = eRa[0] + s * (eRa[1] + s *
|
712
|
-
(eRa[2] + s * (eRa[3] + s * (eRa[4] + s *
|
713
|
-
(eRa[5] + s *(eRa[6] + s * eRa[7])
|
714
|
-
)))))
|
715
|
-
|
716
|
-
s = 1.0 + s * (eSa[0] + s * (eSa[1] + s *
|
717
|
-
(eSa[2] + s * (eSa[3] + s * (eSa[4] + s *
|
718
|
-
(eSa[5] + s * (eSa[6] + s * eSa[7])))))))
|
719
|
-
|
720
|
-
else
|
721
|
-
r = eRb[0] + s * (eRb[1] + s *
|
722
|
-
(eRb[2] + s * (eRb[3] + s * (eRb[4] + s *
|
723
|
-
(eRb[5] + s * eRb[6])))))
|
724
|
-
|
725
|
-
s = 1.0 + s * (eSb[0] + s *
|
726
|
-
(eSb[1] + s * (eSb[2] + s * (eSb[3] + s *
|
727
|
-
(eSb[4] + s * (eSb[5] + s * eSb[6]))))))
|
728
|
-
end
|
729
|
-
retval = Math.exp(-x * x - 0.5625 + r/s) / abs_x
|
730
|
-
end
|
731
|
-
return ( if x >= 0.0 then retval else 2.0 - retval end )
|
732
|
-
end
|
733
|
-
|
734
|
-
end # class
|
735
|
-
|
736
|
-
end # module
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fselector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,8 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
13
|
-
dependencies:
|
12
|
+
date: 2012-04-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rinruby
|
16
|
+
requirement: &22515480 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.0.2
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *22515480
|
14
25
|
description: FSelector is a Ruby gem that aims to integrate various feature selection/ranking
|
15
26
|
algorithms and related functions into one single package. Welcome to contact me
|
16
27
|
(need47@gmail.com) if you'd like to contribute your own algorithms or report a bug.
|
@@ -20,8 +31,8 @@ description: FSelector is a Ruby gem that aims to integrate various feature sele
|
|
20
31
|
with certain criterion. FSelector acts on a full-feature data set in either CSV,
|
21
32
|
LibSVM or WEKA file format and outputs a reduced data set with only selected subset
|
22
33
|
of features, which can later be used as the input for various machine learning softwares
|
23
|
-
|
24
|
-
|
34
|
+
such as LibSVM and WEKA. FSelector, as a collection of filter methods, does not
|
35
|
+
implement any classifier like support vector machines or random forest.
|
25
36
|
email: need47@gmail.com
|
26
37
|
executables: []
|
27
38
|
extensions: []
|
@@ -30,6 +41,7 @@ extra_rdoc_files:
|
|
30
41
|
- LICENSE
|
31
42
|
files:
|
32
43
|
- README.md
|
44
|
+
- ChangeLog
|
33
45
|
- LICENSE
|
34
46
|
- lib/fselector/algo_base/base.rb
|
35
47
|
- lib/fselector/algo_base/base_CFS.rb
|
@@ -70,7 +82,6 @@ files:
|
|
70
82
|
- lib/fselector/algo_discrete/Sensitivity.rb
|
71
83
|
- lib/fselector/algo_discrete/Specificity.rb
|
72
84
|
- lib/fselector/algo_discrete/SymmetricalUncertainty.rb
|
73
|
-
- lib/fselector/chisq_calc.rb
|
74
85
|
- lib/fselector/discretizer.rb
|
75
86
|
- lib/fselector/ensemble.rb
|
76
87
|
- lib/fselector/entropy.rb
|
data/lib/fselector/chisq_calc.rb
DELETED
@@ -1,189 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Chi-Square Calculator
|
3
|
-
#
|
4
|
-
# This module is adapted from the on-line [Chi-square Calculator](http://www.swogstat.org/stat/public/chisq_calculator.htm)
|
5
|
-
#
|
6
|
-
# The functions for calculating normal and chi-square probabilities
|
7
|
-
# and critical values were adapted by John Walker from C implementations
|
8
|
-
# written by Gary Perlman of Wang Institute, Tyngsboro, MA 01879. The
|
9
|
-
# original C code is in the public domain.
|
10
|
-
#
|
11
|
-
# chisq2pval(chisq, df) -- calculate p-value from given
|
12
|
-
# chi-square value (chisq) and degree of freedom (df)
|
13
|
-
# pval2chisq(pval, df) -- chi-square value from given
|
14
|
-
# p-value (pvalue) and degree of freedom (df)
|
15
|
-
#
|
16
|
-
module ChiSquareCalculator
|
17
|
-
BIGX = 20.0 # max value to represent exp(x)
|
18
|
-
LOG_SQRT_PI = 0.5723649429247000870717135 # log(sqrt(pi))
|
19
|
-
I_SQRT_PI = 0.5641895835477562869480795 # 1 / sqrt(pi)
|
20
|
-
Z_MAX = 6.0 # Maximum meaningful z value
|
21
|
-
CHI_EPSILON = 0.000001 # Accuracy of critchi approximation
|
22
|
-
CHI_MAX = 99999.0 # Maximum chi-square value
|
23
|
-
|
24
|
-
#
|
25
|
-
# POCHISQ -- probability of chi-square value
|
26
|
-
#
|
27
|
-
# Adapted from:
|
28
|
-
#
|
29
|
-
# Hill, I. D. and Pike, M. C. Algorithm 299
|
30
|
-
#
|
31
|
-
# Collected Algorithms for the CACM 1967 p. 243
|
32
|
-
#
|
33
|
-
# Updated for rounding errors based on remark in
|
34
|
-
#
|
35
|
-
# ACM TOMS June 1985, page 185
|
36
|
-
#
|
37
|
-
# @param [Float] x chi-square value
|
38
|
-
# @param [Integer] df degree of freedom
|
39
|
-
# @return [Float] p-value
|
40
|
-
def pochisq(x, df)
|
41
|
-
a, y, s = nil, nil, nil
|
42
|
-
e, c, z = nil, nil, nil
|
43
|
-
|
44
|
-
even = nil # True if df is an even number
|
45
|
-
|
46
|
-
if x <= 0.0 or df < 1
|
47
|
-
return 1.0
|
48
|
-
end
|
49
|
-
|
50
|
-
a = 0.5 * x
|
51
|
-
even = ((df & 1) == 0)
|
52
|
-
|
53
|
-
if df > 1
|
54
|
-
y = ex(-a)
|
55
|
-
end
|
56
|
-
|
57
|
-
s = even ? y : (2.0 * poz(-Math.sqrt(x)))
|
58
|
-
|
59
|
-
if df > 2
|
60
|
-
x = 0.5 * (df - 1.0)
|
61
|
-
z = even ? 1.0 : 0.5
|
62
|
-
|
63
|
-
if a > BIGX
|
64
|
-
e = even ? 0.0 : LOG_SQRT_PI
|
65
|
-
c = Math.log(a)
|
66
|
-
|
67
|
-
while z <= x
|
68
|
-
e = Math.log(z) + e
|
69
|
-
s += ex(c * z - a - e)
|
70
|
-
z += 1.0
|
71
|
-
end
|
72
|
-
|
73
|
-
return s
|
74
|
-
else
|
75
|
-
e = even ? 1.0 : (I_SQRT_PI / Math.sqrt(a))
|
76
|
-
c = 0.0
|
77
|
-
|
78
|
-
while (z <= x)
|
79
|
-
e = e * (a / z)
|
80
|
-
c = c + e
|
81
|
-
z += 1.0
|
82
|
-
end
|
83
|
-
|
84
|
-
return c * y + s
|
85
|
-
end
|
86
|
-
else
|
87
|
-
return s
|
88
|
-
end
|
89
|
-
|
90
|
-
end # pochisq
|
91
|
-
|
92
|
-
# function alias
|
93
|
-
alias :chisq2pval :pochisq
|
94
|
-
|
95
|
-
|
96
|
-
#
|
97
|
-
# CRITCHI -- Compute critical chi-square value to
|
98
|
-
# produce given p. We just do a bisection
|
99
|
-
# search for a value within CHI_EPSILON,
|
100
|
-
# relying on the monotonicity of pochisq()
|
101
|
-
#
|
102
|
-
# @param [Float] p p-value
|
103
|
-
# @param [Integer] df degree of freedom
|
104
|
-
# @return [Float] chi-square value
|
105
|
-
def critchi(p, df)
|
106
|
-
minchisq = 0.0
|
107
|
-
maxchisq = CHI_MAX
|
108
|
-
|
109
|
-
chisqval = nil
|
110
|
-
|
111
|
-
if p <= 0.0
|
112
|
-
return maxchisq
|
113
|
-
else
|
114
|
-
if p >= 1.0
|
115
|
-
return 0.0
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
chisqval = df / Math.sqrt(p); # fair first value
|
120
|
-
|
121
|
-
while (maxchisq - minchisq) > CHI_EPSILON
|
122
|
-
if pochisq(chisqval, df) < p
|
123
|
-
maxchisq = chisqval
|
124
|
-
else
|
125
|
-
minchisq = chisqval
|
126
|
-
end
|
127
|
-
|
128
|
-
chisqval = (maxchisq + minchisq) * 0.5
|
129
|
-
end
|
130
|
-
|
131
|
-
return chisqval
|
132
|
-
end # critchi
|
133
|
-
|
134
|
-
# function alias
|
135
|
-
alias :pval2chisq :critchi
|
136
|
-
|
137
|
-
private
|
138
|
-
|
139
|
-
def ex(x)
|
140
|
-
return (x < -BIGX) ? 0.0 : Math.exp(x)
|
141
|
-
end # ex
|
142
|
-
|
143
|
-
|
144
|
-
#
|
145
|
-
# POZ -- probability of normal z value
|
146
|
-
#
|
147
|
-
# Adapted from a polynomial approximation in:
|
148
|
-
# Ibbetson D, Algorithm 209
|
149
|
-
# Collected Algorithms of the CACM 1963 p. 616
|
150
|
-
#
|
151
|
-
# Note:
|
152
|
-
# This routine has six digit accuracy, so it is only useful for absolute
|
153
|
-
# z values < 6. For z values >= to 6.0, poz() returns 0.0
|
154
|
-
#
|
155
|
-
def poz(z)
|
156
|
-
y, x, w = nil, nil, nil
|
157
|
-
|
158
|
-
if (z == 0.0)
|
159
|
-
x = 0.0
|
160
|
-
else
|
161
|
-
y = 0.5 * z.abs # Math.abs(z)
|
162
|
-
|
163
|
-
if (y >= (Z_MAX * 0.5))
|
164
|
-
x = 1.0
|
165
|
-
elsif (y < 1.0)
|
166
|
-
w = y * y
|
167
|
-
x = ((((((((0.000124818987 * w - 0.001075204047) * w +
|
168
|
-
0.005198775019) * w - 0.019198292004) * w +
|
169
|
-
0.059054035642) * w - 0.151968751364) * w +
|
170
|
-
0.319152932694) * w - 0.531923007300) * w +
|
171
|
-
0.797884560593) * y * 2.0
|
172
|
-
else
|
173
|
-
y -= 2.0
|
174
|
-
x = (((((((((((((-0.000045255659 * y +
|
175
|
-
0.000152529290) * y - 0.000019538132) * y -
|
176
|
-
0.000676904986) * y + 0.001390604284) * y -
|
177
|
-
0.000794620820) * y - 0.002034254874) * y +
|
178
|
-
0.006549791214) * y - 0.010557625006) * y +
|
179
|
-
0.011630447319) * y - 0.009279453341) * y +
|
180
|
-
0.005353579108) * y - 0.002141268741) * y +
|
181
|
-
0.000535310849) * y + 0.999936657524
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
return z > 0.0 ? ((x + 1.0) * 0.5) : ((1.0 - x) * 0.5)
|
186
|
-
end # poz
|
187
|
-
|
188
|
-
|
189
|
-
end # module
|