statsample 0.6.5 → 0.6.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
data/lib/statsample/dataset.rb
CHANGED
@@ -1,22 +1,23 @@
|
|
1
1
|
require 'statsample/vector'
|
2
2
|
|
3
3
|
class Hash
|
4
|
+
# Creates a Statsample::Dataset based on a Hash
|
4
5
|
def to_dataset(*args)
|
5
6
|
Statsample::Dataset.new(self,*args)
|
6
7
|
end
|
7
8
|
end
|
8
9
|
|
9
10
|
class Array
|
10
|
-
def prefix(s)
|
11
|
+
def prefix(s) # :nodoc:
|
11
12
|
self.collect{|c| s+c.to_s }
|
12
13
|
end
|
13
|
-
def suffix(s)
|
14
|
+
def suffix(s) # :nodoc:
|
14
15
|
self.collect{|c| c.to_s+s }
|
15
16
|
end
|
16
17
|
end
|
17
18
|
|
18
19
|
module Statsample
|
19
|
-
class DatasetException < RuntimeError
|
20
|
+
class DatasetException < RuntimeError # :nodoc:
|
20
21
|
attr_reader :ds,:exp
|
21
22
|
def initialize(ds,e)
|
22
23
|
@ds=ds
|
@@ -28,15 +29,49 @@ module Statsample
|
|
28
29
|
m
|
29
30
|
end
|
30
31
|
end
|
32
|
+
# Set of cases with values for one or more variables,
|
33
|
+
# analogous to a dataframe in R or a standard SPSS data file.
|
34
|
+
# Every vector has a <tt>#field</tt> name, which represents it. By default,
|
35
|
+
# the vectors are ordered by their field names, but you can change
|
36
|
+
# the fields order manually.
|
37
|
+
# The Dataset works as a Hash, where keys are field names
|
38
|
+
# and values are Statsample::Vector
|
39
|
+
#
|
40
|
+
#
|
41
|
+
# ==Usage
|
42
|
+
# Create an empty dataset
|
43
|
+
# Dataset.new()
|
44
|
+
# Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>
|
45
|
+
# Dataset.new(%w{v1 v2 v3})
|
46
|
+
# Create a dataset with two vectors
|
47
|
+
# Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
|
48
|
+
# Create a dataset with two given vectors (v1 and v2), with vectors in inverted order
|
49
|
+
# Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
|
50
|
+
#
|
51
|
+
# The fast way to create a dataset uses Hash#to_dataset, with
|
52
|
+
# field order as arguments
|
53
|
+
# v1 = [1,2,3].to_scale
|
54
|
+
# v2 = [1,2,3].to_scale
|
55
|
+
# ds = {'v1'=>v1, 'v2'=>v2}.to_dataset(%w{v2 v1})
|
56
|
+
|
31
57
|
class Dataset
|
32
58
|
include Writable
|
33
|
-
|
59
|
+
# Hash of Statsample::Vector
|
60
|
+
attr_reader :vectors
|
61
|
+
# Ordered names of vectors
|
62
|
+
attr_reader :fields
|
63
|
+
# Number of cases
|
64
|
+
attr_reader :cases
|
65
|
+
# Location of pointer on enumeration methods (like #each)
|
66
|
+
attr_reader :i
|
67
|
+
# Deprecated: Label of vectors
|
34
68
|
attr_accessor :labels
|
35
69
|
|
36
70
|
# Generates a new dataset, using three vectors
|
37
71
|
# - Rows
|
38
72
|
# - Columns
|
39
73
|
# - Values
|
74
|
+
#
|
40
75
|
# For example, you have these values
|
41
76
|
#
|
42
77
|
# x y v
|
@@ -88,16 +123,7 @@ module Statsample
|
|
88
123
|
# order of variables. If empty, vector keys in alphabetic order are
|
89
124
|
# used as fields
|
90
125
|
# [labels] Hash to set names for fields.
|
91
|
-
|
92
|
-
#
|
93
|
-
# Dataset.new()
|
94
|
-
# Dataset.new(%w{v1 v2 v3})
|
95
|
-
# Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
|
96
|
-
# Dataset.new({'v2'=>v2,'v1'=>v1},['v1','v2'])
|
97
|
-
#
|
98
|
-
# The fast way to create a dataset uses Hash#to_dataset, with
|
99
|
-
# fields and labels as arguments
|
100
|
-
# ds = {'v1'=>[1,2,3].to_vector}.to_dataset
|
126
|
+
|
101
127
|
#
|
102
128
|
def initialize(vectors={}, fields=[], labels={})
|
103
129
|
if vectors.instance_of? Array
|
@@ -120,7 +146,8 @@ module Statsample
|
|
120
146
|
end
|
121
147
|
matrix
|
122
148
|
end
|
123
|
-
|
149
|
+
# Retrieves label for a vector, giving a field name.
|
150
|
+
def label(v_id)
|
124
151
|
raise "Vector #{v} doesn't exists" unless @fields.include? v_id
|
125
152
|
@labels[v_id].nil? ? v_id : @labels[v_id]
|
126
153
|
end
|
@@ -233,12 +260,20 @@ module Statsample
|
|
233
260
|
ds_boot.update_valid_data
|
234
261
|
ds_boot
|
235
262
|
end
|
236
|
-
# Fast version of
|
263
|
+
# Fast version of #add_case.
|
237
264
|
# Can only add one case and no error check is performed
|
238
|
-
# You SHOULD use update_valid_data at the end of insertion cycle
|
265
|
+
# You SHOULD use #update_valid_data at the end of insertion cycle
|
239
266
|
def add_case_array(v)
|
240
267
|
v.each_index {|i| d=@vectors[@fields[i]].data; d.push(v[i])}
|
241
268
|
end
|
269
|
+
# Insert a case, using:
|
270
|
+
# * Array: size equal to number of vectors and values in the same order as fields
|
271
|
+
# * Hash: keys equal to fields
|
272
|
+
# If uvd is false, #update_valid_data is not executed after
|
273
|
+
# inserting a case. This is very useful if you want to increase the
|
274
|
+
# performance on inserting many cases,
|
275
|
+
# because #update_valid_data performs check on vectors and on the dataset
|
276
|
+
|
242
277
|
def add_case(v,uvd=true)
|
243
278
|
case v
|
244
279
|
when Array
|
@@ -258,14 +293,18 @@ module Statsample
|
|
258
293
|
update_valid_data
|
259
294
|
end
|
260
295
|
end
|
296
|
+
# Check vectors and fields after inserting data. Use only
|
297
|
+
# after #add_case_array or #add_case with second parameter to false
|
261
298
|
def update_valid_data
|
262
299
|
@fields.each{|f| @vectors[f].set_valid_data}
|
263
300
|
check_length
|
264
301
|
end
|
302
|
+
# Delete a vector
|
265
303
|
def delete_vector(name)
|
266
304
|
@fields.delete(name)
|
267
305
|
@vectors.delete(name)
|
268
306
|
end
|
307
|
+
|
269
308
|
def add_vectors_by_split_recode(name,join='-',sep=Statsample::SPLIT_TOKEN)
|
270
309
|
split=@vectors[name].split_by_separator(sep)
|
271
310
|
i=1
|
@@ -294,7 +333,7 @@ module Statsample
|
|
294
333
|
def vector_sum(fields=nil)
|
295
334
|
a=[]
|
296
335
|
fields||=@fields
|
297
|
-
collect_with_index do |i
|
336
|
+
collect_with_index do |row, i|
|
298
337
|
if(fields.find{|f| !@vectors[f].data_with_nils[i]})
|
299
338
|
nil
|
300
339
|
else
|
@@ -302,16 +341,17 @@ module Statsample
|
|
302
341
|
end
|
303
342
|
end
|
304
343
|
end
|
344
|
+
# Checks that every given field name exists on the dataset (uses #fields if nil) and returns the list
|
305
345
|
def check_fields(fields)
|
306
346
|
fields||=@fields
|
307
347
|
raise "Fields #{(fields-@fields).join(", ")} doesn't exists on dataset" if (fields-@fields).size>0
|
308
348
|
fields
|
309
349
|
end
|
350
|
+
|
310
351
|
# Returns a vector with the numbers of missing values for a case
|
311
|
-
|
312
352
|
def vector_missing_values(fields=nil)
|
313
353
|
fields=check_fields(fields)
|
314
|
-
collect_with_index do |i
|
354
|
+
collect_with_index do |row, i|
|
315
355
|
fields.inject(0) {|a,v|
|
316
356
|
a+ ((@vectors[v].data_with_nils[i].nil?) ? 1: 0)
|
317
357
|
}
|
@@ -319,9 +359,8 @@ module Statsample
|
|
319
359
|
end
|
320
360
|
def vector_count_characters(fields=nil)
|
321
361
|
fields=check_fields(fields)
|
322
|
-
collect_with_index do |i
|
362
|
+
collect_with_index do |row, i|
|
323
363
|
fields.inject(0){|a,v|
|
324
|
-
|
325
364
|
a+((@vectors[v].data_with_nils[i].nil?) ? 0: row[v].to_s.size)
|
326
365
|
}
|
327
366
|
end
|
@@ -353,7 +392,8 @@ module Statsample
|
|
353
392
|
end
|
354
393
|
a.to_vector(:scale)
|
355
394
|
end
|
356
|
-
|
395
|
+
# Check vectors for type and size.
|
396
|
+
def check_length # :nodoc:
|
357
397
|
size=nil
|
358
398
|
@vectors.each do |k,v|
|
359
399
|
raise Exception, "Data #{v.class} is not a vector on key #{k}" if !v.is_a? Statsample::Vector
|
@@ -368,16 +408,19 @@ module Statsample
|
|
368
408
|
end
|
369
409
|
@cases=size
|
370
410
|
end
|
371
|
-
|
372
|
-
|
411
|
+
# Retrieves each vector as [key, vector]
|
412
|
+
def each_vector # :yield: |key, vector|
|
413
|
+
@fields.each{|k| yield k, @vectors[k]}
|
373
414
|
end
|
415
|
+
|
374
416
|
if Statsample::STATSAMPLE__.respond_to?(:case_as_hash)
|
375
417
|
def case_as_hash(c) # :nodoc:
|
376
418
|
Statsample::STATSAMPLE__.case_as_hash(self,c)
|
377
419
|
end
|
378
420
|
else
|
379
|
-
|
380
|
-
|
421
|
+
# Retrieves case i as a hash
|
422
|
+
def case_as_hash(i)
|
423
|
+
_case_as_hash(i)
|
381
424
|
end
|
382
425
|
end
|
383
426
|
|
@@ -386,8 +429,9 @@ module Statsample
|
|
386
429
|
Statsample::STATSAMPLE__.case_as_array(self,c)
|
387
430
|
end
|
388
431
|
else
|
389
|
-
|
390
|
-
|
432
|
+
# Retrieves case i as an array, ordered by #fields
|
433
|
+
def case_as_array(i)
|
434
|
+
_case_as_array(i)
|
391
435
|
end
|
392
436
|
end
|
393
437
|
def _case_as_hash(c) # :nodoc:
|
@@ -396,6 +440,7 @@ module Statsample
|
|
396
440
|
def _case_as_array(c) # :nodoc:
|
397
441
|
@fields.collect {|x| @vectors[x][c]}
|
398
442
|
end
|
443
|
+
|
399
444
|
# Returns each case as a hash
|
400
445
|
def each
|
401
446
|
begin
|
@@ -411,7 +456,7 @@ module Statsample
|
|
411
456
|
end
|
412
457
|
end
|
413
458
|
# Returns each case as hash and index
|
414
|
-
def each_with_index
|
459
|
+
def each_with_index # :yield: |case, i|
|
415
460
|
begin
|
416
461
|
@i=0
|
417
462
|
@cases.times{|i|
|
@@ -447,6 +492,7 @@ module Statsample
|
|
447
492
|
}
|
448
493
|
@i=nil
|
449
494
|
end
|
495
|
+
# Set fields order. If you omit one or more vectors,
|
450
496
|
def fields=(f)
|
451
497
|
@fields=f
|
452
498
|
check_order
|
@@ -470,6 +516,8 @@ module Statsample
|
|
470
516
|
raise ArgumentError, "You need a String or a Range"
|
471
517
|
end
|
472
518
|
end
|
519
|
+
# Retrieves a Statsample::Vector, based on the result
|
520
|
+
# of calculation performed on each case.
|
473
521
|
def collect(type=:scale)
|
474
522
|
data=[]
|
475
523
|
each {|row|
|
@@ -477,10 +525,11 @@ module Statsample
|
|
477
525
|
}
|
478
526
|
Statsample::Vector.new(data,type)
|
479
527
|
end
|
528
|
+
# Same as #collect, but giving case index as second parameter on yield.
|
480
529
|
def collect_with_index(type=:scale)
|
481
530
|
data=[]
|
482
531
|
each_with_index {|row, i|
|
483
|
-
data.push(yield(i
|
532
|
+
data.push(yield(row, i))
|
484
533
|
}
|
485
534
|
Statsample::Vector.new(data,type)
|
486
535
|
end
|
@@ -504,6 +553,8 @@ module Statsample
|
|
504
553
|
raise ArgumentError,"Should pass a Statsample::Vector"
|
505
554
|
end
|
506
555
|
end
|
556
|
+
# Return data as a matrix. Columns are ordered by #fields and
|
557
|
+
# rows by order of insertion
|
507
558
|
def to_matrix
|
508
559
|
rows=[]
|
509
560
|
self.each_array{|c|
|
@@ -511,7 +562,8 @@ module Statsample
|
|
511
562
|
}
|
512
563
|
Matrix.rows(rows)
|
513
564
|
end
|
514
|
-
|
565
|
+
|
566
|
+
if Statsample.has_gsl?
|
515
567
|
def to_matrix_gsl
|
516
568
|
rows=[]
|
517
569
|
self.each_array{|c|
|
@@ -520,15 +572,17 @@ module Statsample
|
|
520
572
|
GSL::Matrix.alloc(*rows)
|
521
573
|
end
|
522
574
|
end
|
523
|
-
|
575
|
+
|
576
|
+
def to_multiset_by_split(*fields)
|
524
577
|
require 'statsample/multiset'
|
525
578
|
if fields.size==1
|
526
579
|
to_multiset_by_split_one_field(fields[0])
|
527
580
|
else
|
528
581
|
to_multiset_by_split_multiple_fields(*fields)
|
529
582
|
end
|
530
|
-
|
531
|
-
|
583
|
+
end
|
584
|
+
|
585
|
+
# Create a new dataset with all cases for which the block returns true
|
532
586
|
def filter
|
533
587
|
ds=self.dup_empty
|
534
588
|
each {|c|
|
@@ -537,6 +591,7 @@ module Statsample
|
|
537
591
|
ds.update_valid_data
|
538
592
|
ds
|
539
593
|
end
|
594
|
+
|
540
595
|
# Creates a new vector with the data of a given field for which the block returns true
|
541
596
|
def filter_field(field)
|
542
597
|
a=[]
|
@@ -545,6 +600,7 @@ module Statsample
|
|
545
600
|
}
|
546
601
|
a.to_vector(@vectors[field].type)
|
547
602
|
end
|
603
|
+
|
548
604
|
def to_multiset_by_split_one_field(field)
|
549
605
|
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
|
550
606
|
factors=@vectors[field].factors
|
@@ -604,7 +660,7 @@ module Statsample
|
|
604
660
|
text.gsub!(f,"row['#{f}']")
|
605
661
|
end
|
606
662
|
}
|
607
|
-
collect_with_index {|i
|
663
|
+
collect_with_index {|row, i|
|
608
664
|
invalid=false
|
609
665
|
@fields.each{|f|
|
610
666
|
if @vectors[f].data_with_nils[i].nil?
|
@@ -653,6 +709,7 @@ module Statsample
|
|
653
709
|
end
|
654
710
|
# Creates a new dataset for one to many relations
|
655
711
|
# on a dataset, based on pattern of field names.
|
712
|
+
#
|
656
713
|
# for example, you have a survey for number of children
|
657
714
|
# with this structure:
|
658
715
|
# id, name, child_name_1, child_age_1, child_name_2, child_age_2
|
@@ -1,8 +1,70 @@
|
|
1
1
|
module Statsample
|
2
2
|
class DominanceAnalysis
|
3
|
+
# == Goal
|
3
4
|
# Generates Bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
|
4
|
-
#
|
5
|
-
#
|
5
|
+
#
|
6
|
+
# == Usage
|
7
|
+
#
|
8
|
+
# require 'statsample'
|
9
|
+
# a=100.times.collect {rand}.to_scale
|
10
|
+
# b=100.times.collect {rand}.to_scale
|
11
|
+
# c=100.times.collect {rand}.to_scale
|
12
|
+
# d=100.times.collect {rand}.to_scale
|
13
|
+
# ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
14
|
+
# ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
|
15
|
+
# dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y', :debug=>true)
|
16
|
+
# dab.bootstrap(100,nil)
|
17
|
+
# puts dab.summary
|
18
|
+
# <strong>Output</strong>
|
19
|
+
# Sample size: 100
|
20
|
+
# t: 1.98421693632958
|
21
|
+
#
|
22
|
+
# Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
|
23
|
+
# Table: Bootstrap report
|
24
|
+
# --------------------------------------------------------------------------------------------
|
25
|
+
# | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
|
26
|
+
# --------------------------------------------------------------------------------------------
|
27
|
+
# | Complete dominance |
|
28
|
+
# --------------------------------------------------------------------------------------------
|
29
|
+
# | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
|
30
|
+
# | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
|
31
|
+
# | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
|
32
|
+
# | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
|
33
|
+
# | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
|
34
|
+
# | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
|
35
|
+
# --------------------------------------------------------------------------------------------
|
36
|
+
# | Conditional dominance |
|
37
|
+
# --------------------------------------------------------------------------------------------
|
38
|
+
# | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
|
39
|
+
# | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
|
40
|
+
# | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
|
41
|
+
# | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
|
42
|
+
# | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
|
43
|
+
# | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
|
44
|
+
# --------------------------------------------------------------------------------------------
|
45
|
+
# | General Dominance |
|
46
|
+
# --------------------------------------------------------------------------------------------
|
47
|
+
# | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
|
48
|
+
# | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
|
49
|
+
# | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
|
50
|
+
# | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
|
51
|
+
# | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
|
52
|
+
# | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
|
53
|
+
# --------------------------------------------------------------------------------------------
|
54
|
+
#
|
55
|
+
# Table: General averages
|
56
|
+
# ---------------------------------------
|
57
|
+
# | var | mean | se | p.5 | p.95 |
|
58
|
+
# ---------------------------------------
|
59
|
+
# | a | 0.133 | 0.049 | 0.062 | 0.218 |
|
60
|
+
# | b | 0.106 | 0.048 | 0.029 | 0.199 |
|
61
|
+
# | c | 0.035 | 0.032 | 0.002 | 0.106 |
|
62
|
+
# | d | 0.023 | 0.019 | 0.002 | 0.062 |
|
63
|
+
# ---------------------------------------
|
64
|
+
#
|
65
|
+
# == References:
|
66
|
+
#
|
67
|
+
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
6
68
|
class Bootstrap
|
7
69
|
include GetText
|
8
70
|
include Writable
|
@@ -27,12 +89,13 @@ module Statsample
|
|
27
89
|
attr_accessor :alpha
|
28
90
|
# Debug?
|
29
91
|
attr_accessor :debug
|
92
|
+
# Default level of confidence for t calculation
|
93
|
+
ALPHA=0.95
|
30
94
|
# Create a new Dominance Analysis Bootstrap Object
|
31
95
|
#
|
32
96
|
# * ds: A Dataset object
|
33
97
|
# * y_var: Name of dependent variable
|
34
98
|
# * opts: Any other attribute of the class
|
35
|
-
ALPHA=0.95
|
36
99
|
def initialize(ds,y_var, opts=Hash.new)
|
37
100
|
@ds=ds
|
38
101
|
@y_var=y_var
|
@@ -1,13 +1,12 @@
|
|
1
1
|
require 'statsample/dominanceanalysis/bootstrap'
|
2
2
|
module Statsample
|
3
|
-
# Dominance Analysis is a procedure based on an examination of the R
|
3
|
+
# Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values
|
4
4
|
# for all possible subset models, to identify the relevance of one or more
|
5
5
|
# predictors in the prediction of the criterion.
|
6
6
|
#
|
7
|
-
#
|
8
7
|
# See Budescu(1993), Azen & Budescu (2003, 2006) for more information.
|
9
8
|
#
|
10
|
-
#
|
9
|
+
# == Use
|
11
10
|
#
|
12
11
|
# a=1000.times.collect {rand}.to_scale
|
13
12
|
# b=1000.times.collect {rand}.to_scale
|
@@ -17,7 +16,7 @@ module Statsample
|
|
17
16
|
# da=Statsample::DominanceAnalysis.new(ds,'y')
|
18
17
|
# puts da.summary
|
19
18
|
#
|
20
|
-
# Output:
|
19
|
+
# === Output:
|
21
20
|
#
|
22
21
|
# Report: Report 2010-02-08 19:10:11 -0300
|
23
22
|
# Table: Dominance Analysis result
|
@@ -51,12 +50,12 @@ module Statsample
|
|
51
50
|
# | a - c | 1.0 | 1.0 | 1.0 |
|
52
51
|
# | b - c | 1.0 | 1.0 | 1.0 |
|
53
52
|
# -----------------------------------------
|
54
|
-
|
55
53
|
#
|
56
54
|
# == References:
|
57
55
|
# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
|
58
56
|
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
59
57
|
# * Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
|
58
|
+
#
|
60
59
|
class DominanceAnalysis
|
61
60
|
include GetText
|
62
61
|
bindtextdomain("statsample")
|
@@ -366,7 +365,7 @@ module Statsample
|
|
366
365
|
generator.parse_element(t)
|
367
366
|
generator.add_html("</div>")
|
368
367
|
end
|
369
|
-
class ModelData
|
368
|
+
class ModelData # :nodoc:
|
370
369
|
attr_reader :contributions
|
371
370
|
def initialize(independent, data, da)
|
372
371
|
@independent=independent
|
@@ -1,21 +1,42 @@
|
|
1
1
|
module Statsample
|
2
2
|
module Factor
|
3
|
-
# Principal Component Analysis of a
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
3
|
+
# Principal Component Analysis (PCA) of a
|
4
|
+
# covariance or correlation matrix.
|
5
|
+
#
|
6
|
+
# For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis
|
7
|
+
#
|
8
|
+
# == Usage:
|
9
|
+
# require 'statsample'
|
7
10
|
# a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
|
8
11
|
# b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
|
9
12
|
# ds={'a'=>a,'b'=>b}.to_dataset
|
10
13
|
# cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
|
11
14
|
# pca=Statsample::Factor::PCA.new(cor_matrix)
|
12
|
-
#
|
15
|
+
# pca.m
|
16
|
+
# => 1
|
17
|
+
# pca.eigenvalues
|
18
|
+
# => [1.92592927269225, 0.0740707273077545]
|
19
|
+
# pca.component_matrix
|
20
|
+
# => GSL::Matrix
|
21
|
+
# [ 9.813e-01
|
22
|
+
# 9.813e-01 ]
|
23
|
+
# pca.communalities
|
24
|
+
# => [0.962964636346122, 0.962964636346122]
|
25
|
+
#
|
26
|
+
# == References:
|
27
|
+
#
|
28
|
+
# * SPSS manual
|
29
|
+
# * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
|
30
|
+
#
|
13
31
|
class PCA
|
14
|
-
|
32
|
+
# Name of analysis
|
33
|
+
attr_accessor :name
|
34
|
+
# Number of factors. Set by default to the number of factors
|
35
|
+
# with eigenvalues > 1
|
36
|
+
attr_accessor :m
|
15
37
|
include GetText
|
16
38
|
bindtextdomain("statsample")
|
17
39
|
|
18
|
-
|
19
40
|
def initialize(matrix ,opts=Hash.new)
|
20
41
|
if matrix.respond_to? :to_gsl
|
21
42
|
matrix=matrix.to_gsl
|
@@ -42,6 +63,7 @@ module Factor
|
|
42
63
|
}
|
43
64
|
@ds=h.to_dataset
|
44
65
|
end
|
66
|
+
|
45
67
|
# Feature vector for m factors
|
46
68
|
def feature_vector(m=nil)
|
47
69
|
m||=@m
|
@@ -69,10 +91,10 @@ module Factor
|
|
69
91
|
gammas.push(Math::sqrt(@eigenpairs[i][0]))
|
70
92
|
}
|
71
93
|
gamma_m=GSL::Matrix.diagonal(gammas)
|
72
|
-
omega_m*(gamma_m)
|
94
|
+
(omega_m*(gamma_m)).to_matrix
|
73
95
|
end
|
74
|
-
#
|
75
|
-
def
|
96
|
+
# Communalities for all variables given m factors
|
97
|
+
def communalities(m=nil)
|
76
98
|
m||=@m
|
77
99
|
h=[]
|
78
100
|
@n_variables.times do |i|
|
@@ -84,9 +106,11 @@ module Factor
|
|
84
106
|
end
|
85
107
|
h
|
86
108
|
end
|
109
|
+
# Array with eigenvalues
|
87
110
|
def eigenvalues
|
88
111
|
@eigenpairs.collect {|c| c[0] }
|
89
112
|
end
|
113
|
+
|
90
114
|
def calculate_eigenpairs
|
91
115
|
eigval, eigvec= GSL::Eigen.symmv(@matrix)
|
92
116
|
@eigenpairs={}
|
@@ -95,13 +119,18 @@ module Factor
|
|
95
119
|
}
|
96
120
|
@eigenpairs=@eigenpairs.sort.reverse
|
97
121
|
end
|
122
|
+
def summary
|
123
|
+
rp=ReportBuilder.new()
|
124
|
+
rp.add(self)
|
125
|
+
rp.to_text
|
126
|
+
end
|
98
127
|
def to_reportbuilder(generator) # :nodoc:
|
99
128
|
anchor=generator.add_toc_entry(_("PCA: ")+name)
|
100
129
|
generator.add_html "<div class='pca'>"+_("PCA")+" #{@name}<a name='#{anchor}'></a>"
|
101
130
|
|
102
131
|
generator.add_text "Number of factors: #{m}"
|
103
132
|
t=ReportBuilder::Table.new(:name=>_("Communalities"), :header=>["Variable","Initial","Extraction"])
|
104
|
-
|
133
|
+
communalities(m).each_with_index {|com,i|
|
105
134
|
t.add_row([i, 1.0, sprintf("%0.3f", com)])
|
106
135
|
}
|
107
136
|
generator.parse_element(t)
|
@@ -122,6 +151,7 @@ module Factor
|
|
122
151
|
generator.parse_element(t)
|
123
152
|
generator.add_html("</div>")
|
124
153
|
end
|
154
|
+
private :calculate_eigenpairs, :create_centered_ds
|
125
155
|
end
|
126
156
|
end
|
127
157
|
end
|