statsample 0.6.5 → 0.6.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -1,22 +1,23 @@
1
1
  require 'statsample/vector'
2
2
 
3
3
  class Hash
4
+ # Creates a Statsample::Dataset based on a Hash
4
5
  def to_dataset(*args)
5
6
  Statsample::Dataset.new(self,*args)
6
7
  end
7
8
  end
8
9
 
9
10
  class Array
10
- def prefix(s)
11
+ def prefix(s) # :nodoc:
11
12
  self.collect{|c| s+c.to_s }
12
13
  end
13
- def suffix(s)
14
+ def suffix(s) # :nodoc:
14
15
  self.collect{|c| c.to_s+s }
15
16
  end
16
17
  end
17
18
 
18
19
  module Statsample
19
- class DatasetException < RuntimeError
20
+ class DatasetException < RuntimeError # :nodoc:
20
21
  attr_reader :ds,:exp
21
22
  def initialize(ds,e)
22
23
  @ds=ds
@@ -28,15 +29,49 @@ module Statsample
28
29
  m
29
30
  end
30
31
  end
32
+ # Set of cases with values for one or more variables,
33
+ # analogous to a data frame in R or a standard data file of SPSS.
34
+ # Every vector has a <tt>#field</tt> name, which represents it. By default,
35
+ # the vectors are ordered by their field names, but you can change
36
+ # the field order manually.
37
+ # The Dataset works as a Hash, where keys are field names
38
+ # and values are Statsample::Vector
39
+ #
40
+ #
41
+ # ==Usage
42
+ # Create an empty dataset
43
+ # Dataset.new()
44
+ # Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>
45
+ # Dataset.new(%w{v1 v2 v3})
46
+ # Create a dataset with two vectors
47
+ # Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
48
+ # Create a dataset with two given vectors (v1 and v2), with vectors in inverted order
49
+ # Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
50
+ #
51
+ # The fast way to create a dataset uses Hash#to_dataset, with
52
+ # field order as arguments
53
+ # v1 = [1,2,3].to_scale
54
+ # v2 = [1,2,3].to_scale
55
+ # ds = {'v1'=>v1, 'v2'=>v2}.to_dataset(%w{v2 v1})
56
+
31
57
  class Dataset
32
58
  include Writable
33
- attr_reader :vectors, :fields, :cases, :i
59
+ # Hash of Statsample::Vector
60
+ attr_reader :vectors
61
+ # Ordered names of vectors
62
+ attr_reader :fields
63
+ # Number of cases
64
+ attr_reader :cases
65
+ # Location of pointer on enumerations methods (like #each)
66
+ attr_reader :i
67
+ # Deprecated: Label of vectors
34
68
  attr_accessor :labels
35
69
 
36
70
  # Generates a new dataset, using three vectors
37
71
  # - Rows
38
72
  # - Columns
39
73
  # - Values
74
+ #
40
75
  # For example, you have these values
41
76
  #
42
77
  # x y v
@@ -88,16 +123,7 @@ module Statsample
88
123
  # order of variables. If empty, vector keys in alphabetic order are
89
124
  # used as fields
90
125
  # [labels] Hash to set names for fields.
91
- #
92
- #
93
- # Dataset.new()
94
- # Dataset.new(%w{v1 v2 v3})
95
- # Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
96
- # Dataset.new({'v2'=>v2,'v1'=>v1},['v1','v2'])
97
- #
98
- # The fast way to create a dataset uses Hash#to_dataset, with
99
- # fields and labels as arguments
100
- # ds = {'v1'=>[1,2,3].to_vector}.to_dataset
126
+
101
127
  #
102
128
  def initialize(vectors={}, fields=[], labels={})
103
129
  if vectors.instance_of? Array
@@ -120,7 +146,8 @@ module Statsample
120
146
  end
121
147
  matrix
122
148
  end
123
- def label(v_id)
149
+ # Retrieves label for a vector, given a field name.
150
+ def label(v_id)
124
151
  raise "Vector #{v} doesn't exists" unless @fields.include? v_id
125
152
  @labels[v_id].nil? ? v_id : @labels[v_id]
126
153
  end
@@ -233,12 +260,20 @@ module Statsample
233
260
  ds_boot.update_valid_data
234
261
  ds_boot
235
262
  end
236
- # Fast version of add case
263
+ # Fast version of #add_case.
237
264
  # Can only add one case and no error check is performed
238
- # You SHOULD use update_valid_data at the end of insertion cycle
265
+ # You SHOULD use #update_valid_data at the end of insertion cycle
239
266
  def add_case_array(v)
240
267
  v.each_index {|i| d=@vectors[@fields[i]].data; d.push(v[i])}
241
268
  end
269
+ # Insert a case, using:
270
+ # * Array: size equal to number of vectors and values in the same order as fields
271
+ # * Hash: keys equal to fields
272
+ # If uvd is false, #update_valid_data is not executed after
273
+ # inserting a case. This is very useful if you want to increase the
274
+ # performance on inserting many cases,
275
+ # because #update_valid_data performs check on vectors and on the dataset
276
+
242
277
  def add_case(v,uvd=true)
243
278
  case v
244
279
  when Array
@@ -258,14 +293,18 @@ module Statsample
258
293
  update_valid_data
259
294
  end
260
295
  end
296
+ # Check vectors and fields after inserting data. Use only
297
+ # after #add_case_array or #add_case with second parameter set to false
261
298
  def update_valid_data
262
299
  @fields.each{|f| @vectors[f].set_valid_data}
263
300
  check_length
264
301
  end
302
+ # Delete a vector
265
303
  def delete_vector(name)
266
304
  @fields.delete(name)
267
305
  @vectors.delete(name)
268
306
  end
307
+
269
308
  def add_vectors_by_split_recode(name,join='-',sep=Statsample::SPLIT_TOKEN)
270
309
  split=@vectors[name].split_by_separator(sep)
271
310
  i=1
@@ -294,7 +333,7 @@ module Statsample
294
333
  def vector_sum(fields=nil)
295
334
  a=[]
296
335
  fields||=@fields
297
- collect_with_index do |i,row|
336
+ collect_with_index do |row, i|
298
337
  if(fields.find{|f| !@vectors[f].data_with_nils[i]})
299
338
  nil
300
339
  else
@@ -302,16 +341,17 @@ module Statsample
302
341
  end
303
342
  end
304
343
  end
344
+ # Check if #fields attribute is correct, after inserting or deleting vectors
305
345
  def check_fields(fields)
306
346
  fields||=@fields
307
347
  raise "Fields #{(fields-@fields).join(", ")} doesn't exists on dataset" if (fields-@fields).size>0
308
348
  fields
309
349
  end
350
+
310
351
  # Returns a vector with the numbers of missing values for a case
311
-
312
352
  def vector_missing_values(fields=nil)
313
353
  fields=check_fields(fields)
314
- collect_with_index do |i,row|
354
+ collect_with_index do |row, i|
315
355
  fields.inject(0) {|a,v|
316
356
  a+ ((@vectors[v].data_with_nils[i].nil?) ? 1: 0)
317
357
  }
@@ -319,9 +359,8 @@ module Statsample
319
359
  end
320
360
  def vector_count_characters(fields=nil)
321
361
  fields=check_fields(fields)
322
- collect_with_index do |i,row|
362
+ collect_with_index do |row, i|
323
363
  fields.inject(0){|a,v|
324
-
325
364
  a+((@vectors[v].data_with_nils[i].nil?) ? 0: row[v].to_s.size)
326
365
  }
327
366
  end
@@ -353,7 +392,8 @@ module Statsample
353
392
  end
354
393
  a.to_vector(:scale)
355
394
  end
356
- def check_length
395
+ # Check vectors for type and size.
396
+ def check_length # :nodoc:
357
397
  size=nil
358
398
  @vectors.each do |k,v|
359
399
  raise Exception, "Data #{v.class} is not a vector on key #{k}" if !v.is_a? Statsample::Vector
@@ -368,16 +408,19 @@ module Statsample
368
408
  end
369
409
  @cases=size
370
410
  end
371
- def each_vector
372
- @fields.each{|k| yield k,@vectors[k]}
411
+ # Retrieves each vector as [key, vector]
412
+ def each_vector # :yield: |key, vector|
413
+ @fields.each{|k| yield k, @vectors[k]}
373
414
  end
415
+
374
416
  if Statsample::STATSAMPLE__.respond_to?(:case_as_hash)
375
417
  def case_as_hash(c) # :nodoc:
376
418
  Statsample::STATSAMPLE__.case_as_hash(self,c)
377
419
  end
378
420
  else
379
- def case_as_hash(c)
380
- _case_as_hash(c)
421
+ # Retrieves case i as a hash
422
+ def case_as_hash(i)
423
+ _case_as_hash(i)
381
424
  end
382
425
  end
383
426
 
@@ -386,8 +429,9 @@ module Statsample
386
429
  Statsample::STATSAMPLE__.case_as_array(self,c)
387
430
  end
388
431
  else
389
- def case_as_array(c)
390
- _case_as_array(c)
432
+ # Retrieves case i as a array, ordered on #fields order
433
+ def case_as_array(i)
434
+ _case_as_array(i)
391
435
  end
392
436
  end
393
437
  def _case_as_hash(c) # :nodoc:
@@ -396,6 +440,7 @@ module Statsample
396
440
  def _case_as_array(c) # :nodoc:
397
441
  @fields.collect {|x| @vectors[x][c]}
398
442
  end
443
+
399
444
  # Returns each case as a hash
400
445
  def each
401
446
  begin
@@ -411,7 +456,7 @@ module Statsample
411
456
  end
412
457
  end
413
458
  # Returns each case as hash and index
414
- def each_with_index
459
+ def each_with_index # :yield: |case, i|
415
460
  begin
416
461
  @i=0
417
462
  @cases.times{|i|
@@ -447,6 +492,7 @@ module Statsample
447
492
  }
448
493
  @i=nil
449
494
  end
495
+ # Set fields order. If you omit one or more vectors,
450
496
  def fields=(f)
451
497
  @fields=f
452
498
  check_order
@@ -470,6 +516,8 @@ module Statsample
470
516
  raise ArgumentError, "You need a String or a Range"
471
517
  end
472
518
  end
519
+ # Retrieves a Statsample::Vector, based on the result
520
+ # of calculation performed on each case.
473
521
  def collect(type=:scale)
474
522
  data=[]
475
523
  each {|row|
@@ -477,10 +525,11 @@ module Statsample
477
525
  }
478
526
  Statsample::Vector.new(data,type)
479
527
  end
528
+ # Same as #collect, but giving case index as second parameter on yield.
480
529
  def collect_with_index(type=:scale)
481
530
  data=[]
482
531
  each_with_index {|row, i|
483
- data.push(yield(i,row))
532
+ data.push(yield(row, i))
484
533
  }
485
534
  Statsample::Vector.new(data,type)
486
535
  end
@@ -504,6 +553,8 @@ module Statsample
504
553
  raise ArgumentError,"Should pass a Statsample::Vector"
505
554
  end
506
555
  end
556
+ # Return data as a matrix. Columns are ordered by #fields and
557
+ # rows by order of insertion
507
558
  def to_matrix
508
559
  rows=[]
509
560
  self.each_array{|c|
@@ -511,7 +562,8 @@ module Statsample
511
562
  }
512
563
  Matrix.rows(rows)
513
564
  end
514
- if HAS_GSL
565
+
566
+ if Statsample.has_gsl?
515
567
  def to_matrix_gsl
516
568
  rows=[]
517
569
  self.each_array{|c|
@@ -520,15 +572,17 @@ module Statsample
520
572
  GSL::Matrix.alloc(*rows)
521
573
  end
522
574
  end
523
- def to_multiset_by_split(*fields)
575
+
576
+ def to_multiset_by_split(*fields)
524
577
  require 'statsample/multiset'
525
578
  if fields.size==1
526
579
  to_multiset_by_split_one_field(fields[0])
527
580
  else
528
581
  to_multiset_by_split_multiple_fields(*fields)
529
582
  end
530
- end
531
- # create a new dataset with all the data which the block returns true
583
+ end
584
+
585
+ # Create a new dataset with all cases for which the block returns true
532
586
  def filter
533
587
  ds=self.dup_empty
534
588
  each {|c|
@@ -537,6 +591,7 @@ module Statsample
537
591
  ds.update_valid_data
538
592
  ds
539
593
  end
594
+
540
595
  # Creates a new vector with the data of a given field for which the block returns true
541
596
  def filter_field(field)
542
597
  a=[]
@@ -545,6 +600,7 @@ module Statsample
545
600
  }
546
601
  a.to_vector(@vectors[field].type)
547
602
  end
603
+
548
604
  def to_multiset_by_split_one_field(field)
549
605
  raise ArgumentError,"Should use a correct field name" if !@fields.include? field
550
606
  factors=@vectors[field].factors
@@ -604,7 +660,7 @@ module Statsample
604
660
  text.gsub!(f,"row['#{f}']")
605
661
  end
606
662
  }
607
- collect_with_index {|i,row|
663
+ collect_with_index {|row, i|
608
664
  invalid=false
609
665
  @fields.each{|f|
610
666
  if @vectors[f].data_with_nils[i].nil?
@@ -653,6 +709,7 @@ module Statsample
653
709
  end
654
710
  # Creates a new dataset for one to many relations
655
711
  # on a dataset, based on pattern of field names.
712
+ #
656
713
  # for example, you have a survey for number of children
657
714
  # with this structure:
658
715
  # id, name, child_name_1, child_age_1, child_name_2, child_age_2
@@ -1,8 +1,70 @@
1
1
  module Statsample
2
2
  class DominanceAnalysis
3
+ # == Goal
3
4
  # Generates Bootstrap sample to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
4
- # References:
5
- # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
5
+ #
6
+ # == Usage
7
+ #
8
+ # require 'statsample'
9
+ # a=100.times.collect {rand}.to_scale
10
+ # b=100.times.collect {rand}.to_scale
11
+ # c=100.times.collect {rand}.to_scale
12
+ # d=100.times.collect {rand}.to_scale
13
+ # ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
14
+ # ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
15
+ # dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y', :debug=>true)
16
+ # dab.bootstrap(100,nil)
17
+ # puts dab.summary
18
+ # <strong>Output</strong>
19
+ # Sample size: 100
20
+ # t: 1.98421693632958
21
+ #
22
+ # Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
23
+ # Table: Bootstrap report
24
+ # --------------------------------------------------------------------------------------------
25
+ # | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
26
+ # --------------------------------------------------------------------------------------------
27
+ # | Complete dominance |
28
+ # --------------------------------------------------------------------------------------------
29
+ # | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
30
+ # | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
31
+ # | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
32
+ # | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
33
+ # | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
34
+ # | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
35
+ # --------------------------------------------------------------------------------------------
36
+ # | Conditional dominance |
37
+ # --------------------------------------------------------------------------------------------
38
+ # | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
39
+ # | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
40
+ # | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
41
+ # | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
42
+ # | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
43
+ # | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
44
+ # --------------------------------------------------------------------------------------------
45
+ # | General Dominance |
46
+ # --------------------------------------------------------------------------------------------
47
+ # | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
48
+ # | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
49
+ # | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
50
+ # | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
51
+ # | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
52
+ # | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
53
+ # --------------------------------------------------------------------------------------------
54
+ #
55
+ # Table: General averages
56
+ # ---------------------------------------
57
+ # | var | mean | se | p.5 | p.95 |
58
+ # ---------------------------------------
59
+ # | a | 0.133 | 0.049 | 0.062 | 0.218 |
60
+ # | b | 0.106 | 0.048 | 0.029 | 0.199 |
61
+ # | c | 0.035 | 0.032 | 0.002 | 0.106 |
62
+ # | d | 0.023 | 0.019 | 0.002 | 0.062 |
63
+ # ---------------------------------------
64
+ #
65
+ # == References:
66
+ #
67
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
6
68
  class Bootstrap
7
69
  include GetText
8
70
  include Writable
@@ -27,12 +89,13 @@ module Statsample
27
89
  attr_accessor :alpha
28
90
  # Debug?
29
91
  attr_accessor :debug
92
+ # Default level of confidence for t calculation
93
+ ALPHA=0.95
30
94
  # Create a new Dominance Analysis Bootstrap Object
31
95
  #
32
96
  # * ds: A Dataset object
33
97
  # * y_var: Name of dependent variable
34
98
  # * opts: Any other attribute of the class
35
- ALPHA=0.95
36
99
  def initialize(ds,y_var, opts=Hash.new)
37
100
  @ds=ds
38
101
  @y_var=y_var
@@ -1,13 +1,12 @@
1
1
  require 'statsample/dominanceanalysis/bootstrap'
2
2
  module Statsample
3
- # Dominance Analysis is a procedure based on an examination of the R^2 values
3
+ # Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values
4
4
  # for all possible subset models, to identify the relevance of one or more
5
5
  # predictors in the prediction of a criterion.
6
6
  #
7
- #
8
7
  # See Budescu(1993), Azen & Budescu (2003, 2006) for more information.
9
8
  #
10
- # Example:
9
+ # == Use
11
10
  #
12
11
  # a=1000.times.collect {rand}.to_scale
13
12
  # b=1000.times.collect {rand}.to_scale
@@ -17,7 +16,7 @@ module Statsample
17
16
  # da=Statsample::DominanceAnalysis.new(ds,'y')
18
17
  # puts da.summary
19
18
  #
20
- # Output:
19
+ # === Output:
21
20
  #
22
21
  # Report: Report 2010-02-08 19:10:11 -0300
23
22
  # Table: Dominance Analysis result
@@ -51,12 +50,12 @@ module Statsample
51
50
  # | a - c | 1.0 | 1.0 | 1.0 |
52
51
  # | b - c | 1.0 | 1.0 | 1.0 |
53
52
  # -----------------------------------------
54
-
55
53
  #
56
54
  # == References:
57
55
  # * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
58
56
  # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
59
57
  # * Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
58
+ #
60
59
  class DominanceAnalysis
61
60
  include GetText
62
61
  bindtextdomain("statsample")
@@ -366,7 +365,7 @@ module Statsample
366
365
  generator.parse_element(t)
367
366
  generator.add_html("</div>")
368
367
  end
369
- class ModelData
368
+ class ModelData # :nodoc:
370
369
  attr_reader :contributions
371
370
  def initialize(independent, data, da)
372
371
  @independent=independent
@@ -1,21 +1,42 @@
1
1
  module Statsample
2
2
  module Factor
3
- # Principal Component Analysis of a given covariance or correlation matrix.
4
- # For factorial Analysis, use Statsample::Factor::PrincipalAxis
5
- # Reference: SPSS manual
6
- # Use:
3
+ # Principal Component Analysis (PCA) of a
4
+ # covariance or correlation matrix.
5
+ #
6
+ # For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis
7
+ #
8
+ # == Usage:
9
+ # require 'statsample'
7
10
  # a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
8
11
  # b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
9
12
  # ds={'a'=>a,'b'=>b}.to_dataset
10
13
  # cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
11
14
  # pca=Statsample::Factor::PCA.new(cor_matrix)
12
- # p pca.component_matrix
15
+ # pca.m
16
+ # => 1
17
+ # pca.eigenvalues
18
+ # => [1.92592927269225, 0.0740707273077545]
19
+ # pca.component_matrix
20
+ # => GSL::Matrix
21
+ # [ 9.813e-01
22
+ # 9.813e-01 ]
23
+ # pca.communalities
24
+ # => [0.962964636346122, 0.962964636346122]
25
+ #
26
+ # == References:
27
+ #
28
+ # * SPSS manual
29
+ # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
30
+ #
13
31
  class PCA
14
- attr_accessor :name, :m
32
+ # Name of analysis
33
+ attr_accessor :name
34
+ # Number of factors. Set by default to the number of factors
35
+ # with eigen values > 1
36
+ attr_accessor :m
15
37
  include GetText
16
38
  bindtextdomain("statsample")
17
39
 
18
-
19
40
  def initialize(matrix ,opts=Hash.new)
20
41
  if matrix.respond_to? :to_gsl
21
42
  matrix=matrix.to_gsl
@@ -42,6 +63,7 @@ module Factor
42
63
  }
43
64
  @ds=h.to_dataset
44
65
  end
66
+
45
67
  # Feature vector for m factors
46
68
  def feature_vector(m=nil)
47
69
  m||=@m
@@ -69,10 +91,10 @@ module Factor
69
91
  gammas.push(Math::sqrt(@eigenpairs[i][0]))
70
92
  }
71
93
  gamma_m=GSL::Matrix.diagonal(gammas)
72
- omega_m*(gamma_m)
94
+ (omega_m*(gamma_m)).to_matrix
73
95
  end
74
- # Communality for all variables given m factors
75
- def communality(m=nil)
96
+ # Communalities for all variables given m factors
97
+ def communalities(m=nil)
76
98
  m||=@m
77
99
  h=[]
78
100
  @n_variables.times do |i|
@@ -84,9 +106,11 @@ module Factor
84
106
  end
85
107
  h
86
108
  end
109
+ # Array with eigenvalues
87
110
  def eigenvalues
88
111
  @eigenpairs.collect {|c| c[0] }
89
112
  end
113
+
90
114
  def calculate_eigenpairs
91
115
  eigval, eigvec= GSL::Eigen.symmv(@matrix)
92
116
  @eigenpairs={}
@@ -95,13 +119,18 @@ module Factor
95
119
  }
96
120
  @eigenpairs=@eigenpairs.sort.reverse
97
121
  end
122
+ def summary
123
+ rp=ReportBuilder.new()
124
+ rp.add(self)
125
+ rp.to_text
126
+ end
98
127
  def to_reportbuilder(generator) # :nodoc:
99
128
  anchor=generator.add_toc_entry(_("PCA: ")+name)
100
129
  generator.add_html "<div class='pca'>"+_("PCA")+" #{@name}<a name='#{anchor}'></a>"
101
130
 
102
131
  generator.add_text "Number of factors: #{m}"
103
132
  t=ReportBuilder::Table.new(:name=>_("Communalities"), :header=>["Variable","Initial","Extraction"])
104
- communality(m).each_with_index {|com,i|
133
+ communalities(m).each_with_index {|com,i|
105
134
  t.add_row([i, 1.0, sprintf("%0.3f", com)])
106
135
  }
107
136
  generator.parse_element(t)
@@ -122,6 +151,7 @@ module Factor
122
151
  generator.parse_element(t)
123
152
  generator.add_html("</div>")
124
153
  end
154
+ private :calculate_eigenpairs, :create_centered_ds
125
155
  end
126
156
  end
127
157
  end