statsample 0.18.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +23 -0
  3. data/Manifest.txt +28 -17
  4. data/Rakefile +3 -2
  5. data/benchmarks/correlation_matrix_15_variables.rb +31 -0
  6. data/benchmarks/correlation_matrix_5_variables.rb +32 -0
  7. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  8. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  9. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  11. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  13. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  14. data/benchmarks/factor_map.rb +37 -0
  15. data/benchmarks/helpers_benchmark.rb +5 -0
  16. data/examples/boxplot.rb +13 -14
  17. data/examples/correlation_matrix.rb +16 -8
  18. data/examples/dataset.rb +13 -4
  19. data/examples/dominance_analysis.rb +23 -17
  20. data/examples/dominance_analysis_bootstrap.rb +28 -22
  21. data/examples/histogram.rb +8 -9
  22. data/examples/icc.rb +20 -21
  23. data/examples/levene.rb +10 -4
  24. data/examples/multiple_regression.rb +9 -28
  25. data/examples/multivariate_correlation.rb +9 -3
  26. data/examples/parallel_analysis.rb +20 -16
  27. data/examples/polychoric.rb +15 -9
  28. data/examples/principal_axis.rb +18 -6
  29. data/examples/reliability.rb +26 -13
  30. data/examples/scatterplot.rb +10 -6
  31. data/examples/t_test.rb +15 -6
  32. data/examples/tetrachoric.rb +9 -2
  33. data/examples/u_test.rb +12 -4
  34. data/examples/vector.rb +13 -2
  35. data/examples/velicer_map_test.rb +33 -26
  36. data/lib/statsample.rb +32 -12
  37. data/lib/statsample/analysis.rb +79 -0
  38. data/lib/statsample/analysis/suite.rb +72 -0
  39. data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
  40. data/lib/statsample/bivariate.rb +70 -16
  41. data/lib/statsample/dataset.rb +25 -19
  42. data/lib/statsample/dominanceanalysis.rb +2 -2
  43. data/lib/statsample/factor.rb +2 -0
  44. data/lib/statsample/factor/map.rb +16 -10
  45. data/lib/statsample/factor/parallelanalysis.rb +9 -3
  46. data/lib/statsample/factor/pca.rb +28 -32
  47. data/lib/statsample/factor/rotation.rb +15 -8
  48. data/lib/statsample/graph/boxplot.rb +3 -4
  49. data/lib/statsample/graph/histogram.rb +2 -1
  50. data/lib/statsample/graph/scatterplot.rb +1 -0
  51. data/lib/statsample/matrix.rb +106 -16
  52. data/lib/statsample/regression.rb +4 -1
  53. data/lib/statsample/regression/binomial.rb +1 -1
  54. data/lib/statsample/regression/multiple/baseengine.rb +19 -9
  55. data/lib/statsample/regression/multiple/gslengine.rb +127 -126
  56. data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
  57. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  58. data/lib/statsample/regression/simple.rb +31 -6
  59. data/lib/statsample/reliability.rb +11 -3
  60. data/lib/statsample/reliability/scaleanalysis.rb +4 -4
  61. data/lib/statsample/shorthand.rb +81 -0
  62. data/lib/statsample/test/chisquare.rb +1 -1
  63. data/lib/statsample/vector.rb +163 -163
  64. data/lib/statsample/vector/gsl.rb +106 -0
  65. data/references.txt +2 -2
  66. data/{data → test/fixtures}/crime.txt +0 -0
  67. data/{data → test/fixtures}/hartman_23.matrix +0 -0
  68. data/{data → test/fixtures}/repeated_fields.csv +0 -0
  69. data/{data → test/fixtures}/test_binomial.csv +0 -0
  70. data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
  71. data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
  72. data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
  73. data/{data → test/fixtures}/tetmat_test.txt +0 -0
  74. data/test/helpers_tests.rb +18 -2
  75. data/test/test_analysis.rb +118 -0
  76. data/test/test_anovatwoway.rb +1 -1
  77. data/test/test_anovatwowaywithdataset.rb +1 -1
  78. data/test/test_anovawithvectors.rb +1 -2
  79. data/test/test_bartlettsphericity.rb +1 -2
  80. data/test/test_bivariate.rb +64 -22
  81. data/test/test_codification.rb +1 -2
  82. data/test/test_crosstab.rb +1 -2
  83. data/test/test_csv.rb +3 -4
  84. data/test/test_dataset.rb +24 -3
  85. data/test/test_dominance_analysis.rb +1 -2
  86. data/test/test_factor.rb +8 -69
  87. data/test/test_factor_map.rb +43 -0
  88. data/test/test_factor_pa.rb +54 -0
  89. data/test/test_ggobi.rb +1 -1
  90. data/test/test_gsl.rb +12 -18
  91. data/test/test_histogram.rb +1 -2
  92. data/test/test_logit.rb +62 -18
  93. data/test/test_matrix.rb +4 -5
  94. data/test/test_mle.rb +3 -4
  95. data/test/test_regression.rb +21 -2
  96. data/test/test_reliability.rb +3 -3
  97. data/test/test_reliability_icc.rb +1 -1
  98. data/test/test_reliability_skillscale.rb +20 -4
  99. data/test/test_resample.rb +1 -2
  100. data/test/test_rserve_extension.rb +1 -2
  101. data/test/test_srs.rb +1 -2
  102. data/test/test_statistics.rb +1 -2
  103. data/test/test_stest.rb +1 -2
  104. data/test/test_stratified.rb +1 -2
  105. data/test/test_test_f.rb +1 -2
  106. data/test/test_test_t.rb +1 -2
  107. data/test/test_umannwhitney.rb +1 -2
  108. data/test/test_vector.rb +117 -18
  109. data/test/test_xls.rb +2 -3
  110. data/web/Rakefile +39 -0
  111. metadata +109 -29
  112. metadata.gz.sig +0 -0
  113. data/examples/parallel_analysis_tetrachoric.rb +0 -31
  114. data/lib/distribution.rb +0 -25
  115. data/lib/distribution/chisquare.rb +0 -23
  116. data/lib/distribution/f.rb +0 -35
  117. data/lib/distribution/normal.rb +0 -60
  118. data/lib/distribution/normalbivariate.rb +0 -284
  119. data/lib/distribution/normalmultivariate.rb +0 -73
  120. data/lib/distribution/t.rb +0 -55
  121. data/test/test_distribution.rb +0 -73
@@ -1,7 +1,4 @@
1
1
  require 'statsample/bivariate/pearson'
2
-
3
-
4
-
5
2
  module Statsample
6
3
  # Diverse methods and classes to calculate bivariate relations
7
4
  # Specific classes:
@@ -11,7 +8,6 @@ module Statsample
11
8
  module Bivariate
12
9
  autoload(:Polychoric, 'statsample/bivariate/polychoric')
13
10
  autoload(:Tetrachoric, 'statsample/bivariate/tetrachoric')
14
-
15
11
  class << self
16
12
  # Covariance between two vectors
17
13
  def covariance(v1,v2)
@@ -27,8 +23,8 @@ module Statsample
27
23
  def maximum_likehood_dichotomic(pred,real)
28
24
  preda,reala=Statsample.only_valid_clone(pred,real)
29
25
  sum=0
30
- pred.each_index{|i|
31
- sum+=(real[i]*Math::log(pred[i])) + ((1-real[i])*Math::log(1-pred[i]))
26
+ preda.each_index{|i|
27
+ sum+=(reala[i]*Math::log(preda[i])) + ((1-reala[i])*Math::log(1-preda[i]))
32
28
  }
33
29
  sum
34
30
  end
@@ -101,6 +97,20 @@ module Statsample
101
97
  cdf*n_tails
102
98
  end
103
99
  end
100
+
101
+
102
+ # Predicted time for pairwise correlation matrix, in milliseconds
103
+ # See benchmarks/correlation_matrix.rb to see mode of calculation
104
+
105
+ def prediction_pairwise(vars,cases)
106
+ ((-0.518111-0.000746*cases+1.235608*vars+0.000740*cases*vars)**2) / 100
107
+ end
108
+ # Predicted time for optimized correlation matrix, in milliseconds
109
+ # See benchmarks/correlation_matrix.rb to see mode of calculation
110
+
111
+ def prediction_optimized(vars,cases)
112
+ ((4+0.018128*cases+0.246871*vars+0.001169*vars*cases)**2) / 100
113
+ end
104
114
  # Returns residual score after delete variance
105
115
  # from another variable
106
116
  #
@@ -128,10 +138,35 @@ module Statsample
128
138
 
129
139
  end
130
140
 
141
+ def covariance_matrix_optimized(ds)
142
+ x=ds.to_gsl
143
+ n=x.row_size
144
+ m=x.column_size
145
+ means=((1/n.to_f)*GSL::Matrix.ones(1,n)*x).row(0)
146
+ centered=x-(GSL::Matrix.ones(n,m)*GSL::Matrix.diag(means))
147
+ ss=centered.transpose*centered
148
+ s=((1/(n-1).to_f))*ss
149
+ s
150
+ end
151
+
131
152
  # Covariance matrix.
132
153
  # Order of rows and columns depends on Dataset#fields order
133
154
 
134
155
  def covariance_matrix(ds)
156
+ vars,cases=ds.fields.size,ds.cases
157
+ if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
158
+ cm=covariance_matrix_optimized(ds)
159
+ else
160
+ cm=covariance_matrix_pairwise(ds)
161
+
162
+ end
163
+ cm.extend(Statsample::CovariateMatrix)
164
+ cm.fields=ds.fields
165
+ cm
166
+ end
167
+
168
+
169
+ def covariance_matrix_pairwise(ds)
135
170
  cache={}
136
171
  matrix=ds.collect_matrix do |row,col|
137
172
  if (ds[row].type!=:scale or ds[col].type!=:scale)
@@ -148,15 +183,34 @@ module Statsample
148
183
  end
149
184
  end
150
185
  end
151
- matrix.extend CovariateMatrix
152
- matrix.fields=ds.fields
153
186
  matrix
154
187
  end
155
188
 
156
189
  # Correlation matrix.
157
190
  # Order of rows and columns depends on Dataset#fields order
158
-
159
191
  def correlation_matrix(ds)
192
+ vars,cases=ds.fields.size,ds.cases
193
+ if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
194
+ cm=correlation_matrix_optimized(ds)
195
+ else
196
+ cm=correlation_matrix_pairwise(ds)
197
+ end
198
+ cm.extend(Statsample::CovariateMatrix)
199
+ cm.fields=ds.fields
200
+ cm
201
+ end
202
+
203
+ def correlation_matrix_optimized(ds)
204
+ s=covariance_matrix_optimized(ds)
205
+ sds=GSL::Matrix.diagonal(s.diagonal.sqrt.pow(-1))
206
+ cm=sds*s*sds
207
+ # Fix diagonal
208
+ s.row_size.times {|i|
209
+ cm[i,i]=1.0
210
+ }
211
+ cm
212
+ end
213
+ def correlation_matrix_pairwise(ds)
160
214
  cache={}
161
215
  cm=ds.collect_matrix do |row,col|
162
216
  if row==col
@@ -173,9 +227,6 @@ module Statsample
173
227
  end
174
228
  end
175
229
  end
176
- cm.extend(Statsample::CovariateMatrix)
177
- cm.fields=ds.fields
178
- cm
179
230
  end
180
231
 
181
232
  # Retrieves the n valid pairwise.
@@ -220,7 +271,7 @@ module Statsample
220
271
  m1=ds.filter_field('c') {|c| c['d']!=f0}
221
272
  ((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
222
273
  end
223
- # Kendall Rank Correlation Coefficient.
274
+ # Kendall Rank Correlation Coefficient (Tau a)
224
275
  # Based on Hervé Abdi's article
225
276
  def tau_a(v1,v2)
226
277
  v1a,v2a=Statsample.only_valid_clone(v1,v2)
@@ -231,12 +282,15 @@ module Statsample
231
282
  delta= o1.size*2-(o2 & o1).size*2
232
283
  1-(delta * 2 / (n*(n-1)).to_f)
233
284
  end
234
- # Calculates Tau b correlation.
235
- #
285
+ # Calculates Goodman and Kruskal’s Tau b correlation.
286
+ # Tb is an asymmetric P-R-E measure of association for nominal scales
287
+ # (Mielke, X)
288
+ #
236
289
  # Tau-b defines perfect association as strict monotonicity. Although it
237
290
  # requires strict monotonicity to reach 1.0, it does not penalize ties as
238
291
  # much as some other measures.
239
- #
292
+ # == Reference
293
+ # Mielke, P. GOODMAN–KRUSKAL TAU AND GAMMA.
240
294
  # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
241
295
  def tau_b(matrix)
242
296
  v=pairs(matrix)
@@ -115,6 +115,10 @@ module Statsample
115
115
  ds.update_valid_data
116
116
  ds
117
117
  end
118
+ # Return true if any vector has missing data
119
+ def has_missing_data?
120
+ @vectors.any? {|k,v| v.has_missing_data?}
121
+ end
118
122
  # Creates a new dataset. A dataset is a set of ordered named vectors
119
123
  # of the same size.
120
124
  #
@@ -128,6 +132,10 @@ module Statsample
128
132
  @@n_dataset||=0
129
133
  @@n_dataset+=1
130
134
  @name=_("Dataset %d") % @@n_dataset
135
+ @cases=0
136
+ @gsl=nil
137
+ @i=nil
138
+
131
139
  if vectors.instance_of? Array
132
140
  @fields=vectors.dup
133
141
  @vectors=vectors.inject({}){|a,x| a[x]=Statsample::Vector.new(); a}
@@ -138,17 +146,6 @@ module Statsample
138
146
  check_order
139
147
  check_length
140
148
  end
141
- @i=nil
142
- end
143
- #
144
- # Returns a GSL::matrix
145
- #
146
- def to_gsl_matrix
147
- matrix=GSL::Matrix.alloc(cases,@vectors.size)
148
- each_array do |row|
149
- row.each_index{|y| matrix.set(@i,y,row[y]) }
150
- end
151
- matrix
152
149
  end
153
150
  #
154
151
  # Creates a copy of the given dataset, deleting all the cases with
@@ -375,6 +372,7 @@ module Statsample
375
372
  # Check vectors and fields after inserting data. Use only
376
373
  # after #add_case_array or #add_case with second parameter to false
377
374
  def update_valid_data
375
+ @gsl=nil
378
376
  @fields.each{|f| @vectors[f].set_valid_data}
379
377
  check_length
380
378
  end
@@ -491,7 +489,6 @@ module Statsample
491
489
  size=v.size
492
490
  else
493
491
  if v.size!=size
494
- p v.to_a.size
495
492
  raise Exception, "Vector #{k} have size #{v.size} and dataset have size #{size}"
496
493
  end
497
494
  end
@@ -629,7 +626,6 @@ module Statsample
629
626
  end
630
627
  # Recode a vector based on a block
631
628
  def recode!(vector_name)
632
-
633
629
  0.upto(@cases-1) {|i|
634
630
  @vectors[vector_name].data[i]=yield case_as_hash(i)
635
631
  }
@@ -658,13 +654,23 @@ module Statsample
658
654
  end
659
655
 
660
656
  if Statsample.has_gsl?
661
- def to_matrix_gsl
662
- rows=[]
663
- self.each_array{|c|
664
- rows.push(c)
665
- }
666
- GSL::Matrix.alloc(*rows)
657
+ def clear_gsl
658
+ @gsl=nil
667
659
  end
660
+
661
+ def to_gsl
662
+ if @gsl.nil?
663
+ if cases.nil?
664
+ update_valid_data
665
+ end
666
+ @gsl=GSL::Matrix.alloc(cases,fields.size)
667
+ self.each_array{|c|
668
+ @gsl.set_row(@i,c)
669
+ }
670
+ end
671
+ @gsl
672
+ end
673
+
668
674
  end
669
675
 
670
676
  # Return a correlation matrix for fields included as parameters.
@@ -107,8 +107,8 @@ module Statsample
107
107
  else
108
108
  @regression_class= UNIVARIATE_REGRESSION_CLASS
109
109
  @method_association=:r2
110
-
111
110
  end
111
+
112
112
  @name=nil
113
113
  opts.each{|k,v|
114
114
  self.send("#{k}=",v) if self.respond_to? k
@@ -117,7 +117,7 @@ module Statsample
117
117
  @dependent=[@dependent] unless @dependent.is_a? Array
118
118
 
119
119
  @predictors ||= input.fields-@dependent
120
-
120
+
121
121
  @name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
122
122
 
123
123
  if input.is_a? Statsample::Dataset
@@ -41,8 +41,10 @@ module Statsample
41
41
  aicm
42
42
  end
43
43
  def self.anti_image_correlation_matrix(matrix)
44
+ matrix=matrix.to_matrix
44
45
  s=Matrix.diag(*(matrix.inverse.diagonal)).sqrt.inverse
45
46
  aicm=s*matrix.inverse*s
47
+
46
48
  aicm.extend(Statsample::CovariateMatrix)
47
49
  aicm.fields=matrix.fields if matrix.respond_to? :fields
48
50
  aicm
@@ -48,32 +48,37 @@ module Statsample
48
48
  attr_reader :fm
49
49
  # Smallest average squared correlation
50
50
  attr_reader :minfm
51
+
52
+ attr_accessor :use_gsl
51
53
  def self.with_dataset(ds,opts=Hash.new)
52
54
  new(ds.correlation_matrix,opts)
53
55
  end
54
56
  def initialize(matrix, opts=Hash.new)
55
57
  @matrix=matrix
56
58
  opts_default={
59
+ :use_gsl=>true,
57
60
  :name=>_("Velicer's MAP")
58
61
  }
59
62
  @opts=opts_default.merge(opts)
60
63
  opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
61
64
  end
62
65
  def compute
63
- eigen=@matrix.eigen
64
- eigvect,@eigenvalues=eigen[:eigenvectors], eigen[:eigenvalues]
65
- loadings=eigvect*(Matrix.diag(*@eigenvalues).sqrt)
66
+ gsl_m=(use_gsl and Statsample.has_gsl?) ? @matrix.to_gsl : @matrix
67
+ klass_m=gsl_m.class
68
+ eigvect,@eigenvalues=gsl_m.eigenvectors_matrix, gsl_m.eigenvalues
69
+ eigenvalues_sqrt=@eigenvalues.collect {|v| Math.sqrt(v)}
70
+ loadings=eigvect*(klass_m.diagonal(*eigenvalues_sqrt))
66
71
  fm=Array.new(@matrix.row_size)
67
72
  ncol=@matrix.column_size
68
- fm[0]=(@matrix.mssq - ncol).quo(ncol*(ncol-1))
73
+
74
+ fm[0]=(gsl_m.mssq - ncol).quo(ncol*(ncol-1))
75
+
69
76
  (ncol-1).times do |m|
70
77
  puts "MAP:Eigenvalue #{m+1}" if $DEBUG
71
78
  a=loadings[0..(loadings.row_size-1),0..m]
72
- partcov= @matrix - (a*a.t)
73
- pc_prediag=partcov.row_size.times.map{|i|
74
- 1.quo(Math::sqrt(partcov[i,i]))
75
- }
76
- d=Matrix.diag(*pc_prediag)
79
+ partcov= gsl_m - (a*a.transpose)
80
+
81
+ d=klass_m.diagonal(*(partcov.diagonal.collect {|v| Math::sqrt(1/v)}))
77
82
  pr=d*partcov*d
78
83
  fm[m+1]=(pr.mssq-ncol).quo(ncol*(ncol-1))
79
84
  end
@@ -81,7 +86,7 @@ module Statsample
81
86
  nfactors=0
82
87
  @errors=[]
83
88
  fm.each_with_index do |v,s|
84
- if v.is_a? Complex
89
+ if defined?(Complex) and v.is_a? ::Complex
85
90
  @errors.push(s)
86
91
  else
87
92
  if v < minfm
@@ -93,6 +98,7 @@ module Statsample
93
98
  @number_of_factors=nfactors
94
99
  @fm=fm
95
100
  @minfm=minfm
101
+
96
102
  end
97
103
  def report_building(g) #:nodoc:
98
104
  g.section(:name=>@name) do |s|
@@ -58,7 +58,7 @@ module Statsample
58
58
  attr_accessor :no_data
59
59
  # Show extra information if true
60
60
  attr_accessor :debug
61
-
61
+ attr_accessor :use_gsl
62
62
  def initialize(ds, opts=Hash.new)
63
63
  @ds=ds
64
64
  @fields=@ds.fields
@@ -74,6 +74,7 @@ module Statsample
74
74
  :no_data=>false,
75
75
  :matrix_method=>:correlation_matrix
76
76
  }
77
+ @use_gsl=Statsample.has_gsl?
77
78
  @opts=opts_default.merge(opts)
78
79
  @opts[:matrix_method]==:correlation_matrix if @opts[:bootstrap_method]==:parameters
79
80
  opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
@@ -120,11 +121,12 @@ module Statsample
120
121
  # Perform calculation. Shouldn't be called directly by the user
121
122
  def compute
122
123
 
124
+
123
125
  @original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
124
126
  @ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
125
127
  @ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:scale}
126
128
  if bootstrap_method==:parameter or bootstrap_method==:random
127
- rng = Distribution::Normal.rng_ugaussian
129
+ rng = Distribution::Normal.rng
128
130
  end
129
131
 
130
132
  @iterations.times do |i|
@@ -132,16 +134,20 @@ module Statsample
132
134
  puts "#{@name}: Iteration #{i}" if $DEBUG or debug
133
135
  # Create a dataset of dummy values
134
136
  ds_bootstrap=Statsample::Dataset.new(@ds.fields)
137
+
135
138
  @fields.each do |f|
136
139
  if bootstrap_method==:random
137
140
  ds_bootstrap[f]=@n_cases.times.map {|c| rng.call}.to_scale
138
141
  elsif bootstrap_method==:data
139
- ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases).to_scale
142
+ ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases)
140
143
  else
141
144
  raise "bootstrap_method doesn't recogniced"
142
145
  end
143
146
  end
147
+ ds_bootstrap.update_valid_data
148
+
144
149
  matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
150
+ matrix=matrix.to_gsl if @use_gsl
145
151
  if smc
146
152
  smc_v=matrix.inverse.diagonal.map{|ii| 1-(1.quo(ii))}
147
153
  smc_v.each_with_index do |v,ii|
@@ -50,7 +50,7 @@ module Factor
50
50
  attr_accessor :summary_parallel_analysis
51
51
  # Type of rotation. By default, Statsample::Factor::Rotation::Varimax
52
52
  attr_accessor :rotation_type
53
- attr_accessor :type
53
+ attr_accessor :matrix_type
54
54
  def initialize(matrix, opts=Hash.new)
55
55
  @use_gsl=nil
56
56
  @name=_("Principal Component Analysis")
@@ -58,7 +58,7 @@ module Factor
58
58
  @n_variables=@matrix.column_size
59
59
  @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| _("VAR_%d") % (i+1)}
60
60
 
61
- @type = @matrix.respond_to?(:type) ? @matrix.type : :correlation
61
+ @matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
62
62
 
63
63
  @m=nil
64
64
 
@@ -103,30 +103,45 @@ module Factor
103
103
  # So, i=variable, j=component
104
104
  def feature_matrix(m=nil)
105
105
  m||=@m
106
- omega_m=::Matrix.build(@n_variables, m) {0}
107
- m.times do |i|
108
- omega_m.column= i, @eigenpairs[i][1]
106
+ if @use_gsl
107
+ omega_m=GSL::Matrix.zeros(@n_variables,m)
108
+ ev=eigenvectors
109
+ m.times do |i|
110
+ omega_m.set_column(i,ev[i])
111
+ end
112
+ omega_m
113
+ else
114
+ omega_m=::Matrix.build(@n_variables, m) {0}
115
+ m.times do |i|
116
+ omega_m.column= i, @eigenpairs[i][1]
117
+ end
118
+ omega_m
109
119
  end
110
- omega_m
111
120
  end
112
121
  # Returns Principal Components for +input+ matrix or dataset
113
122
  # The number of PC to return is equal to parameter +m+.
114
- # If +m+ isn't set, m set to number of PCs selected at object creation.
123
+ # If +m+ isn't set, m set to number of PCs selected at object creation.
124
+ # Use covariance matrix
125
+
115
126
  def principal_components(input, m=nil)
116
- data_matrix=input.to_matrix
117
- var_names=(data_matrix.respond_to? :fields_y) ? data_matrix.fields_y : data_matrix.column_size.times.map {|i| "VAR_%d" % (i+1)}
127
+ if @use_gsl
128
+ data_matrix=input.to_gsl
129
+ else
130
+ data_matrix=input.to_matrix
131
+ end
118
132
  m||=@m
119
133
 
120
134
  raise "data matrix variables<>pca variables" if data_matrix.column_size!=@n_variables
121
135
 
122
136
  fv=feature_matrix(m)
123
137
  pcs=(fv.transpose*data_matrix.transpose).transpose
138
+
124
139
  pcs.extend Statsample::NamedMatrix
125
140
  pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
126
141
  pcs.to_dataset
127
142
  end
128
143
  def component_matrix(m=nil)
129
- var="component_matrix_#{type}"
144
+ var="component_matrix_#{matrix_type}"
130
145
  send(var,m)
131
146
  end
132
147
  # Matrix with correlations between components and
@@ -141,7 +156,7 @@ module Factor
141
156
  cm[i,j]=ff[i,j] * Math.sqrt(eigenvalues[j] / @matrix[i,i])
142
157
  }
143
158
  }
144
- cm.extend CovariateMatrix
159
+ cm.extend NamedMatrix
145
160
  cm.name=_("Component matrix (from covariance)")
146
161
  cm.fields_x = @variables_names
147
162
  cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
@@ -187,32 +202,13 @@ module Factor
187
202
  end
188
203
  def eigenvectors
189
204
  @eigenpairs.collect {|c|
190
- c[1].to_matrix
205
+ @use_gsl ? c[1].to_gsl : c[1].to_vector
191
206
  }
192
207
  end
193
208
  def calculate_eigenpairs
194
- if @use_gsl
195
- calculate_eigenpairs_gsl
196
- else
197
- calculate_eigenpairs_ruby
198
- end
209
+ @eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
199
210
  end
200
211
 
201
- def calculate_eigenpairs_ruby #:nodoc:
202
- @eigenpairs = @matrix.eigenpairs_ruby
203
- end
204
- # Eigenvectors calculated with gsl
205
- # Note: The signs of some vectors could be different of
206
- # ruby generated
207
- def calculate_eigenpairs_gsl #:nodoc:
208
- eigval, eigvec= GSL::Eigen.symmv(@matrix.to_gsl)
209
- #puts "***"
210
- ep=eigval.size.times.map {|i|
211
- ev=eigvec.get_col(i)
212
- [eigval[i], ev]
213
- }
214
- @eigenpairs=ep.sort{|a,b| a[0]<=>b[0]}.reverse
215
- end
216
212
 
217
213
  def report_building(builder) # :nodoc:
218
214
  builder.section(:name=>@name) do |generator|