statsample 0.18.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (121) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +23 -0
  3. data/Manifest.txt +28 -17
  4. data/Rakefile +3 -2
  5. data/benchmarks/correlation_matrix_15_variables.rb +31 -0
  6. data/benchmarks/correlation_matrix_5_variables.rb +32 -0
  7. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  8. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  9. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  11. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  13. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  14. data/benchmarks/factor_map.rb +37 -0
  15. data/benchmarks/helpers_benchmark.rb +5 -0
  16. data/examples/boxplot.rb +13 -14
  17. data/examples/correlation_matrix.rb +16 -8
  18. data/examples/dataset.rb +13 -4
  19. data/examples/dominance_analysis.rb +23 -17
  20. data/examples/dominance_analysis_bootstrap.rb +28 -22
  21. data/examples/histogram.rb +8 -9
  22. data/examples/icc.rb +20 -21
  23. data/examples/levene.rb +10 -4
  24. data/examples/multiple_regression.rb +9 -28
  25. data/examples/multivariate_correlation.rb +9 -3
  26. data/examples/parallel_analysis.rb +20 -16
  27. data/examples/polychoric.rb +15 -9
  28. data/examples/principal_axis.rb +18 -6
  29. data/examples/reliability.rb +26 -13
  30. data/examples/scatterplot.rb +10 -6
  31. data/examples/t_test.rb +15 -6
  32. data/examples/tetrachoric.rb +9 -2
  33. data/examples/u_test.rb +12 -4
  34. data/examples/vector.rb +13 -2
  35. data/examples/velicer_map_test.rb +33 -26
  36. data/lib/statsample.rb +32 -12
  37. data/lib/statsample/analysis.rb +79 -0
  38. data/lib/statsample/analysis/suite.rb +72 -0
  39. data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
  40. data/lib/statsample/bivariate.rb +70 -16
  41. data/lib/statsample/dataset.rb +25 -19
  42. data/lib/statsample/dominanceanalysis.rb +2 -2
  43. data/lib/statsample/factor.rb +2 -0
  44. data/lib/statsample/factor/map.rb +16 -10
  45. data/lib/statsample/factor/parallelanalysis.rb +9 -3
  46. data/lib/statsample/factor/pca.rb +28 -32
  47. data/lib/statsample/factor/rotation.rb +15 -8
  48. data/lib/statsample/graph/boxplot.rb +3 -4
  49. data/lib/statsample/graph/histogram.rb +2 -1
  50. data/lib/statsample/graph/scatterplot.rb +1 -0
  51. data/lib/statsample/matrix.rb +106 -16
  52. data/lib/statsample/regression.rb +4 -1
  53. data/lib/statsample/regression/binomial.rb +1 -1
  54. data/lib/statsample/regression/multiple/baseengine.rb +19 -9
  55. data/lib/statsample/regression/multiple/gslengine.rb +127 -126
  56. data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
  57. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  58. data/lib/statsample/regression/simple.rb +31 -6
  59. data/lib/statsample/reliability.rb +11 -3
  60. data/lib/statsample/reliability/scaleanalysis.rb +4 -4
  61. data/lib/statsample/shorthand.rb +81 -0
  62. data/lib/statsample/test/chisquare.rb +1 -1
  63. data/lib/statsample/vector.rb +163 -163
  64. data/lib/statsample/vector/gsl.rb +106 -0
  65. data/references.txt +2 -2
  66. data/{data → test/fixtures}/crime.txt +0 -0
  67. data/{data → test/fixtures}/hartman_23.matrix +0 -0
  68. data/{data → test/fixtures}/repeated_fields.csv +0 -0
  69. data/{data → test/fixtures}/test_binomial.csv +0 -0
  70. data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
  71. data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
  72. data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
  73. data/{data → test/fixtures}/tetmat_test.txt +0 -0
  74. data/test/helpers_tests.rb +18 -2
  75. data/test/test_analysis.rb +118 -0
  76. data/test/test_anovatwoway.rb +1 -1
  77. data/test/test_anovatwowaywithdataset.rb +1 -1
  78. data/test/test_anovawithvectors.rb +1 -2
  79. data/test/test_bartlettsphericity.rb +1 -2
  80. data/test/test_bivariate.rb +64 -22
  81. data/test/test_codification.rb +1 -2
  82. data/test/test_crosstab.rb +1 -2
  83. data/test/test_csv.rb +3 -4
  84. data/test/test_dataset.rb +24 -3
  85. data/test/test_dominance_analysis.rb +1 -2
  86. data/test/test_factor.rb +8 -69
  87. data/test/test_factor_map.rb +43 -0
  88. data/test/test_factor_pa.rb +54 -0
  89. data/test/test_ggobi.rb +1 -1
  90. data/test/test_gsl.rb +12 -18
  91. data/test/test_histogram.rb +1 -2
  92. data/test/test_logit.rb +62 -18
  93. data/test/test_matrix.rb +4 -5
  94. data/test/test_mle.rb +3 -4
  95. data/test/test_regression.rb +21 -2
  96. data/test/test_reliability.rb +3 -3
  97. data/test/test_reliability_icc.rb +1 -1
  98. data/test/test_reliability_skillscale.rb +20 -4
  99. data/test/test_resample.rb +1 -2
  100. data/test/test_rserve_extension.rb +1 -2
  101. data/test/test_srs.rb +1 -2
  102. data/test/test_statistics.rb +1 -2
  103. data/test/test_stest.rb +1 -2
  104. data/test/test_stratified.rb +1 -2
  105. data/test/test_test_f.rb +1 -2
  106. data/test/test_test_t.rb +1 -2
  107. data/test/test_umannwhitney.rb +1 -2
  108. data/test/test_vector.rb +117 -18
  109. data/test/test_xls.rb +2 -3
  110. data/web/Rakefile +39 -0
  111. metadata +109 -29
  112. metadata.gz.sig +0 -0
  113. data/examples/parallel_analysis_tetrachoric.rb +0 -31
  114. data/lib/distribution.rb +0 -25
  115. data/lib/distribution/chisquare.rb +0 -23
  116. data/lib/distribution/f.rb +0 -35
  117. data/lib/distribution/normal.rb +0 -60
  118. data/lib/distribution/normalbivariate.rb +0 -284
  119. data/lib/distribution/normalmultivariate.rb +0 -73
  120. data/lib/distribution/t.rb +0 -55
  121. data/test/test_distribution.rb +0 -73
@@ -1,7 +1,4 @@
1
1
  require 'statsample/bivariate/pearson'
2
-
3
-
4
-
5
2
  module Statsample
6
3
  # Diverse methods and classes to calculate bivariate relations
7
4
  # Specific classes:
@@ -11,7 +8,6 @@ module Statsample
11
8
  module Bivariate
12
9
  autoload(:Polychoric, 'statsample/bivariate/polychoric')
13
10
  autoload(:Tetrachoric, 'statsample/bivariate/tetrachoric')
14
-
15
11
  class << self
16
12
  # Covariance between two vectors
17
13
  def covariance(v1,v2)
@@ -27,8 +23,8 @@ module Statsample
27
23
  def maximum_likehood_dichotomic(pred,real)
28
24
  preda,reala=Statsample.only_valid_clone(pred,real)
29
25
  sum=0
30
- pred.each_index{|i|
31
- sum+=(real[i]*Math::log(pred[i])) + ((1-real[i])*Math::log(1-pred[i]))
26
+ preda.each_index{|i|
27
+ sum+=(reala[i]*Math::log(preda[i])) + ((1-reala[i])*Math::log(1-preda[i]))
32
28
  }
33
29
  sum
34
30
  end
@@ -101,6 +97,20 @@ module Statsample
101
97
  cdf*n_tails
102
98
  end
103
99
  end
100
+
101
+
102
+ # Predicted time for pairwise correlation matrix, in milliseconds
103
+ # See benchmarks/correlation_matrix.rb to see mode of calculation
104
+
105
+ def prediction_pairwise(vars,cases)
106
+ ((-0.518111-0.000746*cases+1.235608*vars+0.000740*cases*vars)**2) / 100
107
+ end
108
+ # Predicted time for optimized correlation matrix, in milliseconds
109
+ # See benchmarks/correlation_matrix.rb to see mode of calculation
110
+
111
+ def prediction_optimized(vars,cases)
112
+ ((4+0.018128*cases+0.246871*vars+0.001169*vars*cases)**2) / 100
113
+ end
104
114
  # Returns residual score after delete variance
105
115
  # from another variable
106
116
  #
@@ -128,10 +138,35 @@ module Statsample
128
138
 
129
139
  end
130
140
 
141
+ def covariance_matrix_optimized(ds)
142
+ x=ds.to_gsl
143
+ n=x.row_size
144
+ m=x.column_size
145
+ means=((1/n.to_f)*GSL::Matrix.ones(1,n)*x).row(0)
146
+ centered=x-(GSL::Matrix.ones(n,m)*GSL::Matrix.diag(means))
147
+ ss=centered.transpose*centered
148
+ s=((1/(n-1).to_f))*ss
149
+ s
150
+ end
151
+
131
152
  # Covariance matrix.
132
153
  # Order of rows and columns depends on Dataset#fields order
133
154
 
134
155
  def covariance_matrix(ds)
156
+ vars,cases=ds.fields.size,ds.cases
157
+ if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
158
+ cm=covariance_matrix_optimized(ds)
159
+ else
160
+ cm=covariance_matrix_pairwise(ds)
161
+
162
+ end
163
+ cm.extend(Statsample::CovariateMatrix)
164
+ cm.fields=ds.fields
165
+ cm
166
+ end
167
+
168
+
169
+ def covariance_matrix_pairwise(ds)
135
170
  cache={}
136
171
  matrix=ds.collect_matrix do |row,col|
137
172
  if (ds[row].type!=:scale or ds[col].type!=:scale)
@@ -148,15 +183,34 @@ module Statsample
148
183
  end
149
184
  end
150
185
  end
151
- matrix.extend CovariateMatrix
152
- matrix.fields=ds.fields
153
186
  matrix
154
187
  end
155
188
 
156
189
  # Correlation matrix.
157
190
  # Order of rows and columns depends on Dataset#fields order
158
-
159
191
  def correlation_matrix(ds)
192
+ vars,cases=ds.fields.size,ds.cases
193
+ if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
194
+ cm=correlation_matrix_optimized(ds)
195
+ else
196
+ cm=correlation_matrix_pairwise(ds)
197
+ end
198
+ cm.extend(Statsample::CovariateMatrix)
199
+ cm.fields=ds.fields
200
+ cm
201
+ end
202
+
203
+ def correlation_matrix_optimized(ds)
204
+ s=covariance_matrix_optimized(ds)
205
+ sds=GSL::Matrix.diagonal(s.diagonal.sqrt.pow(-1))
206
+ cm=sds*s*sds
207
+ # Fix diagonal
208
+ s.row_size.times {|i|
209
+ cm[i,i]=1.0
210
+ }
211
+ cm
212
+ end
213
+ def correlation_matrix_pairwise(ds)
160
214
  cache={}
161
215
  cm=ds.collect_matrix do |row,col|
162
216
  if row==col
@@ -173,9 +227,6 @@ module Statsample
173
227
  end
174
228
  end
175
229
  end
176
- cm.extend(Statsample::CovariateMatrix)
177
- cm.fields=ds.fields
178
- cm
179
230
  end
180
231
 
181
232
  # Retrieves the n valid pairwise.
@@ -220,7 +271,7 @@ module Statsample
220
271
  m1=ds.filter_field('c') {|c| c['d']!=f0}
221
272
  ((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
222
273
  end
223
- # Kendall Rank Correlation Coefficient.
274
+ # Kendall Rank Correlation Coefficient (Tau a)
224
275
  # Based on Hervé Abdi's article
225
276
  def tau_a(v1,v2)
226
277
  v1a,v2a=Statsample.only_valid_clone(v1,v2)
@@ -231,12 +282,15 @@ module Statsample
231
282
  delta= o1.size*2-(o2 & o1).size*2
232
283
  1-(delta * 2 / (n*(n-1)).to_f)
233
284
  end
234
- # Calculates Tau b correlation.
235
- #
285
+ # Calculates Goodman and Kruskal’s Tau b correlation.
286
+ # Tb is an asymmetric P-R-E measure of association for nominal scales
287
+ # (Mielke, X)
288
+ #
236
289
  # Tau-b defines perfect association as strict monotonicity. Although it
237
290
  # requires strict monotonicity to reach 1.0, it does not penalize ties as
238
291
  # much as some other measures.
239
- #
292
+ # == Reference
293
+ # Mielke, P. GOODMAN–KRUSKAL TAU AND GAMMA.
240
294
  # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
241
295
  def tau_b(matrix)
242
296
  v=pairs(matrix)
@@ -115,6 +115,10 @@ module Statsample
115
115
  ds.update_valid_data
116
116
  ds
117
117
  end
118
+ # Return true if any vector has missing data
119
+ def has_missing_data?
120
+ @vectors.any? {|k,v| v.has_missing_data?}
121
+ end
118
122
  # Creates a new dataset. A dataset is a set of ordered named vectors
119
123
  # of the same size.
120
124
  #
@@ -128,6 +132,10 @@ module Statsample
128
132
  @@n_dataset||=0
129
133
  @@n_dataset+=1
130
134
  @name=_("Dataset %d") % @@n_dataset
135
+ @cases=0
136
+ @gsl=nil
137
+ @i=nil
138
+
131
139
  if vectors.instance_of? Array
132
140
  @fields=vectors.dup
133
141
  @vectors=vectors.inject({}){|a,x| a[x]=Statsample::Vector.new(); a}
@@ -138,17 +146,6 @@ module Statsample
138
146
  check_order
139
147
  check_length
140
148
  end
141
- @i=nil
142
- end
143
- #
144
- # Returns a GSL::matrix
145
- #
146
- def to_gsl_matrix
147
- matrix=GSL::Matrix.alloc(cases,@vectors.size)
148
- each_array do |row|
149
- row.each_index{|y| matrix.set(@i,y,row[y]) }
150
- end
151
- matrix
152
149
  end
153
150
  #
154
151
  # Creates a copy of the given dataset, deleting all the cases with
@@ -375,6 +372,7 @@ module Statsample
375
372
  # Check vectors and fields after inserting data. Use only
376
373
  # after #add_case_array or #add_case with second parameter to false
377
374
  def update_valid_data
375
+ @gsl=nil
378
376
  @fields.each{|f| @vectors[f].set_valid_data}
379
377
  check_length
380
378
  end
@@ -491,7 +489,6 @@ module Statsample
491
489
  size=v.size
492
490
  else
493
491
  if v.size!=size
494
- p v.to_a.size
495
492
  raise Exception, "Vector #{k} have size #{v.size} and dataset have size #{size}"
496
493
  end
497
494
  end
@@ -629,7 +626,6 @@ module Statsample
629
626
  end
630
627
  # Recode a vector based on a block
631
628
  def recode!(vector_name)
632
-
633
629
  0.upto(@cases-1) {|i|
634
630
  @vectors[vector_name].data[i]=yield case_as_hash(i)
635
631
  }
@@ -658,13 +654,23 @@ module Statsample
658
654
  end
659
655
 
660
656
  if Statsample.has_gsl?
661
- def to_matrix_gsl
662
- rows=[]
663
- self.each_array{|c|
664
- rows.push(c)
665
- }
666
- GSL::Matrix.alloc(*rows)
657
+ def clear_gsl
658
+ @gsl=nil
667
659
  end
660
+
661
+ def to_gsl
662
+ if @gsl.nil?
663
+ if cases.nil?
664
+ update_valid_data
665
+ end
666
+ @gsl=GSL::Matrix.alloc(cases,fields.size)
667
+ self.each_array{|c|
668
+ @gsl.set_row(@i,c)
669
+ }
670
+ end
671
+ @gsl
672
+ end
673
+
668
674
  end
669
675
 
670
676
  # Return a correlation matrix for fields included as parameters.
@@ -107,8 +107,8 @@ module Statsample
107
107
  else
108
108
  @regression_class= UNIVARIATE_REGRESSION_CLASS
109
109
  @method_association=:r2
110
-
111
110
  end
111
+
112
112
  @name=nil
113
113
  opts.each{|k,v|
114
114
  self.send("#{k}=",v) if self.respond_to? k
@@ -117,7 +117,7 @@ module Statsample
117
117
  @dependent=[@dependent] unless @dependent.is_a? Array
118
118
 
119
119
  @predictors ||= input.fields-@dependent
120
-
120
+
121
121
  @name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
122
122
 
123
123
  if input.is_a? Statsample::Dataset
@@ -41,8 +41,10 @@ module Statsample
41
41
  aicm
42
42
  end
43
43
  def self.anti_image_correlation_matrix(matrix)
44
+ matrix=matrix.to_matrix
44
45
  s=Matrix.diag(*(matrix.inverse.diagonal)).sqrt.inverse
45
46
  aicm=s*matrix.inverse*s
47
+
46
48
  aicm.extend(Statsample::CovariateMatrix)
47
49
  aicm.fields=matrix.fields if matrix.respond_to? :fields
48
50
  aicm
@@ -48,32 +48,37 @@ module Statsample
48
48
  attr_reader :fm
49
49
  # Smallest average squared correlation
50
50
  attr_reader :minfm
51
+
52
+ attr_accessor :use_gsl
51
53
  def self.with_dataset(ds,opts=Hash.new)
52
54
  new(ds.correlation_matrix,opts)
53
55
  end
54
56
  def initialize(matrix, opts=Hash.new)
55
57
  @matrix=matrix
56
58
  opts_default={
59
+ :use_gsl=>true,
57
60
  :name=>_("Velicer's MAP")
58
61
  }
59
62
  @opts=opts_default.merge(opts)
60
63
  opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
61
64
  end
62
65
  def compute
63
- eigen=@matrix.eigen
64
- eigvect,@eigenvalues=eigen[:eigenvectors], eigen[:eigenvalues]
65
- loadings=eigvect*(Matrix.diag(*@eigenvalues).sqrt)
66
+ gsl_m=(use_gsl and Statsample.has_gsl?) ? @matrix.to_gsl : @matrix
67
+ klass_m=gsl_m.class
68
+ eigvect,@eigenvalues=gsl_m.eigenvectors_matrix, gsl_m.eigenvalues
69
+ eigenvalues_sqrt=@eigenvalues.collect {|v| Math.sqrt(v)}
70
+ loadings=eigvect*(klass_m.diagonal(*eigenvalues_sqrt))
66
71
  fm=Array.new(@matrix.row_size)
67
72
  ncol=@matrix.column_size
68
- fm[0]=(@matrix.mssq - ncol).quo(ncol*(ncol-1))
73
+
74
+ fm[0]=(gsl_m.mssq - ncol).quo(ncol*(ncol-1))
75
+
69
76
  (ncol-1).times do |m|
70
77
  puts "MAP:Eigenvalue #{m+1}" if $DEBUG
71
78
  a=loadings[0..(loadings.row_size-1),0..m]
72
- partcov= @matrix - (a*a.t)
73
- pc_prediag=partcov.row_size.times.map{|i|
74
- 1.quo(Math::sqrt(partcov[i,i]))
75
- }
76
- d=Matrix.diag(*pc_prediag)
79
+ partcov= gsl_m - (a*a.transpose)
80
+
81
+ d=klass_m.diagonal(*(partcov.diagonal.collect {|v| Math::sqrt(1/v)}))
77
82
  pr=d*partcov*d
78
83
  fm[m+1]=(pr.mssq-ncol).quo(ncol*(ncol-1))
79
84
  end
@@ -81,7 +86,7 @@ module Statsample
81
86
  nfactors=0
82
87
  @errors=[]
83
88
  fm.each_with_index do |v,s|
84
- if v.is_a? Complex
89
+ if defined?(Complex) and v.is_a? ::Complex
85
90
  @errors.push(s)
86
91
  else
87
92
  if v < minfm
@@ -93,6 +98,7 @@ module Statsample
93
98
  @number_of_factors=nfactors
94
99
  @fm=fm
95
100
  @minfm=minfm
101
+
96
102
  end
97
103
  def report_building(g) #:nodoc:
98
104
  g.section(:name=>@name) do |s|
@@ -58,7 +58,7 @@ module Statsample
58
58
  attr_accessor :no_data
59
59
  # Show extra information if true
60
60
  attr_accessor :debug
61
-
61
+ attr_accessor :use_gsl
62
62
  def initialize(ds, opts=Hash.new)
63
63
  @ds=ds
64
64
  @fields=@ds.fields
@@ -74,6 +74,7 @@ module Statsample
74
74
  :no_data=>false,
75
75
  :matrix_method=>:correlation_matrix
76
76
  }
77
+ @use_gsl=Statsample.has_gsl?
77
78
  @opts=opts_default.merge(opts)
78
79
  @opts[:matrix_method]==:correlation_matrix if @opts[:bootstrap_method]==:parameters
79
80
  opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
@@ -120,11 +121,12 @@ module Statsample
120
121
  # Perform calculation. Shouldn't be called directly by the user
121
122
  def compute
122
123
 
124
+
123
125
  @original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
124
126
  @ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
125
127
  @ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:scale}
126
128
  if bootstrap_method==:parameter or bootstrap_method==:random
127
- rng = Distribution::Normal.rng_ugaussian
129
+ rng = Distribution::Normal.rng
128
130
  end
129
131
 
130
132
  @iterations.times do |i|
@@ -132,16 +134,20 @@ module Statsample
132
134
  puts "#{@name}: Iteration #{i}" if $DEBUG or debug
133
135
  # Create a dataset of dummy values
134
136
  ds_bootstrap=Statsample::Dataset.new(@ds.fields)
137
+
135
138
  @fields.each do |f|
136
139
  if bootstrap_method==:random
137
140
  ds_bootstrap[f]=@n_cases.times.map {|c| rng.call}.to_scale
138
141
  elsif bootstrap_method==:data
139
- ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases).to_scale
142
+ ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases)
140
143
  else
141
144
  raise "bootstrap_method doesn't recogniced"
142
145
  end
143
146
  end
147
+ ds_bootstrap.update_valid_data
148
+
144
149
  matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
150
+ matrix=matrix.to_gsl if @use_gsl
145
151
  if smc
146
152
  smc_v=matrix.inverse.diagonal.map{|ii| 1-(1.quo(ii))}
147
153
  smc_v.each_with_index do |v,ii|
@@ -50,7 +50,7 @@ module Factor
50
50
  attr_accessor :summary_parallel_analysis
51
51
  # Type of rotation. By default, Statsample::Factor::Rotation::Varimax
52
52
  attr_accessor :rotation_type
53
- attr_accessor :type
53
+ attr_accessor :matrix_type
54
54
  def initialize(matrix, opts=Hash.new)
55
55
  @use_gsl=nil
56
56
  @name=_("Principal Component Analysis")
@@ -58,7 +58,7 @@ module Factor
58
58
  @n_variables=@matrix.column_size
59
59
  @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| _("VAR_%d") % (i+1)}
60
60
 
61
- @type = @matrix.respond_to?(:type) ? @matrix.type : :correlation
61
+ @matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
62
62
 
63
63
  @m=nil
64
64
 
@@ -103,30 +103,45 @@ module Factor
103
103
  # So, i=variable, j=component
104
104
  def feature_matrix(m=nil)
105
105
  m||=@m
106
- omega_m=::Matrix.build(@n_variables, m) {0}
107
- m.times do |i|
108
- omega_m.column= i, @eigenpairs[i][1]
106
+ if @use_gsl
107
+ omega_m=GSL::Matrix.zeros(@n_variables,m)
108
+ ev=eigenvectors
109
+ m.times do |i|
110
+ omega_m.set_column(i,ev[i])
111
+ end
112
+ omega_m
113
+ else
114
+ omega_m=::Matrix.build(@n_variables, m) {0}
115
+ m.times do |i|
116
+ omega_m.column= i, @eigenpairs[i][1]
117
+ end
118
+ omega_m
109
119
  end
110
- omega_m
111
120
  end
112
121
  # Returns Principal Components for +input+ matrix or dataset
113
122
  # The number of PC to return is equal to parameter +m+.
114
- # If +m+ isn't set, m set to number of PCs selected at object creation.
123
+ # If +m+ isn't set, m set to number of PCs selected at object creation.
124
+ # Use covariance matrix
125
+
115
126
  def principal_components(input, m=nil)
116
- data_matrix=input.to_matrix
117
- var_names=(data_matrix.respond_to? :fields_y) ? data_matrix.fields_y : data_matrix.column_size.times.map {|i| "VAR_%d" % (i+1)}
127
+ if @use_gsl
128
+ data_matrix=input.to_gsl
129
+ else
130
+ data_matrix=input.to_matrix
131
+ end
118
132
  m||=@m
119
133
 
120
134
  raise "data matrix variables<>pca variables" if data_matrix.column_size!=@n_variables
121
135
 
122
136
  fv=feature_matrix(m)
123
137
  pcs=(fv.transpose*data_matrix.transpose).transpose
138
+
124
139
  pcs.extend Statsample::NamedMatrix
125
140
  pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
126
141
  pcs.to_dataset
127
142
  end
128
143
  def component_matrix(m=nil)
129
- var="component_matrix_#{type}"
144
+ var="component_matrix_#{matrix_type}"
130
145
  send(var,m)
131
146
  end
132
147
  # Matrix with correlations between components and
@@ -141,7 +156,7 @@ module Factor
141
156
  cm[i,j]=ff[i,j] * Math.sqrt(eigenvalues[j] / @matrix[i,i])
142
157
  }
143
158
  }
144
- cm.extend CovariateMatrix
159
+ cm.extend NamedMatrix
145
160
  cm.name=_("Component matrix (from covariance)")
146
161
  cm.fields_x = @variables_names
147
162
  cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
@@ -187,32 +202,13 @@ module Factor
187
202
  end
188
203
  def eigenvectors
189
204
  @eigenpairs.collect {|c|
190
- c[1].to_matrix
205
+ @use_gsl ? c[1].to_gsl : c[1].to_vector
191
206
  }
192
207
  end
193
208
  def calculate_eigenpairs
194
- if @use_gsl
195
- calculate_eigenpairs_gsl
196
- else
197
- calculate_eigenpairs_ruby
198
- end
209
+ @eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
199
210
  end
200
211
 
201
- def calculate_eigenpairs_ruby #:nodoc:
202
- @eigenpairs = @matrix.eigenpairs_ruby
203
- end
204
- # Eigenvectors calculated with gsl
205
- # Note: The signs of some vectors could be different of
206
- # ruby generated
207
- def calculate_eigenpairs_gsl #:nodoc:
208
- eigval, eigvec= GSL::Eigen.symmv(@matrix.to_gsl)
209
- #puts "***"
210
- ep=eigval.size.times.map {|i|
211
- ev=eigvec.get_col(i)
212
- [eigval[i], ev]
213
- }
214
- @eigenpairs=ep.sort{|a,b| a[0]<=>b[0]}.reverse
215
- end
216
212
 
217
213
  def report_building(builder) # :nodoc:
218
214
  builder.section(:name=>@name) do |generator|