statsample 0.13.1 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -29,27 +29,42 @@ module Factor
29
29
  # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
30
30
  #
31
31
  class PCA
32
+ include Summarizable
32
33
  # Name of analysis
33
34
  attr_accessor :name
35
+
34
36
  # Number of factors. Set by default to the number of factors
35
37
  # with eigenvalues > 1
36
38
  attr_accessor :m
37
39
  # Use GSL if available
38
40
  attr_accessor :use_gsl
39
- include Summarizable
40
-
41
+ # Add to the summary a rotation report
42
+ attr_accessor :summary_rotation
43
+ # Add to the summary a parallel analysis report
44
+ attr_accessor :summary_parallel_analysis
45
+ # Type of rotation. By default, Statsample::Factor::Varimax
46
+ attr_accessor :rotation_type
47
+
41
48
  def initialize(matrix, opts=Hash.new)
42
- @use_gsl=nil
49
+ @use_gsl=nil
43
50
  @name=_("Principal Component Analysis")
44
51
  @matrix=matrix
45
52
  @n_variables=@matrix.column_size
46
53
  @m=nil
54
+
55
+ @rotation_type=Statsample::Factor::Varimax
56
+
47
57
  opts.each{|k,v|
48
58
  self.send("#{k}=",v) if self.respond_to? k
49
59
  }
50
60
  if @use_gsl.nil?
51
61
  @use_gsl=Statsample.has_gsl?
52
62
  end
63
+ if @matrix.respond_to? :fields
64
+ @variables_names=@matrix.fields
65
+ else
66
+ @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
67
+ end
53
68
  calculate_eigenpairs
54
69
  if @m.nil?
55
70
  # Set number of factors with eigenvalues > 1
@@ -57,6 +72,9 @@ module Factor
57
72
  end
58
73
 
59
74
  end
75
+ def rotation
76
+ @rotation_type.new(component_matrix)
77
+ end
60
78
  def create_centered_ds
61
79
  h={}
62
80
  @original_ds.factors.each {|f|
@@ -93,7 +111,13 @@ module Factor
93
111
  gammas.push(Math::sqrt(@eigenpairs[i][0]))
94
112
  }
95
113
  gamma_m=::Matrix.diagonal(*gammas)
96
- (omega_m*(gamma_m)).to_matrix
114
+ cm=(omega_m*(gamma_m)).to_matrix
115
+
116
+ cm.extend CovariateMatrix
117
+ cm.name=_("Component matrix")
118
+ cm.fields_x = @variables_names
119
+ cm.fields_y = m.times.map {|i| "component_#{i+1}"}
120
+ cm
97
121
  end
98
122
  # Communalities for all variables given m factors
99
123
  def communalities(m=nil)
@@ -122,12 +146,7 @@ module Factor
122
146
  end
123
147
 
124
148
  def calculate_eigenpairs_ruby
125
- eigval, eigvec= @matrix.eigenvaluesJacobi, @matrix.cJacobiV
126
- @eigenpairs={}
127
- eigval.to_a.each_index {|i|
128
- @eigenpairs[eigval[i]]=eigvec.column(i)
129
- }
130
- @eigenpairs=@eigenpairs.sort.reverse
149
+ @eigenpairs = @matrix.eigenpairs
131
150
  end
132
151
  def calculate_eigenpairs_gsl
133
152
  eigval, eigvec= GSL::Eigen.symmv(@matrix.to_gsl)
@@ -140,25 +159,24 @@ module Factor
140
159
 
141
160
  def report_building(builder) # :nodoc:
142
161
  builder.section(:name=>@name) do |generator|
143
- generator.text _("Number of factors: %d") % m
144
- generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction")]) do |t|
145
- communalities(m).each_with_index {|com,i|
146
- t.row([i, 1.0, sprintf("%0.3f", com)])
147
- }
148
- end
149
- generator.table(:name=>_("Eigenvalues"), :header=>[_("Variable"),_("Value")]) do |t|
150
- eigenvalues.each_with_index {|eigenvalue,i|
151
- t.row([i, sprintf("%0.3f",eigenvalue)])
152
- }
153
- end
154
-
155
- generator.table(:name=>_("Component Matrix"), :header=>[_("Variable")]+m.times.collect {|c| c+1}) do |t|
156
- i=0
157
- component_matrix(m).to_a.each do |row|
158
- t.row([i]+row.collect {|c| sprintf("%0.3f",c)})
159
- i+=1
162
+ generator.text _("Number of factors: %d") % m
163
+ generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction")]) do |t|
164
+ communalities(m).each_with_index {|com, i|
165
+ t.row([@variables_names[i], 1.0, sprintf("%0.3f", com)])
166
+ }
167
+ end
168
+
169
+ generator.table(:name=>_("Total Variance Explained"), :header=>[_("Component"), _("E.Total"), _("%"), _("Cum. %")]) do |t|
170
+ ac_eigen=0
171
+ eigenvalues.each_with_index {|eigenvalue,i|
172
+ ac_eigen+=eigenvalue
173
+ t.row([_("Component %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(@n_variables)), sprintf("%0.3f",ac_eigen*100.quo(@n_variables))])
174
+ }
175
+ end
176
+ generator.parse_element(component_matrix(m))
177
+ if (summary_rotation)
178
+ generator.parse_element(rotation)
160
179
  end
161
- end
162
180
  end
163
181
  end
164
182
  private :calculate_eigenpairs, :create_centered_ds
@@ -29,31 +29,39 @@ module Factor
29
29
  class PrincipalAxis
30
30
  include DirtyMemoize
31
31
  include Summarizable
32
- # Minimum difference between succesive iterations on sum of communalities
33
- DELTA=1e-3
34
- # Maximum number of iterations
35
- MAX_ITERATIONS=50
36
- # Number of factors. Set by default to the number of factors
37
- # with eigen values > 1 on PCA over data
38
- attr_accessor :m
39
-
40
32
  # Name of analysis
41
33
  attr_accessor :name
34
+
35
+ # Number of factors. Set by default to the number of factors
36
+ # with eigenvalues > 1 (Kaiser criterion).
37
+ #
38
+ # _Warning:_ Kaiser criterion overfactors! Give yourself some time
39
+ # and use Horn's Parallel Analysis.
40
+ #
41
+ attr_accessor :m
42
42
 
43
43
  # Number of iterations required to converge
44
44
  attr_reader :iterations
45
+
45
46
  # Initial eigenvalues
46
47
  attr_reader :initial_eigenvalues
47
- # Tolerance for iteratios.
48
+
49
+ # Tolerance for iterations
48
50
  attr_accessor :epsilon
51
+
49
52
  # Use SMC(squared multiple correlations) as diagonal. If false, use 1
50
53
  attr_accessor :smc
54
+
51
55
  # Maximum number of iterations
52
56
  attr_accessor :max_iterations
57
+
53
58
  # Eigenvalues of factor analysis
54
59
  attr_reader :eigenvalues
55
60
 
56
-
61
+ # Minimum difference between successive iterations on sum of communalities
62
+ DELTA=1e-3
63
+ # Maximum number of iterations
64
+ MAX_ITERATIONS=25
57
65
 
58
66
  def initialize(matrix, opts=Hash.new)
59
67
  @matrix=matrix
@@ -62,7 +70,7 @@ module Factor
62
70
  else
63
71
  @fields=@matrix.row_size.times.map {|i| _("Variable %d") % (i+1)}
64
72
  end
65
-
73
+ @n_variables=@matrix.row_size
66
74
  @name=""
67
75
  @m=nil
68
76
  @initial_eigenvalues=nil
@@ -74,7 +82,11 @@ module Factor
74
82
  opts.each{|k,v|
75
83
  self.send("#{k}=",v) if self.respond_to? k
76
84
  }
77
-
85
+ if @matrix.respond_to? :fields
86
+ @variables_names=@matrix.fields
87
+ else
88
+ @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
89
+ end
78
90
  if @m.nil?
79
91
  pca=PCA.new(::Matrix.rows(@matrix.to_a))
80
92
  @m=pca.m
@@ -119,7 +131,7 @@ module Factor
119
131
  pca=PCA.new(::Matrix.rows(work_matrix))
120
132
  @communalities=pca.communalities(m)
121
133
  @eigenvalues=pca.eigenvalues
122
- com_sum=@communalities.inject(0) {|ac,v| ac+v}
134
+ com_sum = @communalities.inject(0) {|ac,v| ac+v}
123
135
  jump=true
124
136
 
125
137
  break if (com_sum-prev_sum).abs<@delta
@@ -131,6 +143,11 @@ module Factor
131
143
 
132
144
  end
133
145
  @component_matrix=pca.component_matrix(m)
146
+ @component_matrix.extend CovariateMatrix
147
+ @component_matrix.name=_("Factor Matrix")
148
+ @component_matrix.fields_x = @variables_names
149
+ @component_matrix.fields_y = m.times.map {|i| "factor_#{i+1}"}
150
+
134
151
  end
135
152
  alias :compute :iterate
136
153
 
@@ -182,18 +199,33 @@ module Factor
182
199
  t.row([@fields[i], sprintf("%0.4f", initial_communalities[i]), sprintf("%0.3f", com)])
183
200
  }
184
201
  end
185
- s.table(:name=>_("Eigenvalues"), :header=>[_("Variable"),_("Value")]) do |t|
202
+ s.table(:name=>_("Total Variance"), :header=>[_("Factor"), _("I.E.Total"), _("I.E. %"), _("I.E.Cum. %"),
203
+ _("S.L.Total"), _("S.L. %"), _("S.L.Cum. %")
204
+ ]) do |t|
205
+ ac_eigen,ac_i_eigen=0,0
186
206
  @initial_eigenvalues.each_with_index {|eigenvalue,i|
187
- t.row([@fields[i], sprintf("%0.3f",eigenvalue)])
207
+ ac_i_eigen+=eigenvalue
208
+ ac_eigen+=@eigenvalues[i]
209
+ new_row=[
210
+ _("Factor %d") % (i+1),
211
+ sprintf("%0.3f",eigenvalue),
212
+ sprintf("%0.3f%%", eigenvalue*100.quo(@n_variables)),
213
+ sprintf("%0.3f",ac_i_eigen*100.quo(@n_variables))
214
+ ]
215
+ if i<@m
216
+ new_row.concat [
217
+ sprintf("%0.3f", @eigenvalues[i]),
218
+ sprintf("%0.3f%%", @eigenvalues[i]*100.quo(@n_variables)),
219
+ sprintf("%0.3f",ac_eigen*100.quo(@n_variables))
220
+ ]
221
+ else
222
+ new_row.concat ["","",""]
223
+ end
224
+
225
+ t.row new_row
188
226
  }
189
227
  end
190
- s.table(:name=>_("Component Matrix"), :header=>["Variable"]+m.times.collect {|c| c+1}) do |t|
191
- i=0
192
- component_matrix(m).to_a.each do |row|
193
- t.row([@fields[i]]+row.collect {|c| sprintf("%0.3f",c)})
194
- i+=1
195
- end
196
- end
228
+ s.parse_element(component_matrix)
197
229
  end
198
230
  end
199
231
 
@@ -1,10 +1,12 @@
1
1
  module Statsample
2
2
  module Factor
3
- # Base class for rotate matrixes
4
- # References:
3
+ # Base class for component matrix rotation.
4
+ #
5
+ # == References:
5
6
  # * SPSS Manual
6
- # * Johnny Lin code for IDL: http://www.johnny-lin.com/idl_code/varimax_k58.pro
7
- # Use Varimax, Equimax or Quartimax for desired type of rotation
7
+ # * Johnny Lin code for IDL: [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
8
+ #
9
+ # Use subclasses Varimax, Equimax or Quartimax for desired type of rotation
8
10
  # Use:
9
11
  # a = Matrix[ [ 0.4320, 0.8129, 0.3872]
10
12
  # , [ 0.7950, -0.5416, 0.2565]
@@ -18,6 +20,7 @@ module Factor
18
20
  class Rotation
19
21
  EPSILON=1e-15
20
22
  MAX_ITERATIONS=25
23
+ include Summarizable
21
24
  include DirtyMemoize
22
25
  attr_reader :iterations, :rotated, :component_transformation_matrix, :h2
23
26
  # Maximum number of iterations
@@ -29,6 +32,7 @@ module Factor
29
32
  dirty_memoize :iterations, :rotated, :component_transformation_matrix, :h2
30
33
 
31
34
  def initialize(matrix, opts=Hash.new)
35
+ @name=_("%s rotation") % rotation_name
32
36
  @matrix=matrix
33
37
  @n=@matrix.row_size # Variables, p on original
34
38
  @m=@matrix.column_size # Factors, r on original
@@ -41,6 +45,12 @@ module Factor
41
45
  self.send("#{k}=",v) if self.respond_to? k
42
46
  }
43
47
  end
48
+ def report_building(g)
49
+ g.section(:name=>@name) do |s|
50
+ s.parse_element(rotated)
51
+ s.parse_element(component_transformation_matrix)
52
+ end
53
+ end
44
54
  alias_method :communalities, :h2
45
55
  alias_method :rotated_component_matrix, :rotated
46
56
  def compute
@@ -111,7 +121,33 @@ module Factor
111
121
  end #i
112
122
  end # while
113
123
  @rotated=h*bh
124
+ @rotated.extend CovariateMatrix
125
+ @rotated.name=_("Rotated Component matrix")
126
+
127
+ if @matrix.respond_to? :fields_x
128
+ @rotated.fields_x = @matrix.fields_x
129
+ else
130
+ @rotated.fields_x = @n.times.map {|i| "var_#{i+1}"}
131
+ end
132
+ if @matrix.respond_to? :fields_y
133
+ @rotated.fields_y = @matrix.fields_y
134
+ else
135
+ @rotated.fields_y = @m.times.map {|i| "var_#{i+1}"}
136
+ end
137
+
138
+
139
+
114
140
  @component_transformation_matrix=t
141
+ @component_transformation_matrix.extend CovariateMatrix
142
+ @component_transformation_matrix.name=_("Component transformation matrix")
143
+
144
+ if @matrix.respond_to? :fields_y
145
+ @component_transformation_matrix.fields = @matrix.fields_y
146
+
147
+ else
148
+ @component_transformation_matrix.fields = @m.times.map {|i| "var_#{i+1}"}
149
+ end
150
+
115
151
  @rotated
116
152
  end
117
153
 
@@ -123,6 +159,9 @@ module Factor
123
159
  def y(a,b,c,d)
124
160
  c-((a**2-b**2) / @n.to_f)
125
161
  end
162
+ def rotation_name
163
+ "Varimax"
164
+ end
126
165
  end
127
166
  class Equimax < Rotation
128
167
  def x(a,b,c,d)
@@ -131,6 +170,10 @@ module Factor
131
170
  def y(a,b,c,d)
132
171
  c-@m*((a**2-b**2) / (2*@n.to_f))
133
172
  end
173
+ def rotation_name
174
+ "Equimax"
175
+ end
176
+
134
177
  end
135
178
  class Quartimax < Rotation
136
179
  def x(a,b,c,d)
@@ -139,6 +182,10 @@ module Factor
139
182
  def y(a,b,c,d)
140
183
  c
141
184
  end
185
+ def rotation_name
186
+ "Quartimax"
187
+ end
188
+
142
189
  end
143
190
  end
144
191
  end
@@ -57,20 +57,20 @@ module Statsample
57
57
  end
58
58
  def correlation
59
59
  if(type==:covariance)
60
- matrix=Matrix.rows(row_size.times.collect { |i|
61
- column_size.times.collect { |j|
62
- if i==j
63
- 1.0
64
- else
65
- self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
66
- end
67
- }
68
- })
69
- matrix.extend CovariateMatrix
70
- matrix.fields_x=fields_x
71
- matrix.fields_y=fields_y
72
- matrix.type=:correlation
73
- matrix
60
+ matrix=Matrix.rows(row_size.times.collect { |i|
61
+ column_size.times.collect { |j|
62
+ if i==j
63
+ 1.0
64
+ else
65
+ self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
66
+ end
67
+ }
68
+ })
69
+ matrix.extend CovariateMatrix
70
+ matrix.fields_x=fields_x
71
+ matrix.fields_y=fields_y
72
+ matrix.type=:correlation
73
+ matrix
74
74
  else
75
75
  self
76
76
  end
@@ -30,6 +30,7 @@ module Statsample
30
30
  # Get Cronbach's alpha from a covariance matrix
31
31
  def cronbach_alpha_from_covariance_matrix(cov)
32
32
  n=cov.row_size
33
+ raise "covariance matrix should have at least 2 variables" if n < 2
33
34
  s2=n.times.inject(0) {|ac,i| ac+cov[i,i]}
34
35
  (n.quo(n-1))*(1-(s2.quo(cov.total_sum)))
35
36
  end
@@ -32,10 +32,14 @@ module Statsample
32
32
  attr_accessor :summary_pca
33
33
  # Add Principal Axis to summary
34
34
  attr_accessor :summary_principal_axis
35
+ # Add Parallel Analysis to summary
36
+ attr_accessor :summary_parallel_analysis
35
37
  # Options for Factor::PCA object
36
38
  attr_accessor :pca_options
37
39
  # Options for Factor::PrincipalAxis
38
40
  attr_accessor :principal_axis_options
41
+ # Options for Parallel Analysis
42
+ attr_accessor :parallel_analysis_options
39
43
  # Generates a new MultiScaleAnalysis
40
44
  # Opts could be any accessor of the class
41
45
  # * :name,
@@ -50,12 +54,15 @@ module Statsample
50
54
  #
51
55
  def initialize(opts=Hash.new, &block)
52
56
  @scales=Hash.new
57
+ @scales_keys=Array.new
53
58
  opts_default={ :name=>_("Multiple Scale analysis"),
54
59
  :summary_correlation_matrix=>false,
55
60
  :summary_pca=>false,
56
61
  :summary_principal_axis=>false,
62
+ :summary_parallel_analysis=>false,
57
63
  :pca_options=>Hash.new,
58
- :principal_axis_options=>Hash.new
64
+ :principal_axis_options=>Hash.new,
65
+ :parallel_analysis_options=>Hash.new
59
66
  }
60
67
  @opts=opts_default.merge(opts)
61
68
  @opts.each{|k,v|
@@ -72,38 +79,49 @@ module Statsample
72
79
  #
73
80
  # If the second parameter is nil, returns the ScaleAnalysis
74
81
  # <tt>code</tt>.
75
- def scale(code,ds=nil, opts=nil)
82
+ def scale(code, ds=nil, opts=nil)
76
83
  if ds.nil?
77
84
  @scales[code]
78
85
  else
79
86
  opts={:name=>_("Scale %s") % code} if opts.nil?
87
+ @scales_keys.push(code)
80
88
  @scales[code]=ScaleAnalysis.new(ds, opts)
81
89
  end
82
90
  end
83
91
  # Delete ScaleAnalysis named <tt>code</tt>
84
92
  def delete_scale(code)
93
+ @scales_keys.delete code
85
94
  @scales.delete code
86
95
  end
87
96
  # Retrieves a Principal Component Analysis (Factor::PCA)
88
97
  # using all scales, using <tt>opts</tt> as options.
89
98
  def pca(opts=nil)
90
99
  opts||=pca_options
91
- Statsample::Factor::PCA.new(correlation_matrix,opts)
100
+ Statsample::Factor::PCA.new(correlation_matrix, opts)
92
101
  end
93
102
  # Retrieves a PrincipalAxis Analysis (Factor::PrincipalAxis)
94
103
  # using all scales, using <tt>opts</tt> as options.
95
104
  def principal_axis_analysis(opts=nil)
96
105
  opts||=principal_axis_options
97
- Statsample::Factor::PrincipalAxis.new(correlation_matrix,opts)
106
+ Statsample::Factor::PrincipalAxis.new(correlation_matrix, opts)
107
+ end
108
+ def dataset_from_scales
109
+ ds=Dataset.new(@scales_keys)
110
+ @scales.each_pair do |code,scale|
111
+ ds[code.to_s]=scale.ds.vector_sum
112
+ ds[code.to_s].name=scale.name
113
+ end
114
+ ds.update_valid_data
115
+ ds
116
+ end
117
+ def parallel_analysis(opts=nil)
118
+ opts||=parallel_analysis_options
119
+ Statsample::Factor::ParallelAnalysis.new(dataset_from_scales, opts)
98
120
  end
99
121
  # Retrieves a Correlation Matrix between scales.
100
122
  #
101
123
  def correlation_matrix
102
- vectors=Hash.new
103
- @scales.each_pair do |code,scale|
104
- vectors[code.to_s]=scale.ds.vector_sum
105
- end
106
- Statsample::Bivariate.correlation_matrix(vectors.to_dataset)
124
+ Statsample::Bivariate.correlation_matrix(dataset_from_scales)
107
125
  end
108
126
  def report_building(b) # :nodoc:
109
127
  b.section(:name=>name) do |s|
@@ -126,7 +144,14 @@ module Statsample
126
144
  s.section(:name=>_("Principal Axis for %s") % name) do |s2|
127
145
  s2.parse_element(principal_axis_analysis)
128
146
  end
129
- end
147
+ end
148
+
149
+ if summary_parallel_analysis
150
+ s.section(:name=>_("Parallel Analysis for %s") % name) do |s2|
151
+ s2.parse_element(parallel_analysis)
152
+ end
153
+ end
154
+
130
155
  end
131
156
  end
132
157
  end