statsample 0.13.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,27 +29,42 @@ module Factor
29
29
  # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
30
30
  #
31
31
  class PCA
32
+ include Summarizable
32
33
  # Name of analysis
33
34
  attr_accessor :name
35
+
34
36
  # Number of factors. Set by default to the number of factors
35
37
  # with eigen values > 1
36
38
  attr_accessor :m
37
39
  # Use GSL if available
38
40
  attr_accessor :use_gsl
39
- include Summarizable
40
-
41
+ # Add to the summary a rotation report
42
+ attr_accessor :summary_rotation
43
+ # Add to the summary a parallel analysis report
44
+ attr_accessor :summary_parallel_analysis
45
+ # Type of rotation. By default, Statsample::Factor::Varimax
46
+ attr_accessor :rotation_type
47
+
41
48
  def initialize(matrix, opts=Hash.new)
42
- @use_gsl=nil
49
+ @use_gsl=nil
43
50
  @name=_("Principal Component Analysis")
44
51
  @matrix=matrix
45
52
  @n_variables=@matrix.column_size
46
53
  @m=nil
54
+
55
+ @rotation_type=Statsample::Factor::Varimax
56
+
47
57
  opts.each{|k,v|
48
58
  self.send("#{k}=",v) if self.respond_to? k
49
59
  }
50
60
  if @use_gsl.nil?
51
61
  @use_gsl=Statsample.has_gsl?
52
62
  end
63
+ if @matrix.respond_to? :fields
64
+ @variables_names=@matrix.fields
65
+ else
66
+ @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
67
+ end
53
68
  calculate_eigenpairs
54
69
  if @m.nil?
55
70
  # Set number of factors with eigenvalues > 1
@@ -57,6 +72,9 @@ module Factor
57
72
  end
58
73
 
59
74
  end
75
+ def rotation
76
+ @rotation_type.new(component_matrix)
77
+ end
60
78
  def create_centered_ds
61
79
  h={}
62
80
  @original_ds.factors.each {|f|
@@ -93,7 +111,13 @@ module Factor
93
111
  gammas.push(Math::sqrt(@eigenpairs[i][0]))
94
112
  }
95
113
  gamma_m=::Matrix.diagonal(*gammas)
96
- (omega_m*(gamma_m)).to_matrix
114
+ cm=(omega_m*(gamma_m)).to_matrix
115
+
116
+ cm.extend CovariateMatrix
117
+ cm.name=_("Component matrix")
118
+ cm.fields_x = @variables_names
119
+ cm.fields_y = m.times.map {|i| "component_#{i+1}"}
120
+ cm
97
121
  end
98
122
  # Communalities for all variables given m factors
99
123
  def communalities(m=nil)
@@ -122,12 +146,7 @@ module Factor
122
146
  end
123
147
 
124
148
  def calculate_eigenpairs_ruby
125
- eigval, eigvec= @matrix.eigenvaluesJacobi, @matrix.cJacobiV
126
- @eigenpairs={}
127
- eigval.to_a.each_index {|i|
128
- @eigenpairs[eigval[i]]=eigvec.column(i)
129
- }
130
- @eigenpairs=@eigenpairs.sort.reverse
149
+ @eigenpairs = @matrix.eigenpairs
131
150
  end
132
151
  def calculate_eigenpairs_gsl
133
152
  eigval, eigvec= GSL::Eigen.symmv(@matrix.to_gsl)
@@ -140,25 +159,24 @@ module Factor
140
159
 
141
160
  def report_building(builder) # :nodoc:
142
161
  builder.section(:name=>@name) do |generator|
143
- generator.text _("Number of factors: %d") % m
144
- generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction")]) do |t|
145
- communalities(m).each_with_index {|com,i|
146
- t.row([i, 1.0, sprintf("%0.3f", com)])
147
- }
148
- end
149
- generator.table(:name=>_("Eigenvalues"), :header=>[_("Variable"),_("Value")]) do |t|
150
- eigenvalues.each_with_index {|eigenvalue,i|
151
- t.row([i, sprintf("%0.3f",eigenvalue)])
152
- }
153
- end
154
-
155
- generator.table(:name=>_("Component Matrix"), :header=>[_("Variable")]+m.times.collect {|c| c+1}) do |t|
156
- i=0
157
- component_matrix(m).to_a.each do |row|
158
- t.row([i]+row.collect {|c| sprintf("%0.3f",c)})
159
- i+=1
162
+ generator.text _("Number of factors: %d") % m
163
+ generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction")]) do |t|
164
+ communalities(m).each_with_index {|com, i|
165
+ t.row([@variables_names[i], 1.0, sprintf("%0.3f", com)])
166
+ }
167
+ end
168
+
169
+ generator.table(:name=>_("Total Variance Explained"), :header=>[_("Component"), _("E.Total"), _("%"), _("Cum. %")]) do |t|
170
+ ac_eigen=0
171
+ eigenvalues.each_with_index {|eigenvalue,i|
172
+ ac_eigen+=eigenvalue
173
+ t.row([_("Component %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(@n_variables)), sprintf("%0.3f",ac_eigen*100.quo(@n_variables))])
174
+ }
175
+ end
176
+ generator.parse_element(component_matrix(m))
177
+ if (summary_rotation)
178
+ generator.parse_element(rotation)
160
179
  end
161
- end
162
180
  end
163
181
  end
164
182
  private :calculate_eigenpairs, :create_centered_ds
@@ -29,31 +29,39 @@ module Factor
29
29
  class PrincipalAxis
30
30
  include DirtyMemoize
31
31
  include Summarizable
32
- # Minimum difference between succesive iterations on sum of communalities
33
- DELTA=1e-3
34
- # Maximum number of iterations
35
- MAX_ITERATIONS=50
36
- # Number of factors. Set by default to the number of factors
37
- # with eigen values > 1 on PCA over data
38
- attr_accessor :m
39
-
40
32
  # Name of analysis
41
33
  attr_accessor :name
34
+
35
+ # Number of factors. Set by default to the number of factors
36
+ # with eigenvalues > 1 (Kaiser criterion).
37
+ #
38
+ # _Warning:_ Kaiser criterion overfactors! Give yourself some time
39
+ # and use Horn's Parallel Analysis.
40
+ #
41
+ attr_accessor :m
42
42
 
43
43
  # Number of iterations required to converge
44
44
  attr_reader :iterations
45
+
45
46
  # Initial eigenvalues
46
47
  attr_reader :initial_eigenvalues
47
- # Tolerance for iteratios.
48
+
49
+ # Tolerance for iterations
48
50
  attr_accessor :epsilon
51
+
49
52
  # Use SMC(squared multiple correlations) as diagonal. If false, use 1
50
53
  attr_accessor :smc
54
+
51
55
  # Maximum number of iterations
52
56
  attr_accessor :max_iterations
57
+
53
58
  # Eigenvalues of factor analysis
54
59
  attr_reader :eigenvalues
55
60
 
56
-
61
+ # Minimum difference between successive iterations on sum of communalities
62
+ DELTA=1e-3
63
+ # Maximum number of iterations
64
+ MAX_ITERATIONS=25
57
65
 
58
66
  def initialize(matrix, opts=Hash.new)
59
67
  @matrix=matrix
@@ -62,7 +70,7 @@ module Factor
62
70
  else
63
71
  @fields=@matrix.row_size.times.map {|i| _("Variable %d") % (i+1)}
64
72
  end
65
-
73
+ @n_variables=@matrix.row_size
66
74
  @name=""
67
75
  @m=nil
68
76
  @initial_eigenvalues=nil
@@ -74,7 +82,11 @@ module Factor
74
82
  opts.each{|k,v|
75
83
  self.send("#{k}=",v) if self.respond_to? k
76
84
  }
77
-
85
+ if @matrix.respond_to? :fields
86
+ @variables_names=@matrix.fields
87
+ else
88
+ @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
89
+ end
78
90
  if @m.nil?
79
91
  pca=PCA.new(::Matrix.rows(@matrix.to_a))
80
92
  @m=pca.m
@@ -119,7 +131,7 @@ module Factor
119
131
  pca=PCA.new(::Matrix.rows(work_matrix))
120
132
  @communalities=pca.communalities(m)
121
133
  @eigenvalues=pca.eigenvalues
122
- com_sum=@communalities.inject(0) {|ac,v| ac+v}
134
+ com_sum = @communalities.inject(0) {|ac,v| ac+v}
123
135
  jump=true
124
136
 
125
137
  break if (com_sum-prev_sum).abs<@delta
@@ -131,6 +143,11 @@ module Factor
131
143
 
132
144
  end
133
145
  @component_matrix=pca.component_matrix(m)
146
+ @component_matrix.extend CovariateMatrix
147
+ @component_matrix.name=_("Factor Matrix")
148
+ @component_matrix.fields_x = @variables_names
149
+ @component_matrix.fields_y = m.times.map {|i| "factor_#{i+1}"}
150
+
134
151
  end
135
152
  alias :compute :iterate
136
153
 
@@ -182,18 +199,33 @@ module Factor
182
199
  t.row([@fields[i], sprintf("%0.4f", initial_communalities[i]), sprintf("%0.3f", com)])
183
200
  }
184
201
  end
185
- s.table(:name=>_("Eigenvalues"), :header=>[_("Variable"),_("Value")]) do |t|
202
+ s.table(:name=>_("Total Variance"), :header=>[_("Factor"), _("I.E.Total"), _("I.E. %"), _("I.E.Cum. %"),
203
+ _("S.L.Total"), _("S.L. %"), _("S.L.Cum. %")
204
+ ]) do |t|
205
+ ac_eigen,ac_i_eigen=0,0
186
206
  @initial_eigenvalues.each_with_index {|eigenvalue,i|
187
- t.row([@fields[i], sprintf("%0.3f",eigenvalue)])
207
+ ac_i_eigen+=eigenvalue
208
+ ac_eigen+=@eigenvalues[i]
209
+ new_row=[
210
+ _("Factor %d") % (i+1),
211
+ sprintf("%0.3f",eigenvalue),
212
+ sprintf("%0.3f%%", eigenvalue*100.quo(@n_variables)),
213
+ sprintf("%0.3f",ac_i_eigen*100.quo(@n_variables))
214
+ ]
215
+ if i<@m
216
+ new_row.concat [
217
+ sprintf("%0.3f", @eigenvalues[i]),
218
+ sprintf("%0.3f%%", @eigenvalues[i]*100.quo(@n_variables)),
219
+ sprintf("%0.3f",ac_eigen*100.quo(@n_variables))
220
+ ]
221
+ else
222
+ new_row.concat ["","",""]
223
+ end
224
+
225
+ t.row new_row
188
226
  }
189
227
  end
190
- s.table(:name=>_("Component Matrix"), :header=>["Variable"]+m.times.collect {|c| c+1}) do |t|
191
- i=0
192
- component_matrix(m).to_a.each do |row|
193
- t.row([@fields[i]]+row.collect {|c| sprintf("%0.3f",c)})
194
- i+=1
195
- end
196
- end
228
+ s.parse_element(component_matrix)
197
229
  end
198
230
  end
199
231
 
@@ -1,10 +1,12 @@
1
1
  module Statsample
2
2
  module Factor
3
- # Base class for rotate matrixes
4
- # References:
3
+ # Base class for component matrix rotation.
4
+ #
5
+ # == References:
5
6
  # * SPSS Manual
6
- # * Johnny Lin code for IDL: http://www.johnny-lin.com/idl_code/varimax_k58.pro
7
- # Use Varimax, Equimax or Quartimax for desired type of rotation
7
+ # * Johnny Lin code for IDL: [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
8
+ #
9
+ # Use subclasses Varimax, Equimax or Quartimax for desired type of rotation
8
10
  # Use:
9
11
  # a = Matrix[ [ 0.4320, 0.8129, 0.3872]
10
12
  # , [ 0.7950, -0.5416, 0.2565]
@@ -18,6 +20,7 @@ module Factor
18
20
  class Rotation
19
21
  EPSILON=1e-15
20
22
  MAX_ITERATIONS=25
23
+ include Summarizable
21
24
  include DirtyMemoize
22
25
  attr_reader :iterations, :rotated, :component_transformation_matrix, :h2
23
26
  # Maximum number of iterations
@@ -29,6 +32,7 @@ module Factor
29
32
  dirty_memoize :iterations, :rotated, :component_transformation_matrix, :h2
30
33
 
31
34
  def initialize(matrix, opts=Hash.new)
35
+ @name=_("%s rotation") % rotation_name
32
36
  @matrix=matrix
33
37
  @n=@matrix.row_size # Variables, p on original
34
38
  @m=@matrix.column_size # Factors, r on original
@@ -41,6 +45,12 @@ module Factor
41
45
  self.send("#{k}=",v) if self.respond_to? k
42
46
  }
43
47
  end
48
+ def report_building(g)
49
+ g.section(:name=>@name) do |s|
50
+ s.parse_element(rotated)
51
+ s.parse_element(component_transformation_matrix)
52
+ end
53
+ end
44
54
  alias_method :communalities, :h2
45
55
  alias_method :rotated_component_matrix, :rotated
46
56
  def compute
@@ -111,7 +121,33 @@ module Factor
111
121
  end #i
112
122
  end # while
113
123
  @rotated=h*bh
124
+ @rotated.extend CovariateMatrix
125
+ @rotated.name=_("Rotated Component matrix")
126
+
127
+ if @matrix.respond_to? :fields_x
128
+ @rotated.fields_x = @matrix.fields_x
129
+ else
130
+ @rotated.fields_x = @n.times.map {|i| "var_#{i+1}"}
131
+ end
132
+ if @matrix.respond_to? :fields_y
133
+ @rotated.fields_y = @matrix.fields_y
134
+ else
135
+ @rotated.fields_y = @m.times.map {|i| "var_#{i+1}"}
136
+ end
137
+
138
+
139
+
114
140
  @component_transformation_matrix=t
141
+ @component_transformation_matrix.extend CovariateMatrix
142
+ @component_transformation_matrix.name=_("Component transformation matrix")
143
+
144
+ if @matrix.respond_to? :fields_y
145
+ @component_transformation_matrix.fields = @matrix.fields_y
146
+
147
+ else
148
+ @component_transformation_matrix.fields = @m.times.map {|i| "var_#{i+1}"}
149
+ end
150
+
115
151
  @rotated
116
152
  end
117
153
 
@@ -123,6 +159,9 @@ module Factor
123
159
  def y(a,b,c,d)
124
160
  c-((a**2-b**2) / @n.to_f)
125
161
  end
162
+ def rotation_name
163
+ "Varimax"
164
+ end
126
165
  end
127
166
  class Equimax < Rotation
128
167
  def x(a,b,c,d)
@@ -131,6 +170,10 @@ module Factor
131
170
  def y(a,b,c,d)
132
171
  c-@m*((a**2-b**2) / (2*@n.to_f))
133
172
  end
173
+ def rotation_name
174
+ "Equimax"
175
+ end
176
+
134
177
  end
135
178
  class Quartimax < Rotation
136
179
  def x(a,b,c,d)
@@ -139,6 +182,10 @@ module Factor
139
182
  def y(a,b,c,d)
140
183
  c
141
184
  end
185
+ def rotation_name
186
+ "Quartimax"
187
+ end
188
+
142
189
  end
143
190
  end
144
191
  end
@@ -57,20 +57,20 @@ module Statsample
57
57
  end
58
58
  def correlation
59
59
  if(type==:covariance)
60
- matrix=Matrix.rows(row_size.times.collect { |i|
61
- column_size.times.collect { |j|
62
- if i==j
63
- 1.0
64
- else
65
- self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
66
- end
67
- }
68
- })
69
- matrix.extend CovariateMatrix
70
- matrix.fields_x=fields_x
71
- matrix.fields_y=fields_y
72
- matrix.type=:correlation
73
- matrix
60
+ matrix=Matrix.rows(row_size.times.collect { |i|
61
+ column_size.times.collect { |j|
62
+ if i==j
63
+ 1.0
64
+ else
65
+ self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
66
+ end
67
+ }
68
+ })
69
+ matrix.extend CovariateMatrix
70
+ matrix.fields_x=fields_x
71
+ matrix.fields_y=fields_y
72
+ matrix.type=:correlation
73
+ matrix
74
74
  else
75
75
  self
76
76
  end
@@ -30,6 +30,7 @@ module Statsample
30
30
  # Get Cronbach's alpha from a covariance matrix
31
31
  def cronbach_alpha_from_covariance_matrix(cov)
32
32
  n=cov.row_size
33
+ raise "covariance matrix should have at least 2 variables" if n < 2
33
34
  s2=n.times.inject(0) {|ac,i| ac+cov[i,i]}
34
35
  (n.quo(n-1))*(1-(s2.quo(cov.total_sum)))
35
36
  end
@@ -32,10 +32,14 @@ module Statsample
32
32
  attr_accessor :summary_pca
33
33
  # Add Principal Axis to summary
34
34
  attr_accessor :summary_principal_axis
35
+ # Add Parallel Analysis to summary
36
+ attr_accessor :summary_parallel_analysis
35
37
  # Options for Factor::PCA object
36
38
  attr_accessor :pca_options
37
39
  # Options for Factor::PrincipalAxis
38
40
  attr_accessor :principal_axis_options
41
+ # Options for Parallel Analysis
42
+ attr_accessor :parallel_analysis_options
39
43
  # Generates a new MultiScaleAnalysis
40
44
  # Opts could be any accessor of the class
41
45
  # * :name,
@@ -50,12 +54,15 @@ module Statsample
50
54
  #
51
55
  def initialize(opts=Hash.new, &block)
52
56
  @scales=Hash.new
57
+ @scales_keys=Array.new
53
58
  opts_default={ :name=>_("Multiple Scale analysis"),
54
59
  :summary_correlation_matrix=>false,
55
60
  :summary_pca=>false,
56
61
  :summary_principal_axis=>false,
62
+ :summary_parallel_analysis=>false,
57
63
  :pca_options=>Hash.new,
58
- :principal_axis_options=>Hash.new
64
+ :principal_axis_options=>Hash.new,
65
+ :parallel_analysis_options=>Hash.new
59
66
  }
60
67
  @opts=opts_default.merge(opts)
61
68
  @opts.each{|k,v|
@@ -72,38 +79,49 @@ module Statsample
72
79
  #
73
80
  # If the second parameter is empty, returns the ScaleAnalysis
74
81
  # <tt>code</tt>.
75
- def scale(code,ds=nil, opts=nil)
82
+ def scale(code, ds=nil, opts=nil)
76
83
  if ds.nil?
77
84
  @scales[code]
78
85
  else
79
86
  opts={:name=>_("Scale %s") % code} if opts.nil?
87
+ @scales_keys.push(code)
80
88
  @scales[code]=ScaleAnalysis.new(ds, opts)
81
89
  end
82
90
  end
83
91
  # Delete ScaleAnalysis named <tt>code</tt>
84
92
  def delete_scale(code)
93
+ @scales_keys.delete code
85
94
  @scales.delete code
86
95
  end
87
96
  # Retrieves a Principal Component Analysis (Factor::PCA)
88
97
  # using all scales, using <tt>opts</tt> as options.
89
98
  def pca(opts=nil)
90
99
  opts||=pca_options
91
- Statsample::Factor::PCA.new(correlation_matrix,opts)
100
+ Statsample::Factor::PCA.new(correlation_matrix, opts)
92
101
  end
93
102
  # Retrieves a PrincipalAxis Analysis (Factor::PrincipalAxis)
94
103
  # using all scales, using <tt>opts</tt> as options.
95
104
  def principal_axis_analysis(opts=nil)
96
105
  opts||=principal_axis_options
97
- Statsample::Factor::PrincipalAxis.new(correlation_matrix,opts)
106
+ Statsample::Factor::PrincipalAxis.new(correlation_matrix, opts)
107
+ end
108
+ def dataset_from_scales
109
+ ds=Dataset.new(@scales_keys)
110
+ @scales.each_pair do |code,scale|
111
+ ds[code.to_s]=scale.ds.vector_sum
112
+ ds[code.to_s].name=scale.name
113
+ end
114
+ ds.update_valid_data
115
+ ds
116
+ end
117
+ def parallel_analysis(opts=nil)
118
+ opts||=parallel_analysis_options
119
+ Statsample::Factor::ParallelAnalysis.new(dataset_from_scales, opts)
98
120
  end
99
121
  # Retrieves a Correlation Matrix between scales.
100
122
  #
101
123
  def correlation_matrix
102
- vectors=Hash.new
103
- @scales.each_pair do |code,scale|
104
- vectors[code.to_s]=scale.ds.vector_sum
105
- end
106
- Statsample::Bivariate.correlation_matrix(vectors.to_dataset)
124
+ Statsample::Bivariate.correlation_matrix(dataset_from_scales)
107
125
  end
108
126
  def report_building(b) # :nodoc:
109
127
  b.section(:name=>name) do |s|
@@ -126,7 +144,14 @@ module Statsample
126
144
  s.section(:name=>_("Principal Axis for %s") % name) do |s2|
127
145
  s2.parse_element(principal_axis_analysis)
128
146
  end
129
- end
147
+ end
148
+
149
+ if summary_parallel_analysis
150
+ s.section(:name=>_("Parallel Analysis for %s") % name) do |s2|
151
+ s2.parse_element(parallel_analysis)
152
+ end
153
+ end
154
+
130
155
  end
131
156
  end
132
157
  end