statsample 0.17.0 → 0.18.0

data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,30 @@
+ === 0.18.0 / 2011-01-07
+ * New Statsample.load_excel
+ * New Statsample.load_csv
+ * Statsample::Dataset#[] accepts an array of fields and uses clone
+ * New Dataset#correlation_matrix and Statsample::Dataset#covariance_matrix
+ * Statsample::Dataset#filter adds labels to vectors
+ * Principal Components generation complete on PCA (covariance matrix preferred)
+ * Added note on Statsample::Factor::PCA about erratic signs on eigenvalues
+ * Statsample::Factor::PCA.component_matrix calculated differently for covariance matrices
+ * Improved summary for PCA using covariance matrix
+ * New attribute :label_angle for Statsample::Graph::Boxplot
+ * Fixed Scatterplot scaling problems
+ * New attributes for Scatterplots: groups, minimum_x, minimum_y, maximum_x,
+ * New Statsample::Multiset#union allows creating a new dataset based on a m
+ * New Statsample::Multiset#each to traverse datasets
+ * Bug fix: Vector#standarized and Vector#percentile crashed on nil data
+ * Bug fix: Vector#mean and Vector#sd crashed on data without valid values
+ * Modified method names on Statsample::Factor::PCA: feature_vector to feature_matrix, data_transformation to principal_components
+ * Added Statsample::Vector.vector_centered
+ * Factor::MAP.with_dataset() implemented
+ * Bug fix: Factor::MAP crashed on correlation matrices with non-real eigenvalues
+ * Added documentation for Graph::Histogram
+ * Added MPA to Reliability::MultiScaleAnalysis
+ * Added custom names for returned vectors and datasets
+ * Updated Spanish translation
+ * Graph::Histogram updated: custom x and y max and min, optional normal distribution drawing
+ * Updated Histogram class, with several new methods compatible with GSL::Histogram
+
  === 0.17.0 / 2010-12-09
  * Added Statsample::Graph::Histogram and Statsample::Graph::Boxplot
  * Added Statsample::Reliability::SkillScaleAnalysis for analysis of skill based scales.
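Taken together, the new data-handling entries above shorten the path from a raw file to a matrix. A minimal sketch of the 0.18.0 API; the file name and the fields 'a' and 'b' are hypothetical, and load_csv caches its result in a .ds file next to the source:

    require 'statsample'
    ds = Statsample.load_csv('cases.csv')   # parsed once, reloaded from cases.csv.ds afterwards
    ds.correlation_matrix                   # Statsample::Bivariate.correlation_matrix over all fields
    ds.covariance_matrix(%w{a b})           # restricted to the listed fields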
data/Manifest.txt CHANGED
@@ -105,6 +105,7 @@ po/es/statsample.po
  po/statsample.pot
  references.txt
  setup.rb
+ test/fixtures/bank2.dat
  test/fixtures/correlation_matrix.rb
  test/helpers_tests.rb
  test/test_anovaoneway.rb
data/Rakefile CHANGED
@@ -5,7 +5,6 @@ $:.unshift(File.dirname(__FILE__)+'/lib/')
 
  require 'rubygems'
  require 'statsample'
-
  require 'hoe'
  Hoe.plugin :git
 
@@ -41,9 +40,9 @@ h=Hoe.spec('statsample') do
  #self.testlib=:minitest
  self.rubyforge_name = "ruby-statsample"
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
- self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.3.3"]
+ self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"]
 
- self.extra_dev_deps << ["shoulda"] << ["minitest", "~>2.0"]
+ self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"]
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
  self.post_install_message = <<-EOF
  ***************************************************
@@ -1,14 +1,13 @@
  #!/usr/bin/ruby
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
  $:.unshift('/home/cdx/dev/reportbuilder/lib/')
-
- require 'benchmark'
  require 'statsample'
- n=1000
- a=n.times.map {|i| rand()*20}.to_scale
- hg=Statsample::Graph::Histogram.new(a, :bins=>15)
+ n=3000
+ rng=Distribution::Normal.rng_ugaussian
+ a=n.times.map {|i| rng.call()*20}.to_scale
+ hg=Statsample::Graph::Histogram.new(a, :bins=>20, :line_normal_distribution=>true)
 
  rb=ReportBuilder.new
- rb.add(a.histogram)
+ #rb.add(a.histogram)
  rb.add(hg)
- puts rb.to_text
+ rb.save_html('histogram.html')
data/lib/statsample.rb CHANGED
@@ -118,7 +118,7 @@ module Statsample
  @@has_gsl
  end
 
- VERSION = '0.17.0'
+ VERSION = '0.18.0'
  SPLIT_TOKEN = ","
  autoload(:Database, 'statsample/converters')
  autoload(:Anova, 'statsample/anova')
@@ -157,6 +157,30 @@ module Statsample
  false
  end
  end
+ # Import an Excel file. Caches the result by default.
+ def load_excel(filename, opts=Hash.new, cache=true)
+   file_ds=filename+".ds"
+   if cache and (File.exists? file_ds and File.mtime(file_ds)>File.mtime(filename))
+     ds=Statsample.load(file_ds)
+   else
+     ds=Statsample::Excel.read(filename)
+     ds.save(file_ds) if cache
+   end
+   ds
+ end
+
+ # Import a CSV file. Caches the result by default.
+ def load_csv(filename, opts=Hash.new, cache=true)
+   file_ds=filename+".ds"
+   if cache and (File.exists? file_ds and File.mtime(file_ds)>File.mtime(filename))
+     ds=Statsample.load(file_ds)
+   else
+     ds=Statsample::CSV.read(filename,opts)
+     ds.save(file_ds) if cache
+   end
+   ds
+ end
+
 
  # Create a matrix using vectors as columns.
  # Use:
@@ -58,13 +58,6 @@ module Statsample
  # Calculate sum of squares
  ss=sum_of_squares(v1a,v2a)
  ss.quo(Math::sqrt(v1a.sum_of_squares) * Math::sqrt(v2a.sum_of_squares))
- =begin
- v1s,v2s=v1a.vector_standarized,v2a.vector_standarized
- t=0
- siz=v1s.size
- (0...v1s.size).each {|i| t+=(v1s[i]*v2s[i]) }
- t.quo(v2s.size-1)
- =end
  end
  alias :correlation :pearson
  # Retrieves the value for t test for a pearson correlation
@@ -17,6 +17,7 @@ module Statsample
  fields=[]
  sth.column_info.each {|c|
  vectors[c['name']]=Statsample::Vector.new([])
+ vectors[c['name']].name=c['name']
  vectors[c['name']].type= (c['type_name']=='INTEGER' or c['type_name']=='DOUBLE') ? :scale : :nominal
  fields.push(c['name'])
  }
@@ -35,7 +36,7 @@ module Statsample
  # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
  # Statsample::Database.insert(ds,dbh,"test")
  #
- def insert(ds, dbh,table)
+ def insert(ds, dbh, table)
  require 'dbi'
  query="INSERT INTO #{table} ("+ds.fields.join(",")+") VALUES ("+((["?"]*ds.fields.size).join(","))+")"
  sth=dbh.prepare(query)
@@ -235,6 +236,7 @@ module Statsample
  fields.each {|f|
  ds[f].name=f
  }
+ ds.name=filename
  ds
  end
  end
@@ -25,7 +25,7 @@ module Statsample
  end
  def to_s
  m="Error on iteration: "+@exp.message+"\n"+@exp.backtrace.join("\n")
- m+="\nRow: #{@ds.i}" unless @ds.i.nil?
+ m+="\nRow ##{@ds.i}:#{@ds.case_as_hash(@ds.i)}" unless @ds.i.nil?
  m
  end
  end
@@ -140,7 +140,6 @@ module Statsample
  end
  @i=nil
  end
-
  #
  # Returns a GSL::matrix
  #
@@ -239,6 +238,7 @@ module Statsample
  ds[f]=@vectors[f]
  }
  ds.fields=fields_to_include
+ ds.name=@name
  ds.update_valid_data
  ds
  end
@@ -419,13 +419,15 @@ module Statsample
  # if fields parameter is empty, sum all fields
  def vector_sum(fields=nil)
  fields||=@fields
- collect_with_index do |row, i|
+ vector=collect_with_index do |row, i|
  if(fields.find{|f| !@vectors[f].data_with_nils[i]})
  nil
  else
  fields.inject(0) {|ac,v| ac + row[v].to_f}
  end
  end
+ vector.name=_("Sum from %s") % @name
+ vector
  end
  # Check if #fields attribute is correct, after inserting or deleting vectors
  def check_fields(fields)
@@ -476,7 +478,9 @@ module Statsample
  a.push(sum.quo(size-invalids))
  end
  end
- a.to_vector(:scale)
+ a=a.to_vector(:scale)
+ a.name=_("Means from %s") % @name
+ a
  end
  # Check vectors for type and size.
  def check_length # :nodoc:
@@ -598,8 +602,9 @@ module Statsample
  def[](i)
  if i.is_a? Range
  fields=from_to(i.begin,i.end)
- vectors=fields.inject({}) {|a,v| a[v]=@vectors[v];a}
- Dataset.new(vectors,fields)
+ clone(*fields)
+ elsif i.is_a? Array
+ clone(i)
  else
  raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
  @vectors[i]
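The hunk above lets Dataset#[] take an Array of fields in addition to a Range or a single field name, returning a cloned sub-dataset in the first two cases. A short sketch with hypothetical fields:

    ds = {'a'=>[1,2,3].to_scale, 'b'=>[4,5,6].to_scale, 'c'=>[7,8,9].to_scale}.to_dataset
    ds['a']        # => the Statsample::Vector for field 'a', as before
    ds['a'..'b']   # => cloned Dataset with fields 'a' and 'b'
    ds[%w{a c}]    # => cloned Dataset with fields 'a' and 'c' (new in 0.18.0)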
@@ -661,16 +666,36 @@ module Statsample
  GSL::Matrix.alloc(*rows)
  end
  end
-
-
+
+ # Return a correlation matrix for fields included as parameters.
+ # By default, uses all fields of dataset
+ def correlation_matrix(fields=nil)
+   if fields
+     ds=clone(fields)
+   else
+     ds=self
+   end
+   Statsample::Bivariate.correlation_matrix(ds)
+ end
+ # Return a covariance matrix for fields included as parameters.
+ # By default, uses all fields of dataset
+ def covariance_matrix(fields=nil)
+   if fields
+     ds=clone(fields)
+   else
+     ds=self
+   end
+   Statsample::Bivariate.covariance_matrix(ds)
+ end
 
  # Create a new dataset with all cases which the block returns true
  def filter
  ds=self.dup_empty
  each {|c|
- ds.add_case(c,false) if yield c
+ ds.add_case(c, false) if yield c
  }
  ds.update_valid_data
+ ds.name=_("%s(filtered)") % @name
  ds
  end
 
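With the additions above, correlation_matrix, covariance_matrix and filter all live on the dataset itself, and filtered datasets now carry a descriptive name. A small sketch; the field name 'age' is hypothetical:

    adults = ds.filter {|c| c['age'] >= 18}
    adults.name                 # => "<original name>(filtered)"
    adults.correlation_matrix   # same as Statsample::Bivariate.correlation_matrix(adults)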
@@ -712,6 +737,8 @@ module Statsample
  # puts "Vector #{k1}:"+v1.to_s
  v1.type=@vectors[k1].type
  v1.name=@vectors[k1].name
+ v1.labels=@vectors[k1].labels
+
  }
  }
  ms
@@ -737,9 +764,16 @@ module Statsample
 
  ms.datasets.each do |k,ds|
  ds.update_valid_data
+ ds.name=fields.size.times.map {|i|
+   f=fields[i]
+   sk=k[i]
+   @vectors[f].labeling(sk)
+ }.join("-")
  ds.vectors.each{|k1,v1|
  v1.type=@vectors[k1].type
  v1.name=@vectors[k1].name
+ v1.labels=@vectors[k1].labels
+
  }
  end
  ms
@@ -805,7 +839,7 @@ module Statsample
  vr
  end
  def to_s
- "#<"+self.class.to_s+":"+self.object_id.to_s+" @fields=["+@fields.join(",")+"] cases="+@vectors[@fields[0]].size.to_s
+ "#<"+self.class.to_s+":"+self.object_id.to_s+" @name=#{@name} @fields=["+@fields.join(",")+"] cases="+@vectors[@fields[0]].size.to_s
  end
  def inspect
  self.to_s
@@ -13,7 +13,18 @@ module Statsample
  # * Statsample::Factor::Varimax
  # * Statsample::Factor::Equimax
  # * Statsample::Factor::Quartimax
- # See documentation of each class to use it
+ # * Classes for determining the number of components
+ #   * Statsample::Factor::MAP
+ #   * Statsample::Factor::ParallelAnalysis
+ #
+ # About the number of components, O'Connor (2000) said:
+ #   The two procedures [PA and MAP] complement each other nicely,
+ #   in that the MAP tends to err (when it does err) in the direction
+ #   of underextraction, whereas parallel analysis tends to err
+ #   (when it does err) in the direction of overextraction.
+ #   Optimal decisions are thus likely to be made after considering
+ #   the results of both analytic procedures. (p.10)
+
  module Factor
  # Anti-image covariance matrix.
  # Useful for inspection of desireability of data for factor analysis.
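Following the advice quoted above, both procedures can be run on the same data and compared before settling on the number of components. This is only a sketch: the data file is hypothetical, and it assumes ParallelAnalysis accepts a dataset in its constructor and that both classes expose a number_of_factors reader:

    ds  = Statsample.load_csv('items.csv')
    map = Statsample::Factor::MAP.with_dataset(ds)     # new in 0.18.0
    pa  = Statsample::Factor::ParallelAnalysis.new(ds)
    # Consider both criteria before deciding how many components to retain
    puts "MAP retains: #{map.number_of_factors}"
    puts "PA retains:  #{pa.number_of_factors}"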
@@ -48,6 +48,9 @@ module Statsample
  attr_reader :fm
  # Smallest average squared correlation
  attr_reader :minfm
+ def self.with_dataset(ds,opts=Hash.new)
+   new(ds.correlation_matrix,opts)
+ end
  def initialize(matrix, opts=Hash.new)
  @matrix=matrix
  opts_default={
@@ -76,10 +79,15 @@ module Statsample
  end
  minfm=fm[0]
  nfactors=0
+ @errors=[]
  fm.each_with_index do |v,s|
- if v < minfm
-   minfm=v
-   nfactors=s
+ if v.is_a? Complex
+   @errors.push(s)
+ else
+   if v < minfm
+     minfm=v
+     nfactors=s
+   end
  end
  end
  @number_of_factors=nfactors
@@ -89,13 +97,13 @@ module Statsample
  def report_building(g) #:nodoc:
  g.section(:name=>@name) do |s|
  s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
- eigenvalues.each do |e|
-   t.row(["%0.6f" % e])
+ eigenvalues.each_with_index do |e,i|
+   t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
  end
  end
  s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
  fm.each_with_index do |v,i|
- t.row(["%d" % i, "%0.6f" % v])
+ t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
  end
  end
  s.text(_("The smallest average squared correlation is : %0.6f" % minfm))
@@ -39,15 +39,12 @@ module Statsample
  attr_reader :ds
  # Bootstrap method. <tt>:random</tt> used by default
  # * <tt>:random</tt>: uses number of variables and cases for the dataset
- # * <tt>:data</tt> : sample with replacement from actual data.
-
+ # * <tt>:data</tt> : sample with replacement from actual data.
  attr_accessor :bootstrap_method
  # Uses smc on diagonal of matrixes, to perform simulation
  # of a Principal Axis analysis.
  # By default, false.
-
  attr_accessor :smc
-
  # Percentil over bootstrap eigenvalue should be accepted. 95 by default
  attr_accessor :percentil
  # Correlation matrix used with :raw_data . <tt>:correlation_matrix</tt> used by default
@@ -1,8 +1,14 @@
+ # encoding: UTF-8
  module Statsample
  module Factor
- # Principal Component Analysis (PCA) of a
- # covariance or correlation matrix.
+ # Principal Component Analysis (PCA) of a covariance or
+ # correlation matrix.
  #
+ # NOTE: The sign of the second and later eigenvectors could differ
+ # between Ruby and GSL, so values for PCs and the component matrix
+ # may differ, because extendmatrix and GSL use different methods
+ # to calculate eigenvectors. Using R is worse, because the first
+ # eigenvector could have negative values!
  # For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis
  #
  # == Usage:
@@ -26,6 +32,7 @@ module Factor
  # == References:
  # * SPSS Manual
  # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
+ # * Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
  #
  class PCA
  include Summarizable
@@ -43,12 +50,16 @@ module Factor
  attr_accessor :summary_parallel_analysis
  # Type of rotation. By default, Statsample::Factor::Rotation::Varimax
  attr_accessor :rotation_type
-
+ attr_accessor :type
  def initialize(matrix, opts=Hash.new)
  @use_gsl=nil
  @name=_("Principal Component Analysis")
  @matrix=matrix
- @n_variables=@matrix.column_size
+ @n_variables=@matrix.column_size
+ @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| _("VAR_%d") % (i+1)}
+
+ @type = @matrix.respond_to?(:type) ? @matrix.type : :correlation
+
  @m=nil
 
  @rotation_type=Statsample::Factor::Varimax
@@ -65,15 +76,19 @@ module Factor
  @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
  end
  calculate_eigenpairs
+
  if @m.nil?
  # Set number of factors with eigenvalues > 1
  @m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
  end
-
+
  end
  def rotation
  @rotation_type.new(component_matrix)
  end
+ def total_eigenvalues
+   eigenvalues.inject(0) {|ac,v| ac+v}
+ end
  def create_centered_ds
  h={}
  @original_ds.factors.each {|f|
@@ -83,8 +98,10 @@ module Factor
  @ds=h.to_dataset
  end
 
- # Feature vector for m factors
- def feature_vector(m=nil)
+ # Feature matrix for +m+ factors.
+ # Returns +m+ eigenvectors as columns.
+ # So, i=variable, j=component
+ def feature_matrix(m=nil)
  m||=@m
  omega_m=::Matrix.build(@n_variables, m) {0}
  m.times do |i|
@@ -92,15 +109,48 @@ module Factor
  end
  omega_m
  end
- # data_transformation
- def data_transformation(data_matrix, m)
+ # Returns Principal Components for +input+ matrix or dataset.
+ # The number of PCs returned is equal to parameter +m+.
+ # If +m+ isn't set, it defaults to the number of PCs selected at object creation.
+ def principal_components(input, m=nil)
+   data_matrix=input.to_matrix
+   var_names=(data_matrix.respond_to? :fields_y) ? data_matrix.fields_y : data_matrix.column_size.times.map {|i| "VAR_%d" % (i+1)}
  m||=@m
- raise "Data variables number should be equal to original variable number" if data_matrix.size2!=@n_variables
- fv=feature_vector(m)
- (fv.transpose*data_matrix.transpose).transpose
+
+   raise "data matrix variables<>pca variables" if data_matrix.column_size!=@n_variables
+
+   fv=feature_matrix(m)
+   pcs=(fv.transpose*data_matrix.transpose).transpose
+   pcs.extend Statsample::NamedMatrix
+   pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
+   pcs.to_dataset
  end
- # Component matrix for m factors
  def component_matrix(m=nil)
+   var="component_matrix_#{type}"
+   send(var,m)
+ end
+ # Matrix with correlations between components and
+ # variables. Based on Härdle & Simar (2003, p.243)
+ def component_matrix_covariance(m=nil)
+   m||=@m
+   raise "m should be > 0" if m<1
+   ff=feature_matrix(m)
+   cm=::Matrix.build(@n_variables, m) {0}
+   @n_variables.times {|i|
+     m.times {|j|
+       cm[i,j]=ff[i,j] * Math.sqrt(eigenvalues[j] / @matrix[i,i])
+     }
+   }
+   cm.extend CovariateMatrix
+   cm.name=_("Component matrix (from covariance)")
+   cm.fields_x = @variables_names
+   cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
+
+   cm
+ end
+ # Matrix with correlations between components and
+ # variables
+ def component_matrix_correlation(m=nil)
  m||=@m
  raise "m should be > 0" if m<1
  omega_m=::Matrix.build(@n_variables, m) {0}
@@ -115,17 +165,17 @@ module Factor
  cm.extend CovariateMatrix
  cm.name=_("Component matrix")
  cm.fields_x = @variables_names
- cm.fields_y = m.times.map {|i| "component_#{i+1}"}
+ cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
  cm
  end
- # Communalities for all variables given m factors
  def communalities(m=nil)
+
  m||=@m
  h=[]
  @n_variables.times do |i|
  sum=0
  m.times do |j|
- sum+=@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2
+ sum+=(@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
  end
  h.push(sum)
  end
@@ -135,7 +185,11 @@ module Factor
  def eigenvalues
  @eigenpairs.collect {|c| c[0] }
  end
-
+ def eigenvectors
+   @eigenpairs.collect {|c|
+     c[1].to_matrix
+   }
+ end
  def calculate_eigenpairs
  if @use_gsl
  calculate_eigenpairs_gsl
@@ -144,14 +198,18 @@ module Factor
  end
  end
 
- def calculate_eigenpairs_ruby
+ def calculate_eigenpairs_ruby #:nodoc:
  @eigenpairs = @matrix.eigenpairs_ruby
  end
- def calculate_eigenpairs_gsl
+ # Eigenvectors calculated with GSL.
+ # Note: the signs of some vectors could differ from the
+ # Ruby-generated ones
+ def calculate_eigenpairs_gsl #:nodoc:
  eigval, eigvec= GSL::Eigen.symmv(@matrix.to_gsl)
-
+ #puts "***"
  ep=eigval.size.times.map {|i|
-   [eigval[i], eigvec.get_col(i)]
+   ev=eigvec.get_col(i)
+   [eigval[i], ev]
  }
  @eigenpairs=ep.sort{|a,b| a[0]<=>b[0]}.reverse
  end
@@ -159,20 +217,23 @@ module Factor
  def report_building(builder) # :nodoc:
  builder.section(:name=>@name) do |generator|
  generator.text _("Number of factors: %d") % m
- generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction")]) do |t|
+ generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction"), _("%")]) do |t|
  communalities(m).each_with_index {|com, i|
- t.row([@variables_names[i], 1.0, sprintf("%0.3f", com)])
+ perc=com*100.quo(@matrix[i,i])
+ t.row([@variables_names[i], "%0.3f" % @matrix[i,i] , "%0.3f" % com, "%0.3f" % perc])
  }
  end
-
+ te=total_eigenvalues
  generator.table(:name=>_("Total Variance Explained"), :header=>[_("Component"), _("E.Total"), _("%"), _("Cum. %")]) do |t|
  ac_eigen=0
  eigenvalues.each_with_index {|eigenvalue,i|
  ac_eigen+=eigenvalue
- t.row([_("Component %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(@n_variables)), sprintf("%0.3f",ac_eigen*100.quo(@n_variables))])
+ t.row([_("Component %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(te)), sprintf("%0.3f",ac_eigen*100.quo(te))])
  }
  end
+
  generator.parse_element(component_matrix(m))
+
  if (summary_rotation)
  generator.parse_element(rotation)
  end
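To see how the reworked PCA API fits together, a minimal sketch follows; the data file and the :m option value are assumptions, and component signs may differ depending on whether GSL or pure Ruby computes the eigenvectors (see the NOTE in the class comment above):

    ds  = Statsample.load_excel('measures.xls')
    pca = Statsample::Factor::PCA.new(ds.covariance_matrix, :m=>2)
    pca.eigenvalues                      # sorted in descending order
    pca.feature_matrix                   # two eigenvectors as columns (was feature_vector)
    pcs = pca.principal_components(ds)   # dataset with fields PC_1, PC_2 (was data_transformation)
    puts pca.summary                     # communalities, total variance explained, component matrix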