statsample 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,30 @@
1
+ === 0.18.0 / 2011-01-07
2
+ * New Statsample.load_excel
3
+ * New Statsample.load_csv
4
+ * Statsample::Dataset#[] accepts an array of fields and uses clone
5
+ * New Dataset#correlation_matrix and Statsample::Dataset#covariance_matrix
6
+ * Statsample::Dataset.filter add labels to vectors
7
+ * Principal Components generation complete on PCA (covariance matrix preferred)
8
+ * Added note on Statsample::Factor::PCA about erratic signs on eigenvalues,
9
+ * Statsample::Factor::PCA.component_matrix calculated different for covariance matrix
10
+ * Improved summary for PCA using covariance matrix
11
+ * New attribute :label_angle for Statsample::Graph::Boxplot
12
+ * Fixed Scatterplots scaling problems
13
+ * New attributes for Scatterplots: groups, minimum_x, minimum_y, maximum_x,
14
+ * New Statsample::Multiset#union allows to create a new dataset based on a m
15
+ * New Statsample::Multiset#each to traverse through datasets
16
+ * Bug fix: Vector#standarized and Vector#percentile crash on nil data
17
+ * Bug fix: Vector#mean and Vector#sd crash on data without valid values
18
+ * Modified methods names on Statsample::Factor::PCA : feature_vector to feature_matrix, data_transformation to principal_components
19
+ * Added Statsample::Vector.vector_centered
20
+ * Factor::MAP.with_dataset() implemented
21
+ * Bug fix: Factor::MAP with correlation matrix with non-real eigenvalues crashes
+ * Added documentation for Graph::Histogram
22
+ * Added MPA to Reliability::MultiScaleAnalysis
23
+ * Added custom names for returned vectors and datasets
24
+ * Updated Spanish translation
25
+ * Graph::Histogram updated. Custom x and y max and min, optional normal distribution drawing
26
+ * Updated Histogram class, with several new methods compatible with GSL::Histogram
27
+
1
28
  === 0.17.0 / 2010-12-09
2
29
  * Added Statsample::Graph::Histogram and Statsample::Graph::Boxplot
3
30
  * Added Statsample::Reliability::SkillScaleAnalysis for analysis of skill based scales.
data/Manifest.txt CHANGED
@@ -105,6 +105,7 @@ po/es/statsample.po
105
105
  po/statsample.pot
106
106
  references.txt
107
107
  setup.rb
108
+ test/fixtures/bank2.dat
108
109
  test/fixtures/correlation_matrix.rb
109
110
  test/helpers_tests.rb
110
111
  test/test_anovaoneway.rb
data/Rakefile CHANGED
@@ -5,7 +5,6 @@ $:.unshift(File.dirname(__FILE__)+'/lib/')
5
5
 
6
6
  require 'rubygems'
7
7
  require 'statsample'
8
-
9
8
  require 'hoe'
10
9
  Hoe.plugin :git
11
10
 
@@ -41,9 +40,9 @@ h=Hoe.spec('statsample') do
41
40
  #self.testlib=:minitest
42
41
  self.rubyforge_name = "ruby-statsample"
43
42
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
44
- self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.3.3"]
43
+ self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"]
45
44
 
46
- self.extra_dev_deps << ["shoulda"] << ["minitest", "~>2.0"]
45
+ self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"]
47
46
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
48
47
  self.post_install_message = <<-EOF
49
48
  ***************************************************
@@ -1,14 +1,13 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
  $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
-
5
- require 'benchmark'
6
4
  require 'statsample'
7
- n=1000
8
- a=n.times.map {|i| rand()*20}.to_scale
9
- hg=Statsample::Graph::Histogram.new(a, :bins=>15)
5
+ n=3000
6
+ rng=Distribution::Normal.rng_ugaussian
7
+ a=n.times.map {|i| rng.call()*20}.to_scale
8
+ hg=Statsample::Graph::Histogram.new(a, :bins=>20, :line_normal_distribution=>true )
10
9
 
11
10
  rb=ReportBuilder.new
12
- rb.add(a.histogram)
11
+ #rb.add(a.histogram)
13
12
  rb.add(hg)
14
- puts rb.to_text
13
+ rb.save_html('histogram.html')
data/lib/statsample.rb CHANGED
@@ -118,7 +118,7 @@ module Statsample
118
118
  @@has_gsl
119
119
  end
120
120
 
121
- VERSION = '0.17.0'
121
+ VERSION = '0.18.0'
122
122
  SPLIT_TOKEN = ","
123
123
  autoload(:Database, 'statsample/converters')
124
124
  autoload(:Anova, 'statsample/anova')
@@ -157,6 +157,30 @@ module Statsample
157
157
  false
158
158
  end
159
159
  end
160
+ # Import an Excel file. Cache result by default
161
+ def load_excel(filename, opts=Hash.new, cache=true)
162
+ file_ds=filename+".ds"
163
+ if cache and (File.exists? file_ds and File.mtime(file_ds)>File.mtime(filename))
164
+ ds=Statsample.load(file_ds)
165
+ else
166
+ ds=Statsample::Excel.read(filename)
167
+ ds.save(file_ds) if cache
168
+ end
169
+ ds
170
+ end
171
+
172
+ # Import a CSV file. Cache result by default
173
+ def load_csv(filename, opts=Hash.new, cache=true)
174
+ file_ds=filename+".ds"
175
+ if cache and (File.exists? file_ds and File.mtime(file_ds)>File.mtime(filename))
176
+ ds=Statsample.load(file_ds)
177
+ else
178
+ ds=Statsample::CSV.read(filename,opts)
179
+ ds.save(file_ds) if cache
180
+ end
181
+ ds
182
+ end
183
+
160
184
 
161
185
  # Create a matrix using vectors as columns.
162
186
  # Use:
@@ -58,13 +58,6 @@ module Statsample
58
58
  # Calculate sum of squares
59
59
  ss=sum_of_squares(v1a,v2a)
60
60
  ss.quo(Math::sqrt(v1a.sum_of_squares) * Math::sqrt(v2a.sum_of_squares))
61
- =begin
62
- v1s,v2s=v1a.vector_standarized,v2a.vector_standarized
63
- t=0
64
- siz=v1s.size
65
- (0...v1s.size).each {|i| t+=(v1s[i]*v2s[i]) }
66
- t.quo(v2s.size-1)
67
- =end
68
61
  end
69
62
  alias :correlation :pearson
70
63
  # Retrieves the value for t test for a pearson correlation
@@ -17,6 +17,7 @@ module Statsample
17
17
  fields=[]
18
18
  sth.column_info.each {|c|
19
19
  vectors[c['name']]=Statsample::Vector.new([])
20
+ vectors[c['name']].name=c['name']
20
21
  vectors[c['name']].type= (c['type_name']=='INTEGER' or c['type_name']=='DOUBLE') ? :scale : :nominal
21
22
  fields.push(c['name'])
22
23
  }
@@ -35,7 +36,7 @@ module Statsample
35
36
  # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
36
37
  # Statsample::Database.insert(ds,dbh,"test")
37
38
  #
38
- def insert(ds, dbh,table)
39
+ def insert(ds, dbh, table)
39
40
  require 'dbi'
40
41
  query="INSERT INTO #{table} ("+ds.fields.join(",")+") VALUES ("+((["?"]*ds.fields.size).join(","))+")"
41
42
  sth=dbh.prepare(query)
@@ -235,6 +236,7 @@ module Statsample
235
236
  fields.each {|f|
236
237
  ds[f].name=f
237
238
  }
239
+ ds.name=filename
238
240
  ds
239
241
  end
240
242
  end
@@ -25,7 +25,7 @@ module Statsample
25
25
  end
26
26
  def to_s
27
27
  m="Error on iteration: "+@exp.message+"\n"+@exp.backtrace.join("\n")
28
- m+="\nRow: #{@ds.i}" unless @ds.i.nil?
28
+ m+="\nRow ##{@ds.i}:#{@ds.case_as_hash(@ds.i)}" unless @ds.i.nil?
29
29
  m
30
30
  end
31
31
  end
@@ -140,7 +140,6 @@ module Statsample
140
140
  end
141
141
  @i=nil
142
142
  end
143
-
144
143
  #
145
144
  # Returns a GSL::matrix
146
145
  #
@@ -239,6 +238,7 @@ module Statsample
239
238
  ds[f]=@vectors[f]
240
239
  }
241
240
  ds.fields=fields_to_include
241
+ ds.name=@name
242
242
  ds.update_valid_data
243
243
  ds
244
244
  end
@@ -419,13 +419,15 @@ module Statsample
419
419
  # if fields parameter is empty, sum all fields
420
420
  def vector_sum(fields=nil)
421
421
  fields||=@fields
422
- collect_with_index do |row, i|
422
+ vector=collect_with_index do |row, i|
423
423
  if(fields.find{|f| !@vectors[f].data_with_nils[i]})
424
424
  nil
425
425
  else
426
426
  fields.inject(0) {|ac,v| ac + row[v].to_f}
427
427
  end
428
428
  end
429
+ vector.name=_("Sum from %s") % @name
430
+ vector
429
431
  end
430
432
  # Check if #fields attribute is correct, after inserting or deleting vectors
431
433
  def check_fields(fields)
@@ -476,7 +478,9 @@ module Statsample
476
478
  a.push(sum.quo(size-invalids))
477
479
  end
478
480
  end
479
- a.to_vector(:scale)
481
+ a=a.to_vector(:scale)
482
+ a.name=_("Means from %s") % @name
483
+ a
480
484
  end
481
485
  # Check vectors for type and size.
482
486
  def check_length # :nodoc:
@@ -598,8 +602,9 @@ module Statsample
598
602
  def[](i)
599
603
  if i.is_a? Range
600
604
  fields=from_to(i.begin,i.end)
601
- vectors=fields.inject({}) {|a,v| a[v]=@vectors[v];a}
602
- Dataset.new(vectors,fields)
605
+ clone(*fields)
606
+ elsif i.is_a? Array
607
+ clone(i)
603
608
  else
604
609
  raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
605
610
  @vectors[i]
@@ -661,16 +666,36 @@ module Statsample
661
666
  GSL::Matrix.alloc(*rows)
662
667
  end
663
668
  end
664
-
665
-
669
+
670
+ # Return a correlation matrix for fields included as parameters.
671
+ # By default, uses all fields of dataset
672
+ def correlation_matrix(fields=nil)
673
+ if fields
674
+ ds=clone(fields)
675
+ else
676
+ ds=self
677
+ end
678
+ Statsample::Bivariate.correlation_matrix(ds)
679
+ end
680
+ # Return a correlation matrix for fields included as parameters.
681
+ # By default, uses all fields of dataset
682
+ def covariance_matrix(fields=nil)
683
+ if fields
684
+ ds=clone(fields)
685
+ else
686
+ ds=self
687
+ end
688
+ Statsample::Bivariate.covariance_matrix(ds)
689
+ end
666
690
 
667
691
  # Create a new dataset with all cases which the block returns true
668
692
  def filter
669
693
  ds=self.dup_empty
670
694
  each {|c|
671
- ds.add_case(c,false) if yield c
695
+ ds.add_case(c, false) if yield c
672
696
  }
673
697
  ds.update_valid_data
698
+ ds.name=_("%s(filtered)") % @name
674
699
  ds
675
700
  end
676
701
 
@@ -712,6 +737,8 @@ module Statsample
712
737
  # puts "Vector #{k1}:"+v1.to_s
713
738
  v1.type=@vectors[k1].type
714
739
  v1.name=@vectors[k1].name
740
+ v1.labels=@vectors[k1].labels
741
+
715
742
  }
716
743
  }
717
744
  ms
@@ -737,9 +764,16 @@ module Statsample
737
764
 
738
765
  ms.datasets.each do |k,ds|
739
766
  ds.update_valid_data
767
+ ds.name=fields.size.times.map {|i|
768
+ f=fields[i]
769
+ sk=k[i]
770
+ @vectors[f].labeling(sk)
771
+ }.join("-")
740
772
  ds.vectors.each{|k1,v1|
741
773
  v1.type=@vectors[k1].type
742
774
  v1.name=@vectors[k1].name
775
+ v1.labels=@vectors[k1].labels
776
+
743
777
  }
744
778
  end
745
779
  ms
@@ -805,7 +839,7 @@ module Statsample
805
839
  vr
806
840
  end
807
841
  def to_s
808
- "#<"+self.class.to_s+":"+self.object_id.to_s+" @fields=["+@fields.join(",")+"] cases="+@vectors[@fields[0]].size.to_s
842
+ "#<"+self.class.to_s+":"+self.object_id.to_s+" @name=#{@name} @fields=["+@fields.join(",")+"] cases="+@vectors[@fields[0]].size.to_s
809
843
  end
810
844
  def inspect
811
845
  self.to_s
@@ -13,7 +13,18 @@ module Statsample
13
13
  # * Statsample::Factor::Varimax
14
14
  # * Statsample::Factor::Equimax
15
15
  # * Statsample::Factor::Quartimax
16
- # See documentation of each class to use it
16
+ # * Classes for determining the number of components
17
+ # * Statsample::Factor::MAP
18
+ # * Statsample::Factor::ParallelAnalysis
19
+ #
20
+ # About number of components, O'Connor(2000) said:
21
+ # The two procedures [PA and MAP ] complement each other nicely,
22
+ # in that the MAP tends to err (when it does err) in the direction
23
+ # of underextraction, whereas parallel analysis tends to err
24
+ # (when it does err) in the direction of overextraction.
25
+ # Optimal decisions are thus likely to be made after considering
26
+ # the results of both analytic procedures. (p.10)
27
+
17
28
  module Factor
18
29
  # Anti-image covariance matrix.
19
30
  # Useful for inspection of desireability of data for factor analysis.
@@ -48,6 +48,9 @@ module Statsample
48
48
  attr_reader :fm
49
49
  # Smallest average squared correlation
50
50
  attr_reader :minfm
51
+ def self.with_dataset(ds,opts=Hash.new)
52
+ new(ds.correlation_matrix,opts)
53
+ end
51
54
  def initialize(matrix, opts=Hash.new)
52
55
  @matrix=matrix
53
56
  opts_default={
@@ -76,10 +79,15 @@ module Statsample
76
79
  end
77
80
  minfm=fm[0]
78
81
  nfactors=0
82
+ @errors=[]
79
83
  fm.each_with_index do |v,s|
80
- if v < minfm
81
- minfm=v
82
- nfactors=s
84
+ if v.is_a? Complex
85
+ @errors.push(s)
86
+ else
87
+ if v < minfm
88
+ minfm=v
89
+ nfactors=s
90
+ end
83
91
  end
84
92
  end
85
93
  @number_of_factors=nfactors
@@ -89,13 +97,13 @@ module Statsample
89
97
  def report_building(g) #:nodoc:
90
98
  g.section(:name=>@name) do |s|
91
99
  s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
92
- eigenvalues.each do |e|
93
- t.row(["%0.6f" % e])
100
+ eigenvalues.each_with_index do |e,i|
101
+ t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
94
102
  end
95
103
  end
96
104
  s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
97
105
  fm.each_with_index do |v,i|
98
- t.row(["%d" % i, "%0.6f" % v])
106
+ t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
99
107
  end
100
108
  end
101
109
  s.text(_("The smallest average squared correlation is : %0.6f" % minfm))
@@ -39,15 +39,12 @@ module Statsample
39
39
  attr_reader :ds
40
40
  # Bootstrap method. <tt>:random</tt> used by default
41
41
  # * <tt>:random</tt>: uses number of variables and cases for the dataset
42
- # * <tt>:data</tt> : sample with replacement from actual data.
43
-
42
+ # * <tt>:data</tt> : sample with replacement from actual data.
44
43
  attr_accessor :bootstrap_method
45
44
  # Uses smc on diagonal of matrixes, to perform simulation
46
45
  # of a Principal Axis analysis.
47
46
  # By default, false.
48
-
49
47
  attr_accessor :smc
50
-
51
48
  # Percentil over bootstrap eigenvalue should be accepted. 95 by default
52
49
  attr_accessor :percentil
53
50
  # Correlation matrix used with :raw_data . <tt>:correlation_matrix</tt> used by default
@@ -1,8 +1,14 @@
1
+ # encoding: UTF-8
1
2
  module Statsample
2
3
  module Factor
3
- # Principal Component Analysis (PCA) of a
4
- # covariance or correlation matrix.
4
+ # Principal Component Analysis (PCA) of a covariance or
5
+ # correlation matrix.
5
6
  #
7
+ # NOTE: Sign of second and later eigenvalues could be different
8
+ # using Ruby or GSL, so values for PCs and component matrix
9
+ # should differ, because extendmatrix and gsl's methods to calculate
10
+ # eigenvectors are different. Using R is worse, cause first
11
+ # eigenvector could have negative values!
6
12
  # For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis
7
13
  #
8
14
  # == Usage:
@@ -26,6 +32,7 @@ module Factor
26
32
  # == References:
27
33
  # * SPSS Manual
28
34
  # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
35
+ # * Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
29
36
  #
30
37
  class PCA
31
38
  include Summarizable
@@ -43,12 +50,16 @@ module Factor
43
50
  attr_accessor :summary_parallel_analysis
44
51
  # Type of rotation. By default, Statsample::Factor::Rotation::Varimax
45
52
  attr_accessor :rotation_type
46
-
53
+ attr_accessor :type
47
54
  def initialize(matrix, opts=Hash.new)
48
55
  @use_gsl=nil
49
56
  @name=_("Principal Component Analysis")
50
57
  @matrix=matrix
51
- @n_variables=@matrix.column_size
58
+ @n_variables=@matrix.column_size
59
+ @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| _("VAR_%d") % (i+1)}
60
+
61
+ @type = @matrix.respond_to?(:type) ? @matrix.type : :correlation
62
+
52
63
  @m=nil
53
64
 
54
65
  @rotation_type=Statsample::Factor::Varimax
@@ -65,15 +76,19 @@ module Factor
65
76
  @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
66
77
  end
67
78
  calculate_eigenpairs
79
+
68
80
  if @m.nil?
69
81
  # Set number of factors with eigenvalues > 1
70
82
  @m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
71
83
  end
72
-
84
+
73
85
  end
74
86
  def rotation
75
87
  @rotation_type.new(component_matrix)
76
88
  end
89
+ def total_eigenvalues
90
+ eigenvalues.inject(0) {|ac,v| ac+v}
91
+ end
77
92
  def create_centered_ds
78
93
  h={}
79
94
  @original_ds.factors.each {|f|
@@ -83,8 +98,10 @@ module Factor
83
98
  @ds=h.to_dataset
84
99
  end
85
100
 
86
- # Feature vector for m factors
87
- def feature_vector(m=nil)
101
+ # Feature matrix for +m+ factors
102
+ # Returns +m+ eigenvectors as columns.
103
+ # So, i=variable, j=component
104
+ def feature_matrix(m=nil)
88
105
  m||=@m
89
106
  omega_m=::Matrix.build(@n_variables, m) {0}
90
107
  m.times do |i|
@@ -92,15 +109,48 @@ module Factor
92
109
  end
93
110
  omega_m
94
111
  end
95
- # data_transformation
96
- def data_transformation(data_matrix, m)
112
+ # Returns Principal Components for +input+ matrix or dataset
113
+ # The number of PC to return is equal to parameter +m+.
114
+ # If +m+ isn't set, m set to number of PCs selected at object creation.
115
+ def principal_components(input, m=nil)
116
+ data_matrix=input.to_matrix
117
+ var_names=(data_matrix.respond_to? :fields_y) ? data_matrix.fields_y : data_matrix.column_size.times.map {|i| "VAR_%d" % (i+1)}
97
118
  m||=@m
98
- raise "Data variables number should be equal to original variable number" if data_matrix.size2!=@n_variables
99
- fv=feature_vector(m)
100
- (fv.transpose*data_matrix.transpose).transpose
119
+
120
+ raise "data matrix variables<>pca variables" if data_matrix.column_size!=@n_variables
121
+
122
+ fv=feature_matrix(m)
123
+ pcs=(fv.transpose*data_matrix.transpose).transpose
124
+ pcs.extend Statsample::NamedMatrix
125
+ pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
126
+ pcs.to_dataset
101
127
  end
102
- # Component matrix for m factors
103
128
  def component_matrix(m=nil)
129
+ var="component_matrix_#{type}"
130
+ send(var,m)
131
+ end
132
+ # Matrix with correlations between components and
133
+ # variables. Based on Härdle & Simar (2003, p.243)
134
+ def component_matrix_covariance(m=nil)
135
+ m||=@m
136
+ raise "m should be > 0" if m<1
137
+ ff=feature_matrix(m)
138
+ cm=::Matrix.build(@n_variables, m) {0}
139
+ @n_variables.times {|i|
140
+ m.times {|j|
141
+ cm[i,j]=ff[i,j] * Math.sqrt(eigenvalues[j] / @matrix[i,i])
142
+ }
143
+ }
144
+ cm.extend CovariateMatrix
145
+ cm.name=_("Component matrix (from covariance)")
146
+ cm.fields_x = @variables_names
147
+ cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
148
+
149
+ cm
150
+ end
151
+ # Matrix with correlations between components and
152
+ # variables
153
+ def component_matrix_correlation(m=nil)
104
154
  m||=@m
105
155
  raise "m should be > 0" if m<1
106
156
  omega_m=::Matrix.build(@n_variables, m) {0}
@@ -115,17 +165,17 @@ module Factor
115
165
  cm.extend CovariateMatrix
116
166
  cm.name=_("Component matrix")
117
167
  cm.fields_x = @variables_names
118
- cm.fields_y = m.times.map {|i| "component_#{i+1}"}
168
+ cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
119
169
  cm
120
170
  end
121
- # Communalities for all variables given m factors
122
171
  def communalities(m=nil)
172
+
123
173
  m||=@m
124
174
  h=[]
125
175
  @n_variables.times do |i|
126
176
  sum=0
127
177
  m.times do |j|
128
- sum+=@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2
178
+ sum+=(@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
129
179
  end
130
180
  h.push(sum)
131
181
  end
@@ -135,7 +185,11 @@ module Factor
135
185
  def eigenvalues
136
186
  @eigenpairs.collect {|c| c[0] }
137
187
  end
138
-
188
+ def eigenvectors
189
+ @eigenpairs.collect {|c|
190
+ c[1].to_matrix
191
+ }
192
+ end
139
193
  def calculate_eigenpairs
140
194
  if @use_gsl
141
195
  calculate_eigenpairs_gsl
@@ -144,14 +198,18 @@ module Factor
144
198
  end
145
199
  end
146
200
 
147
- def calculate_eigenpairs_ruby
201
+ def calculate_eigenpairs_ruby #:nodoc:
148
202
  @eigenpairs = @matrix.eigenpairs_ruby
149
203
  end
150
- def calculate_eigenpairs_gsl
204
+ # Eigenvectors calculated with gsl
205
+ # Note: The signs of some vectors could be different of
206
+ # ruby generated
207
+ def calculate_eigenpairs_gsl #:nodoc:
151
208
  eigval, eigvec= GSL::Eigen.symmv(@matrix.to_gsl)
152
-
209
+ #puts "***"
153
210
  ep=eigval.size.times.map {|i|
154
- [eigval[i], eigvec.get_col(i)]
211
+ ev=eigvec.get_col(i)
212
+ [eigval[i], ev]
155
213
  }
156
214
  @eigenpairs=ep.sort{|a,b| a[0]<=>b[0]}.reverse
157
215
  end
@@ -159,20 +217,23 @@ module Factor
159
217
  def report_building(builder) # :nodoc:
160
218
  builder.section(:name=>@name) do |generator|
161
219
  generator.text _("Number of factors: %d") % m
162
- generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction")]) do |t|
220
+ generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction"), _("%")]) do |t|
163
221
  communalities(m).each_with_index {|com, i|
164
- t.row([@variables_names[i], 1.0, sprintf("%0.3f", com)])
222
+ perc=com*100.quo(@matrix[i,i])
223
+ t.row([@variables_names[i], "%0.3f" % @matrix[i,i] , "%0.3f" % com, "%0.3f" % perc])
165
224
  }
166
225
  end
167
-
226
+ te=total_eigenvalues
168
227
  generator.table(:name=>_("Total Variance Explained"), :header=>[_("Component"), _("E.Total"), _("%"), _("Cum. %")]) do |t|
169
228
  ac_eigen=0
170
229
  eigenvalues.each_with_index {|eigenvalue,i|
171
230
  ac_eigen+=eigenvalue
172
- t.row([_("Component %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(@n_variables)), sprintf("%0.3f",ac_eigen*100.quo(@n_variables))])
231
+ t.row([_("Component %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(te)), sprintf("%0.3f",ac_eigen*100.quo(te))])
173
232
  }
174
233
  end
234
+
175
235
  generator.parse_element(component_matrix(m))
236
+
176
237
  if (summary_rotation)
177
238
  generator.parse_element(rotation)
178
239
  end