RubyGems - statsample - Versions diffs - 0.6.5 → 0.6.7 - Mend

statsample 0.6.5 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

data/History.txt +15 -0
data/Manifest.txt +6 -0
data/README.txt +30 -12
data/Rakefile +91 -0
data/demo/levene.rb +9 -0
data/demo/multiple_regression.rb +1 -7
data/demo/polychoric.rb +1 -0
data/demo/principal_axis.rb +8 -0
data/lib/distribution/f.rb +22 -22
data/lib/spss.rb +99 -99
data/lib/statsample/bivariate/polychoric.rb +32 -22
data/lib/statsample/bivariate/tetrachoric.rb +212 -207
data/lib/statsample/bivariate.rb +6 -6
data/lib/statsample/codification.rb +65 -65
data/lib/statsample/combination.rb +60 -59
data/lib/statsample/converter/csv19.rb +12 -12
data/lib/statsample/converters.rb +1 -1
data/lib/statsample/dataset.rb +93 -36
data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
data/lib/statsample/dominanceanalysis.rb +5 -6
data/lib/statsample/factor/pca.rb +41 -11
data/lib/statsample/factor/principalaxis.rb +105 -29
data/lib/statsample/factor/rotation.rb +20 -3
data/lib/statsample/factor.rb +1 -1
data/lib/statsample/graph/gdchart.rb +13 -13
data/lib/statsample/graph/svggraph.rb +166 -167
data/lib/statsample/matrix.rb +22 -12
data/lib/statsample/mle/logit.rb +3 -2
data/lib/statsample/mle/probit.rb +7 -5
data/lib/statsample/mle.rb +4 -2
data/lib/statsample/multiset.rb +125 -124
data/lib/statsample/permutation.rb +2 -1
data/lib/statsample/regression/binomial/logit.rb +4 -3
data/lib/statsample/regression/binomial/probit.rb +2 -1
data/lib/statsample/regression/binomial.rb +62 -81
data/lib/statsample/regression/multiple/baseengine.rb +1 -1
data/lib/statsample/regression/multiple/gslengine.rb +1 -1
data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
data/lib/statsample/regression/multiple.rb +15 -42
data/lib/statsample/regression/simple.rb +93 -78
data/lib/statsample/regression.rb +74 -2
data/lib/statsample/reliability.rb +117 -120
data/lib/statsample/srs.rb +156 -153
data/lib/statsample/test/levene.rb +90 -0
data/lib/statsample/test/umannwhitney.rb +25 -9
data/lib/statsample/test.rb +2 -0
data/lib/statsample/vector.rb +388 -413
data/lib/statsample.rb +74 -30
data/po/es/statsample.mo +0 -0
data/test/test_bivariate.rb +5 -4
data/test/test_combination.rb +1 -1
data/test/test_dataset.rb +2 -2
data/test/test_factor.rb +53 -6
data/test/test_gsl.rb +1 -1
data/test/test_mle.rb +1 -1
data/test/test_regression.rb +18 -33
data/test/test_statistics.rb +15 -33
data/test/test_stest.rb +35 -0
data/test/test_svg_graph.rb +2 -2
data/test/test_vector.rb +331 -333
metadata +38 -11

data/lib/statsample/dataset.rb CHANGED Viewed

@@ -1,22 +1,23 @@
 require 'statsample/vector'
 class Hash
+  # Creates a Statsample::Dataset based on a Hash
   def to_dataset(*args)
     Statsample::Dataset.new(self,*args)
   end
 end
 class Array
-  def prefix(s)
+  def prefix(s) # :nodoc:
     self.collect{|c| s+c.to_s }
   end
-  def suffix(s)
+  def suffix(s) # :nodoc:
     self.collect{|c| c.to_s+s }
   end
 end
 module Statsample
-  class DatasetException < RuntimeError
+  class DatasetException < RuntimeError # :nodoc:
     attr_reader :ds,:exp
     def initialize(ds,e)
       @ds=ds
@@ -28,15 +29,49 @@ module Statsample
       m
     end
   end
+  # Set of cases with values for one or more variables,
+  # analogous to a data frame in R or a standard data file in SPSS.
+  # Every vector has a <tt>#field</tt> name, which represents it. By default,
+  # the vectors are ordered by their field names, but you can change
+  # the field order manually.
+  # The Dataset works like a Hash, where keys are field names
+  # and values are Statsample::Vector objects.
+  #
+  #
+  # ==Usage
+  # Create an empty dataset
+  #   Dataset.new()
+  # Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>
+  #   Dataset.new(%w{v1 v2 v3})
+  # Create a dataset with two vectors
+  #   Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
+  # Create a dataset with two given vectors (v1 and v2), with vectors on inverted order
+  #   Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
+  #
+  # The fast way to create a dataset uses Hash#to_dataset, with
+  # field order  as arguments
+  #   v1 = [1,2,3].to_scale
+  #   v2 = [1,2,3].to_scale
+  #   ds = {'v1'=>v1, 'v2'=>v2}.to_dataset(%w{v2 v1})
   class Dataset
     include Writable
-    attr_reader :vectors, :fields, :cases, :i
+    # Hash of Statsample::Vector
+    attr_reader :vectors
+    # Ordered names of vectors
+    attr_reader :fields
+    # Number of cases
+    attr_reader :cases
+    # Location of pointer on enumerations methods (like #each)
+    attr_reader :i
+    # Deprecated: Label of vectors
     attr_accessor :labels
     # Generates a new dataset, using three vectors
     # - Rows
     # - Columns
     # - Values
+    #
     # For example, you have these values
     #
     #   x   y   v
@@ -88,16 +123,7 @@ module Statsample
     # order of variables. If empty, vector keys in alphabetical order are
     # used as fields
     # [labels]  Hash to set names for fields.
-    #
-    #
-    #   Dataset.new()
-    #   Dataset.new(%w{v1 v2 v3})
-    #   Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
-    #   Dataset.new({'v2'=>v2,'v1'=>v1},['v1','v2'])
-    #
-    # The fast way to create a dataset uses Hash#to_dataset, with
-    # fields and labels as arguments
-    #   ds = {'v1'=>[1,2,3].to_vector}.to_dataset
     #
     def initialize(vectors={}, fields=[], labels={})
       if vectors.instance_of? Array
@@ -120,7 +146,8 @@ module Statsample
       end
       matrix
     end
-    def label(v_id)
+    # Retrieves label for a vector, giving a field name.
+    def label(v_id)
       raise "Vector #{v} doesn't exists" unless @fields.include? v_id
       @labels[v_id].nil? ? v_id : @labels[v_id]
     end
@@ -233,12 +260,20 @@ module Statsample
       ds_boot.update_valid_data
       ds_boot
     end
-    # Fast version of add case
+    # Fast version of #add_case.
     # Can only add one case and no error check is performed
-    # You SHOULD use update_valid_data at the end of insertion cycle
+    # You SHOULD use #update_valid_data at the end of insertion cycle
     def add_case_array(v)
       v.each_index {|i| d=@vectors[@fields[i]].data; d.push(v[i])}
     end
+    # Insert a case, using:
+    # * Array: size equal to number of vectors and values in the same order as fields
+    # * Hash: keys equal to fields
+    # If uvd is false, #update_valid_data is not executed after
+    # inserting a case. This is very useful if you want to increase the
+    # performance on inserting many cases,
+    # because #update_valid_data performs check on vectors and on the dataset
     def add_case(v,uvd=true)
       case v
       when Array
@@ -258,14 +293,18 @@ module Statsample
         update_valid_data
       end
     end
+    # Check vectors and fields after inserting data. Use only
+    # after #add_case_array or #add_case with second parameter set to false
     def update_valid_data
       @fields.each{|f| @vectors[f].set_valid_data}
       check_length
     end
+    # Delete a vector
     def delete_vector(name)
       @fields.delete(name)
       @vectors.delete(name)
     end
     def add_vectors_by_split_recode(name,join='-',sep=Statsample::SPLIT_TOKEN)
       split=@vectors[name].split_by_separator(sep)
       i=1
@@ -294,7 +333,7 @@ module Statsample
 		def vector_sum(fields=nil)
 			a=[]
 			fields||=@fields
-			collect_with_index do |i,row|
+			collect_with_index do |row, i|
 				if(fields.find{|f| !@vectors[f].data_with_nils[i]})
 					nil
 				else
@@ -302,16 +341,17 @@ module Statsample
 				end
       end
 		end
+    # Check if #fields attribute is correct, after inserting or deleting vectors
     def check_fields(fields)
       fields||=@fields
       raise "Fields #{(fields-@fields).join(", ")} doesn't exists on dataset" if (fields-@fields).size>0
       fields
     end
     # Returns a vector with the numbers of missing values for a case
     def vector_missing_values(fields=nil)
       fields=check_fields(fields)
-      collect_with_index do |i,row|
+      collect_with_index do |row, i|
         fields.inject(0) {|a,v|
           a+ ((@vectors[v].data_with_nils[i].nil?) ? 1: 0)
         }
@@ -319,9 +359,8 @@ module Statsample
     end
     def vector_count_characters(fields=nil)
       fields=check_fields(fields)
-      collect_with_index do |i,row|
+      collect_with_index do |row, i|
         fields.inject(0){|a,v|
           a+((@vectors[v].data_with_nils[i].nil?) ? 0: row[v].to_s.size)
         }
       end
@@ -353,7 +392,8 @@ module Statsample
       end
       a.to_vector(:scale)
     end
-    def check_length
+    # Check vectors for type and size.
+    def check_length # :nodoc:
       size=nil
       @vectors.each do |k,v|
         raise Exception, "Data #{v.class} is not a vector on key #{k}" if !v.is_a? Statsample::Vector
@@ -368,16 +408,19 @@ module Statsample
       end
       @cases=size
     end
-    def each_vector
-      @fields.each{|k| yield k,@vectors[k]}
+    # Retrieves each vector as [key, vector]
+    def each_vector # :yield: |key, vector|
+      @fields.each{|k| yield k, @vectors[k]}
     end
     if Statsample::STATSAMPLE__.respond_to?(:case_as_hash)
       def case_as_hash(c) # :nodoc:
         Statsample::STATSAMPLE__.case_as_hash(self,c)
       end
     else
-      def case_as_hash(c)
-        _case_as_hash(c)
+      # Retrieves case i as a hash
+      def case_as_hash(i)
+        _case_as_hash(i)
       end
     end
@@ -386,8 +429,9 @@ module Statsample
         Statsample::STATSAMPLE__.case_as_array(self,c)
       end
     else
-      def case_as_array(c)
-        _case_as_array(c)
+      # Retrieves case i as an array, ordered on #fields order
+      def case_as_array(i)
+        _case_as_array(i)
       end
     end
     def _case_as_hash(c) # :nodoc:
@@ -396,6 +440,7 @@ module Statsample
     def _case_as_array(c) # :nodoc:
       @fields.collect {|x| @vectors[x][c]}
     end
     # Returns each case as a hash
     def each
       begin
@@ -411,7 +456,7 @@ module Statsample
       end
     end
     # Returns each case as hash and index
-    def each_with_index
+    def each_with_index # :yield: |case, i|
       begin
         @i=0
         @cases.times{|i|
@@ -447,6 +492,7 @@ module Statsample
       }
       @i=nil
     end
+    # Set fields order. If you omit one or more vectors,
     def fields=(f)
       @fields=f
       check_order
@@ -470,6 +516,8 @@ module Statsample
         raise ArgumentError, "You need a String or a Range"
       end
     end
+    # Retrieves a Statsample::Vector, based on the result
+    # of calculation performed on each case.
     def collect(type=:scale)
       data=[]
       each {|row|
@@ -477,10 +525,11 @@ module Statsample
       }
       Statsample::Vector.new(data,type)
     end
+    # Same as #collect, but giving case index as second parameter on yield.
     def collect_with_index(type=:scale)
       data=[]
       each_with_index {|row, i|
-        data.push(yield(i,row))
+        data.push(yield(row, i))
       }
       Statsample::Vector.new(data,type)
     end
@@ -504,6 +553,8 @@ module Statsample
         raise ArgumentError,"Should pass a Statsample::Vector"
       end
     end
+    # Return data as a matrix. Columns are ordered by #fields and
+    # rows by order of insertion
     def to_matrix
       rows=[]
       self.each_array{|c|
@@ -511,7 +562,8 @@ module Statsample
       }
       Matrix.rows(rows)
     end
-    if HAS_GSL
+    if Statsample.has_gsl?
       def to_matrix_gsl
       rows=[]
       self.each_array{|c|
@@ -520,15 +572,17 @@ module Statsample
       GSL::Matrix.alloc(*rows)
       end
     end
-		def to_multiset_by_split(*fields)
+    def to_multiset_by_split(*fields)
 			require 'statsample/multiset'
 			if fields.size==1
 				to_multiset_by_split_one_field(fields[0])
 			else
 				to_multiset_by_split_multiple_fields(*fields)
 			end
-		end
-    # create a new dataset with all the data which the block returns true
+    end
+    # Create a new dataset with all cases for which the block returns true
     def filter
       ds=self.dup_empty
       each {|c|
@@ -537,6 +591,7 @@ module Statsample
       ds.update_valid_data
       ds
     end
 		# creates a new vector with the data of a given field which the block returns true
 		def filter_field(field)
 			a=[]
@@ -545,6 +600,7 @@ module Statsample
 			}
 			a.to_vector(@vectors[field].type)
 		end
     def to_multiset_by_split_one_field(field)
       raise ArgumentError,"Should use a correct field name" if !@fields.include? field
       factors=@vectors[field].factors
@@ -604,7 +660,7 @@ module Statsample
           text.gsub!(f,"row['#{f}']")
         end
       }
-      collect_with_index {|i,row|
+      collect_with_index {|row, i|
         invalid=false
         @fields.each{|f|
           if @vectors[f].data_with_nils[i].nil?
@@ -653,6 +709,7 @@ module Statsample
     end
     # Creates a new dataset for one to many relations
     # on a dataset, based on pattern of field names.
+    #
     # for example, you have a survey for number of children
     # with this structure:
     #   id, name, child_name_1, child_age_1, child_name_2, child_age_2

data/lib/statsample/dominanceanalysis/bootstrap.rb CHANGED Viewed

@@ -1,8 +1,70 @@
 module Statsample
   class DominanceAnalysis
+    # == Goal
     # Generates Bootstrap sample to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
-    # References:
-    # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
+    #
+    # == Usage
+    #
+    #  require 'statsample'
+    #  a=100.times.collect {rand}.to_scale
+    #  b=100.times.collect {rand}.to_scale
+    #  c=100.times.collect {rand}.to_scale
+    #  d=100.times.collect {rand}.to_scale
+    #  ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
+    #  ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
+    #  dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y', :debug=>true)
+    #  dab.bootstrap(100,nil)
+    #  puts dab.summary
+    # <strong>Output</strong>
+    #   Sample size: 100
+    #  t: 1.98421693632958
+    #
+    #  Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
+    #  Table: Bootstrap report
+    #  --------------------------------------------------------------------------------------------
+    #  | pairs                 | sD  | Dij    | SE(Dij) | Pij   | Pji   | Pno   | Reproducibility |
+    #  --------------------------------------------------------------------------------------------
+    #  | Complete dominance    |
+    #  --------------------------------------------------------------------------------------------
+    #  | a - b                 | 1.0 | 0.6150 | 0.454   | 0.550 | 0.320 | 0.130 | 0.550           |
+    #  | a - c                 | 1.0 | 0.9550 | 0.175   | 0.930 | 0.020 | 0.050 | 0.930           |
+    #  | a - d                 | 1.0 | 0.9750 | 0.131   | 0.960 | 0.010 | 0.030 | 0.960           |
+    #  | b - c                 | 1.0 | 0.8800 | 0.276   | 0.820 | 0.060 | 0.120 | 0.820           |
+    #  | b - d                 | 1.0 | 0.9250 | 0.193   | 0.860 | 0.010 | 0.130 | 0.860           |
+    #  | c - d                 | 0.5 | 0.5950 | 0.346   | 0.350 | 0.160 | 0.490 | 0.490           |
+    #  --------------------------------------------------------------------------------------------
+    #  | Conditional dominance |
+    #  --------------------------------------------------------------------------------------------
+    #  | a - b                 | 1.0 | 0.6300 | 0.458   | 0.580 | 0.320 | 0.100 | 0.580           |
+    #  | a - c                 | 1.0 | 0.9700 | 0.156   | 0.960 | 0.020 | 0.020 | 0.960           |
+    #  | a - d                 | 1.0 | 0.9800 | 0.121   | 0.970 | 0.010 | 0.020 | 0.970           |
+    #  | b - c                 | 1.0 | 0.8850 | 0.283   | 0.840 | 0.070 | 0.090 | 0.840           |
+    #  | b - d                 | 1.0 | 0.9500 | 0.181   | 0.920 | 0.020 | 0.060 | 0.920           |
+    #  | c - d                 | 0.5 | 0.5800 | 0.360   | 0.350 | 0.190 | 0.460 | 0.460           |
+    #  --------------------------------------------------------------------------------------------
+    #  | General Dominance     |
+    #  --------------------------------------------------------------------------------------------
+    #  | a - b                 | 1.0 | 0.6500 | 0.479   | 0.650 | 0.350 | 0.000 | 0.650           |
+    #  | a - c                 | 1.0 | 0.9800 | 0.141   | 0.980 | 0.020 | 0.000 | 0.980           |
+    #  | a - d                 | 1.0 | 0.9900 | 0.100   | 0.990 | 0.010 | 0.000 | 0.990           |
+    #  | b - c                 | 1.0 | 0.9000 | 0.302   | 0.900 | 0.100 | 0.000 | 0.900           |
+    #  | b - d                 | 1.0 | 0.9700 | 0.171   | 0.970 | 0.030 | 0.000 | 0.970           |
+    #  | c - d                 | 1.0 | 0.5600 | 0.499   | 0.560 | 0.440 | 0.000 | 0.560           |
+    #  --------------------------------------------------------------------------------------------
+    #
+    #  Table: General averages
+    #  ---------------------------------------
+    #  | var | mean  | se    | p.5   | p.95  |
+    #  ---------------------------------------
+    #  | a   | 0.133 | 0.049 | 0.062 | 0.218 |
+    #  | b   | 0.106 | 0.048 | 0.029 | 0.199 |
+    #  | c   | 0.035 | 0.032 | 0.002 | 0.106 |
+    #  | d   | 0.023 | 0.019 | 0.002 | 0.062 |
+    #  ---------------------------------------
+    #
+    # == References:
+    #
+    # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
     class Bootstrap
       include GetText
       include Writable
@@ -27,12 +89,13 @@ module Statsample
       attr_accessor :alpha
       # Debug?
       attr_accessor :debug
+      # Default level of confidence for t calculation
+      ALPHA=0.95
       # Create a new Dominance Analysis Bootstrap Object
       #
       # * ds: A Dataset object
       # * y_var: Name of dependent variable
       # * opts: Any other attribute of the class
-      ALPHA=0.95
       def initialize(ds,y_var, opts=Hash.new)
         @ds=ds
         @y_var=y_var

data/lib/statsample/dominanceanalysis.rb CHANGED Viewed

@@ -1,13 +1,12 @@
 require 'statsample/dominanceanalysis/bootstrap'
 module Statsample
-  # Dominance Analysis is a procedure based on an examination of the R^2 values
+  # Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values
   # for all possible subset models, to identify the relevance of one or more
   # predictors in the prediction of a criterion.
   #
-  #
   # See Budescu(1993), Azen & Budescu (2003, 2006) for more information.
   #
-  # Example:
+  # == Use
   #
   #  a=1000.times.collect {rand}.to_scale
   #  b=1000.times.collect {rand}.to_scale
@@ -17,7 +16,7 @@ module Statsample
   #  da=Statsample::DominanceAnalysis.new(ds,'y')
   #  puts da.summary
   #
-  # Output:
+  # === Output:
   #
   #  Report: Report 2010-02-08 19:10:11 -0300
   #  Table: Dominance Analysis result
@@ -51,12 +50,12 @@ module Statsample
   #  | a - c | 1.0   | 1.0         | 1.0     |
   #  | b - c | 1.0   | 1.0         | 1.0     |
   #  -----------------------------------------
   #
   # == References:
   # * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
   # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
   # * Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
+  #
   class DominanceAnalysis
     include GetText
     bindtextdomain("statsample")
@@ -366,7 +365,7 @@ module Statsample
       generator.parse_element(t)
       generator.add_html("</div>")
     end
-    class ModelData
+    class ModelData # :nodoc:
       attr_reader :contributions
       def initialize(independent, data, da)
         @independent=independent

data/lib/statsample/factor/pca.rb CHANGED Viewed

@@ -1,21 +1,42 @@
 module Statsample
 module Factor
-  # Principal Component Analysis of a given covariance or correlation matrix.
-  # For factorial Analysis, use Statsample::Factor::PrincipalAxis
-  # Reference: SPSS manual
-  #   Use:
+  # Principal Component Analysis (PCA) of a
+  # covariance or correlation matrix.
+  #
+  # For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis
+  #
+  # == Usage:
+  #   require 'statsample'
   #   a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
   #   b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
   #   ds={'a'=>a,'b'=>b}.to_dataset
   #   cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
   #   pca=Statsample::Factor::PCA.new(cor_matrix)
-  #   p pca.component_matrix
+  #   pca.m
+  #   => 1
+  #   pca.eigenvalues
+  #   => [1.92592927269225, 0.0740707273077545]
+  #   pca.component_matrix
+  #   => GSL::Matrix
+  #   [  9.813e-01
+  #     9.813e-01 ]
+  #   pca.communalities
+  #   => [0.962964636346122, 0.962964636346122]
+  #
+  # == References:
+  #
+  # * SPSS manual
+  # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
+  #
   class PCA
-    attr_accessor :name, :m
+    # Name of analysis
+    attr_accessor :name
+    # Number of factors. Set by default to the number of factors
+    # with eigen values > 1
+    attr_accessor :m
     include GetText
     bindtextdomain("statsample")
     def initialize(matrix ,opts=Hash.new)
       if matrix.respond_to? :to_gsl
         matrix=matrix.to_gsl
@@ -42,6 +63,7 @@ module Factor
       }
       @ds=h.to_dataset
     end
     # Feature vector for m factors
     def feature_vector(m=nil)
       m||=@m
@@ -69,10 +91,10 @@ module Factor
         gammas.push(Math::sqrt(@eigenpairs[i][0]))
       }
       gamma_m=GSL::Matrix.diagonal(gammas)
-      omega_m*(gamma_m)
+      (omega_m*(gamma_m)).to_matrix
     end
-    # Communality for all variables given m factors
-    def communality(m=nil)
+    # Communalities for all variables given m factors
+    def communalities(m=nil)
       m||=@m
       h=[]
       @n_variables.times do |i|
@@ -84,9 +106,11 @@ module Factor
       end
       h
     end
+    # Array with eigenvalues
     def eigenvalues
       @eigenpairs.collect {|c| c[0] }
     end
     def calculate_eigenpairs
       eigval, eigvec= GSL::Eigen.symmv(@matrix)
       @eigenpairs={}
@@ -95,13 +119,18 @@ module Factor
       }
       @eigenpairs=@eigenpairs.sort.reverse
     end
+    def summary
+      rp=ReportBuilder.new()
+      rp.add(self)
+      rp.to_text
+    end
     def to_reportbuilder(generator) # :nodoc:
       anchor=generator.add_toc_entry(_("PCA: ")+name)
       generator.add_html "<div class='pca'>"+_("PCA")+" #{@name}<a name='#{anchor}'></a>"
       generator.add_text "Number of factors: #{m}"
       t=ReportBuilder::Table.new(:name=>_("Communalities"), :header=>["Variable","Initial","Extraction"])
-      communality(m).each_with_index {|com,i|
+      communalities(m).each_with_index {|com,i|
         t.add_row([i, 1.0, sprintf("%0.3f", com)])
       }
       generator.parse_element(t)
@@ -122,6 +151,7 @@ module Factor
       generator.parse_element(t)
       generator.add_html("</div>")
     end
+    private :calculate_eigenpairs, :create_centered_ds
   end
 end
 end