statsample 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/History.txt +12 -0
  2. data/Manifest.txt +13 -0
  3. data/README.txt +2 -1
  4. data/demo/pca.rb +29 -0
  5. data/demo/umann.rb +8 -0
  6. data/lib/distribution.rb +0 -1
  7. data/lib/matrix_extension.rb +35 -21
  8. data/lib/statsample.rb +31 -28
  9. data/lib/statsample/anova.rb +7 -2
  10. data/lib/statsample/bivariate.rb +17 -11
  11. data/lib/statsample/codification.rb +136 -87
  12. data/lib/statsample/combination.rb +0 -2
  13. data/lib/statsample/converter/csv18.rb +1 -1
  14. data/lib/statsample/converter/csv19.rb +1 -1
  15. data/lib/statsample/converters.rb +176 -171
  16. data/lib/statsample/crosstab.rb +227 -154
  17. data/lib/statsample/dataset.rb +94 -12
  18. data/lib/statsample/dominanceanalysis.rb +69 -62
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +25 -21
  20. data/lib/statsample/factor.rb +18 -0
  21. data/lib/statsample/factor/pca.rb +128 -0
  22. data/lib/statsample/factor/principalaxis.rb +133 -0
  23. data/lib/statsample/factor/rotation.rb +125 -0
  24. data/lib/statsample/histogram.rb +99 -0
  25. data/lib/statsample/mle.rb +125 -126
  26. data/lib/statsample/mle/logit.rb +91 -91
  27. data/lib/statsample/mle/probit.rb +84 -85
  28. data/lib/statsample/multiset.rb +1 -1
  29. data/lib/statsample/permutation.rb +96 -0
  30. data/lib/statsample/regression.rb +1 -1
  31. data/lib/statsample/regression/binomial.rb +89 -89
  32. data/lib/statsample/regression/binomial/logit.rb +9 -9
  33. data/lib/statsample/regression/binomial/probit.rb +9 -9
  34. data/lib/statsample/regression/multiple.rb +8 -14
  35. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  36. data/lib/statsample/regression/multiple/rubyengine.rb +55 -55
  37. data/lib/statsample/resample.rb +12 -17
  38. data/lib/statsample/srs.rb +4 -1
  39. data/lib/statsample/test.rb +23 -22
  40. data/lib/statsample/test/umannwhitney.rb +182 -0
  41. data/lib/statsample/vector.rb +854 -815
  42. data/test/test_bivariate.rb +132 -132
  43. data/test/test_codification.rb +71 -50
  44. data/test/test_dataset.rb +19 -1
  45. data/test/test_factor.rb +44 -0
  46. data/test/test_histogram.rb +26 -0
  47. data/test/test_permutation.rb +37 -0
  48. data/test/test_statistics.rb +74 -63
  49. data/test/test_umannwhitney.rb +17 -0
  50. data/test/test_vector.rb +46 -30
  51. metadata +31 -4
module Statsample
  # Resampling helpers: repeated experiment runs and random data generation.
  module Resample
    class << self
      # Calls +action+ a total of +times+ times and returns an Array with
      # every result, in call order.
      def repeat_and_save(times,&action)
        results=[]
        times.times { results.push(action.call) }
        results
      end

      # Builds a :scale Vector of +size+ uniformly distributed random
      # integers in the inclusive range [low, upper].
      def generate(size,low,upper)
        span=upper-low+1
        Vector.new(Array.new(size) { rand(span)+low }, :scale)
      end

    end
  end
end
@@ -35,6 +35,8 @@ module Statsample
35
35
  n0=estimation_n0(d,prop,margin)
36
36
  n0.quo( 1 + ((n0 - 1).quo(n_pobl)))
37
37
  end
38
+
39
+
38
40
  # Proportion confidence interval with t values
39
41
  # Uses estimated proportion, sample without replacement.
40
42
 
@@ -42,6 +44,7 @@ module Statsample
42
44
  t = Distribution::T.p_value(1-((1-margin).quo(2)) , n_sample-1)
43
45
  proportion_confidence_interval(prop,n_sample,n_population, t)
44
46
  end
47
+
45
48
  # Proportion confidence interval with z values
46
49
  # Uses estimated proportion, sample without replacement.
47
50
  def proportion_confidence_interval_z(p, n_sample, n_population, margin=0.95)
@@ -53,7 +56,7 @@ module Statsample
53
56
 
54
57
  def proportion_confidence_interval(p, sam,pop , x)
55
58
  f=sam.quo(pop)
56
- one_range=x * Math::sqrt((qf(sam, pop) * p * (1-p)) / (sam-1)) + (1.quo(sam * 2.0))
59
+ one_range=x * Math::sqrt((qf(sam, pop) * p * (1-p)).quo (sam-1)) + (1.quo(sam * 2.0))
57
60
  [p-one_range, p+one_range]
58
61
  end
59
62
  # Standard deviation for sample distribution of a proportion
module Statsample
  # Module for several statistical tests.
  module Test
    autoload(:UMannWhitney, 'statsample/test/umannwhitney')
    class << self
      # Chi-square statistic comparing an observed (+real+) and an
      # +expected+ Matrix, summed over every cell:
      #   sum((obs - exp)**2 / exp)
      def chi_square(real,expected)
        unless real.is_a?(Matrix) && expected.is_a?(Matrix)
          raise TypeError, "Both argument should be Matrix"
        end
        (0...real.row_size).inject(0) do |total,row_i|
          (0...real.column_size).inject(total) do |acc,col_i|
            obs=real[row_i,col_i].to_f
            exp=expected[row_i,col_i].to_f
            acc + ((obs-exp)**2) / exp
          end
        end
      end
      # Convenience constructor for a U Mann-Whitney test over two vectors.
      def u_mannwhitney(v1p,v2p)
        Statsample::Test::UMannWhitney.new(v1p,v2p)
      end
    end
  end
end
module Statsample
  module Test
    #
    # = U Mann-Whitney test
    #
    # Non-parametric test for assessing whether two independent samples
    # of observations come from the same distribution.
    #
    # == Assumptions
    #
    # * The two samples under investigation in the test are independent of each other and the observations within each sample are independent.
    # * The observations are comparable (i.e., for any two observations, one can assess whether they are equal or, if not, which one is greater).
    # * The variances in the two groups are approximately equal.
    #
    # Higher differences of distributions correspond to
    # lower values of U.
    #
    class UMannWhitney
      # Max for m*n allowed for exact calculation of probability
      MAX_MN_EXACT=10000

      # Exact sampling distribution of U based on the Dinneen & Blakesley
      # (1973) AS 62 algorithm. This is the algorithm used on SPSS.
      #
      # Returns an Array of Rational probabilities for U=0 up to the middle
      # of the distribution; since the distribution is symmetric, each entry
      # (except an exact middle value) carries the folded (doubled) mass.
      #
      # Reference: Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann- Whitney U Statistic. Journal of the Royal Statistical Society, 22(2), 269-273
      #
      def self.exact_probability_distribution_as62(n1,n2)
        freq=[]
        work=[]
        mn1=n1*n2+1
        max_u=n1*n2
        minmn=n1<n2 ? n1 : n2
        maxmn=n1>n2 ? n1 : n2
        # NOTE: n1 is reused as a scratch index from here on, as in AS 62.
        n1=maxmn+1
        (1..n1).each{|i| freq[i]=1}
        n1+=1
        (n1..mn1).each{|i| freq[i]=0}
        work[1]=0
        xin=maxmn
        (2..minmn).each do |i|
          work[i]=0
          xin=xin+maxmn
          n1=xin+2
          l=1+xin.quo(2)
          k=i
          (1..l).each do |j|
            k=k+1
            n1=n1-1
            sum=freq[j]+work[j]
            freq[j]=sum
            work[k]=sum-freq[n1]
            freq[n1]=sum
          end
        end

        # Generate percentages for normal U, folding the symmetric upper
        # tail onto the lower one.
        dist=(1+max_u/2).to_i
        freq.shift
        total=freq.inject(0) {|a,v| a+v }
        (0...dist).collect {|i|
          if i!=max_u-i
            ues=freq[i]*2
          else
            ues=freq[i]
          end
          ues.quo(total)
        }
      end

      # Generate the distribution of U over every permutation of group
      # membership (brute force). Returns a Hash mapping each permutation
      # to its (smaller) U value.
      #
      # Fix: removed dead local +upper=0+ which was never used.
      def self.distribution_permutations(n1,n2)
        base=[0]*n1+[1]*n2
        po=Statsample::Permutation.new(base)
        total=n1*n2
        req={}
        po.each do |perm|
          r0,s0=0,0
          perm.each_index {|c_i|
            if perm[c_i]==0
              r0+=c_i+1
              s0+=1
            end
          }
          u1=r0-((s0*(s0+1)).quo(2))
          u2=total-u1
          temp_u= (u1 <= u2) ? u1 : u2
          req[perm]=temp_u
        end
        req
      end
      # Sample 1 Rank sum
      attr_reader :r1
      # Sample 2 Rank sum
      attr_reader :r2
      # Sample 1 U
      attr_reader :u1
      # Sample 2 U
      attr_reader :u2
      # U Value
      attr_reader :u
      # Compensation for ties
      attr_reader :t
      # Builds the test from two vectors; only their valid data is used.
      def initialize(v1,v2)
        @n1=v1.valid_data.size
        @n2=v2.valid_data.size

        data=(v1.valid_data+v2.valid_data).to_scale
        groups=(([0]*@n1)+([1]*@n2)).to_vector
        ds={'g'=>groups, 'data'=>data}.to_dataset
        @t=nil
        # Ties require a correction to the variance of U (see #z).
        @ties=data.data.size!=data.data.uniq.size
        if(@ties)
          adjust_for_ties(ds['data'])
        end
        ds['ranked']=ds['data'].ranked(:scale)

        @n=ds.cases

        @r1=ds.filter{|r| r['g']==0}['ranked'].sum
        @r2=((ds.cases*(ds.cases+1)).quo(2))-r1
        @u1=r1-((@n1*(@n1+1)).quo(2))
        @u2=r2-((@n2*(@n2+1)).quo(2))
        @u=(u1<u2) ? u1 : u2
      end
      # Textual report of the test.
      #
      # Fix: second rank-sum line mislabeled its value as "v1" while
      # interpolating @r2 -- now labeled "v2".
      def summary
        out=<<-HEREDOC
Mann-Whitney U
Sum of ranks v1: #{@r1.to_f}
Sum of ranks v2: #{@r2.to_f}
U Value: #{@u.to_f}
Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
        HEREDOC
        if @n1*@n2<MAX_MN_EXACT
          out+="Exact p (Dinneen & Blakesley): #{sprintf("%0.3f",exact_probability)}"
        end
        out
      end
      # Exact probability of finding values of U lower or equal to sample on U distribution. Use with caution with m*n>100000
      # Reference: Dinneen & Blakesley (1973)
      def exact_probability
        dist=UMannWhitney.exact_probability_distribution_as62(@n1,@n2)
        sum=0
        (0..@u.to_i).each {|i|
          sum+=dist[i]
        }
        sum
      end
      # Tie correction term: sums (t**3-t)/12 over every group of tied values.
      # Reference: http://europe.isixsigma.com/library/content/c080806a.asp
      def adjust_for_ties(data)
        @t=data.frequencies.find_all{|k,v| v>1}.inject(0) {|a,v|
          a+(v[1]**3-v[1]).quo(12)
        }
      end
      # Z value for U, with adjust for ties.
      # For large samples, U is approximately normally distributed.
      # In that case, you can use z to obtain probabily for U.
      # Reference: SPSS Manual
      def z
        mu=(@n1*@n2).quo(2)
        if(!@ties)
          ou=Math::sqrt(((@n1*@n2)*(@n1+@n2+1)).quo(12))
        else
          n=@n1+@n2
          first=(@n1*@n2).quo(n*(n-1))
          second=((n**3-n).quo(12))-@t
          ou=Math::sqrt(first*second)
        end
        (@u-mu).quo(ou)
      end
      # Assuming H_0, the proportion of cdf with values of U lower
      # than the sample (two-tailed).
      # Use with more than 30 cases per group.
      def z_probability
        (1-Distribution::Normal.cdf(z.abs()))*2
      end
    end

  end
end
@@ -1,3 +1,4 @@
1
+ require 'date'
1
2
  class Array
2
3
  # Creates a new Statsample::Vector object
3
4
  # Argument should be equal to Vector.new
@@ -9,25 +10,24 @@ class Array
9
10
  Statsample::Vector.new(self,:scale,*args)
10
11
  end
11
12
  end
12
-
13
13
  module Statsample
14
- class << self
15
- # Create a matrix using vectors as columns.
16
- # Use:
17
- #
18
- # matrix=Statsample.vector_cols_matrix(v1,v2)
19
- def vector_cols_matrix(*vs)
20
- # test
21
- size=vs[0].size
22
- vs.each{|v|
23
- raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
24
- raise ArgumentError,"Vectors size should be the same" if v.size!=size
25
- }
26
- Matrix.rows((0...size).to_a.collect() {|i|
27
- vs.collect{|v| v[i]}
28
- })
29
- end
30
- end
14
+ class << self
15
+ # Create a matrix using vectors as columns.
16
+ # Use:
17
+ #
18
+ # matrix=Statsample.vector_cols_matrix(v1,v2)
19
+ def vector_cols_matrix(*vs)
20
+ # test
21
+ size=vs[0].size
22
+ vs.each{|v|
23
+ raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
24
+ raise ArgumentError,"Vectors size should be the same" if v.size!=size
25
+ }
26
+ Matrix.rows((0...size).to_a.collect() {|i|
27
+ vs.collect{|v| v[i]}
28
+ })
29
+ end
30
+ end
31
31
  # Returns a duplicate of the input vectors, without missing data
32
32
  # for any of the vectors.
33
33
  #
@@ -46,834 +46,873 @@ module Statsample
46
46
  ds.vectors.values
47
47
  end
48
48
 
49
- class Vector
50
- include Enumerable
51
- include Writable
52
- DEFAULT_OPTIONS={
53
- :missing_values=>[],
54
- :labels=>{}
49
+ class Vector
50
+ include Enumerable
51
+ include Writable
52
+ DEFAULT_OPTIONS={
53
+ :missing_values=>[],
54
+ :today_values=>['NOW','TODAY', :NOW, :TODAY],
55
+ :labels=>{}
56
+ }
57
+ # Level of measurement. Could be :nominal, :ordinal or :scale
58
+ attr_reader :type
59
+ # Original data.
60
+ attr_reader :data
61
+ # Valid data. Equal to data, minus values assigned as missing values
62
+ attr_reader :valid_data
63
+ # Array of values considered as missing. Nil is a missing value, by default
64
+ attr_reader :missing_values
65
+ # Array of values considered as "Today", with date type. "NOW", "TODAY", :NOW and :TODAY are 'today' values, by default
66
+ attr_reader :today_values
67
+ # Missing values array
68
+ attr_reader :missing_data
69
+ # Original data, with all missing values replaced by nils
70
+ attr_reader :data_with_nils
71
+ # Date date, with all missing values replaced by nils
72
+ attr_reader :date_data_with_nils
73
+ # GSL Object, only available with rbgsl extension and type==:scale
74
+ attr_reader :gsl
75
+ # Change label for specific values
76
+ attr_accessor :labels
77
+ # Creates a new Vector object.
78
+ # [data] Array of data.
79
+ # [type] Level of meausurement. See Vector#type
80
+ # [opts] Options
81
+ # [:missing_values] Array of missing values. See Vector#missing_values
82
+ # [:today_values] Array of 'today' values. See Vector#today_values
83
+ # [:labels] Labels for data values
84
+ #
85
+ # The fast way to create a vector uses Array.to_vector or Array.to_scale.
86
+ #
87
+ # v=[1,2,3,4].to_vector(:scale)
88
+ # v=[1,2,3,4].to_scale
89
+ #
90
+
91
      # Builds the vector.
      #
      # data:: Array of raw observations (must be an Array).
      # t::    level of measurement (:nominal by default).
      # opts:: options hash, merged over DEFAULT_OPTIONS
      #        (:missing_values, :today_values, :labels).
      def initialize(data=[], t=:nominal, opts=Hash.new)
        raise "Data should be an array" unless data.is_a? Array
        @data=data
        @type=t
        # User options override the defaults declared in DEFAULT_OPTIONS.
        opts=DEFAULT_OPTIONS.merge(opts)
        @missing_values=opts[:missing_values]
        @labels=opts[:labels]
        @today_values=opts[:today_values]
        # Caches rebuilt by set_valid_data_intern / type=.
        @valid_data=[]
        @data_with_nils=[]
        @date_data_with_nils=[]
        @missing_data=[]
        @has_missing_data=nil
        @scale_data=nil
        # Partition data into valid/missing BEFORE applying the type:
        # the type= writer triggers set_scale_data / set_date_data,
        # which presumably read the partitions built here -- confirm.
        set_valid_data_intern
        self.type=t
      end
108
+ # Creates a duplicate of the Vector.
109
+ # Note: data, missing_values and labels are duplicated, so
110
+ # changes on original vector doesn't propages to copies.
111
+ def dup
112
+ Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
113
+ end
114
+ # Returns an empty duplicate of the vector. Maintains the type,
115
+ # missing values and labels.
116
+ def dup_empty
117
+ Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
118
+ end
119
+ # Raises an exception if type of vector is inferior to t type
120
+ def check_type(t)
121
+ raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal) or (t==:date)
122
+ end
123
+ private :check_type
124
+
125
+ # Return a vector usign the standarized values for data
126
+ # with sd with denominator N
127
+ def vector_standarized_pop
128
+ vector_standarized(true)
129
+ end
130
+ # Return a vector usign the standarized values for data
131
+ # with sd with denominator n-1
132
+
133
+ def vector_standarized(use_population=false)
134
+ raise "Should be a scale" unless @type==:scale
135
+ m=mean
136
+ sd=use_population ? sdp : sds
137
+ @data_with_nils.collect{|x|
138
+ if !x.nil?
139
+ (x.to_f - m).quo(sd)
140
+ else
141
+ nil
142
+ end
143
+ }.to_vector(:scale)
144
+ end
145
+
146
+ alias_method :standarized, :vector_standarized
147
+
148
+ def box_cox_transformation(lambda) # :nodoc:
149
+ raise "Should be a scale" unless @type==:scale
150
+ @data_with_nils.collect{|x|
151
+ if !x.nil?
152
+ if(lambda==0)
153
+ Math.log(x)
154
+ else
155
+ (x**lambda-1).quo(lambda)
156
+ end
157
+ else
158
+ nil
159
+ end
160
+ }.to_vector(:scale)
161
+ end
162
+
163
+ # Vector equality.
164
+ # Two vector will be the same if their data, missing values, type, labels are equals
165
+ def ==(v2)
166
+ raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
167
+ @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
168
+ end
169
+
170
+ def _dump(i) # :nodoc:
171
+ Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type})
172
+ end
173
+
174
+ def self._load(data) # :nodoc:
175
+ h=Marshal.load(data)
176
+ Vector.new(h['data'], h['type'],:missing_values=> h['missing_values'], :labels=>h['labels'])
177
+ end
178
+ # Returns a new vector, with data modified by block.
179
+ # Equivalent to create a Vector after #collect on data
180
+ def recode
181
+ @data.collect{|x|
182
+ yield x
183
+ }.to_vector(@type)
184
+ end
185
+ # Modifies current vector, with data modified by block.
186
+ # Equivalent to #collect! on @data
187
+ def recode!
188
+ @data.collect!{|x|
189
+ yield x
55
190
  }
56
- # Level of measurement. Could be :nominal, :ordinal or :scale
57
- attr_reader :type
58
- # Original data.
59
- attr_reader :data
60
- # Valid data. Equal to data, minus values assigned as missing values
61
- attr_reader :valid_data
62
- # Array of values considered as missing. Nil is a missing value, by default
63
- attr_reader :missing_values
64
- # Missing values array
65
- attr_reader :missing_data
66
- # Original data, with all missing values replaced by nils
67
- attr_reader :data_with_nils
68
- # GSL Object, only available with rbgsl extension and type==:scale
69
- attr_reader :gsl
70
- # Change label for specific values
71
- attr_accessor :labels
72
- # Creates a new Vector object.
73
- # [data] Array of data.
74
- # [type] Level of meausurement. See Vector#type
75
- # [opts] Options
76
- # [:missing_values] Array of missing values. See Vector#missing_values
77
- # [:labels] Labels for data values
78
- #
79
- # The fast way to create a vector uses Array.to_vector or Array.to_scale.
80
- #
81
- # v=[1,2,3,4].to_vector(:scale)
82
- # v=[1,2,3,4].to_scale
83
- #
84
-
85
- def initialize(data=[], t=:nominal, opts=Hash.new)
86
- raise "Data should be an array" unless data.is_a? Array
87
- @data=data
88
- @type=t
89
- opts=DEFAULT_OPTIONS.merge(opts)
90
- @missing_values=opts[:missing_values]
91
- @labels=opts[:labels]
92
- @valid_data=[]
93
- @data_with_nils=[]
94
- @missing_data=[]
95
- @has_missing_data=nil
96
- @scale_data=nil
97
- set_valid_data_intern
98
- self.type=t
99
- end
100
- # Creates a duplicate of the Vector.
101
- # Note: data, missing_values and labels are duplicated, so
102
- # changes on original vector doesn't propages to copies.
103
- def dup
104
- Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
105
- end
106
- # Returns an empty duplicate of the vector. Maintains the type,
107
- # missing values and labels.
108
- def dup_empty
109
- Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
110
- end
111
- # Raises an exception if type of vector is inferior to t type
112
- def check_type(t)
113
- raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal)
114
- end
115
- private :check_type
116
-
117
- # Return a vector usign the standarized values for data
118
- # with sd with denominator N
119
- def vector_standarized_pop
120
- vector_standarized(true)
121
- end
122
- # Return a vector usign the standarized values for data
123
- # with sd with denominator n-1
124
-
125
- def vector_standarized(use_population=false)
126
- raise "Should be a scale" unless @type==:scale
127
- m=mean
128
- sd=use_population ? sdp : sds
129
- @data_with_nils.collect{|x|
130
- if !x.nil?
131
- (x.to_f - m).quo(sd)
132
- else
133
- nil
134
- end
135
- }.to_vector(:scale)
136
- end
137
-
138
- alias_method :standarized, :vector_standarized
139
-
140
- def box_cox_transformation(lambda) # :nodoc:
141
- raise "Should be a scale" unless @type==:scale
142
- @data_with_nils.collect{|x|
143
- if !x.nil?
144
- if(lambda==0)
145
- Math.log(x)
146
- else
147
- (x**lambda-1).quo(lambda)
148
- end
149
- else
191
+ set_valid_data
192
+ end
193
+ # Dicotomize the vector with 0 and 1, based on lowest value
194
+ # If parameter if defined, this value and lower
195
+ # will be 0 and higher, 1
196
+ def dichotomize(low=nil)
197
+ fs=factors
198
+ low||=factors.min
199
+ @data_with_nils.collect{|x|
200
+ if x.nil?
201
+ nil
202
+ elsif x>low
203
+ 1
204
+ else
205
+ 0
206
+ end
207
+ }.to_scale
208
+ end
209
+ # Iterate on each item.
210
+ # Equivalent to
211
+ # @data.each{|x| yield x}
212
+ def each
213
+ @data.each{|x| yield(x) }
214
+ end
215
+
216
+ # Iterate on each item, retrieving index
217
+ def each_index
218
+ (0...@data.size).each {|i|
219
+ yield(i)
220
+ }
221
+ end
222
+ # Add a value at the end of the vector.
223
+ # If second argument set to false, you should update the Vector usign
224
+ # Vector.set_valid_data at the end of your insertion cycle
225
+ #
226
+ def add(v,update_valid=true)
227
+ @data.push(v)
228
+ set_valid_data if update_valid
229
+ end
230
+ # Update valid_data, missing_data, data_with_nils and gsl
231
+ # at the end of an insertion.
232
+ #
233
+ # Use after Vector.add(v,false)
234
+ # Usage:
235
+ # v=Statsample::Vector.new
236
+ # v.add(2,false)
237
+ # v.add(4,false)
238
+ # v.data
239
+ # => [2,3]
240
+ # v.valid_data
241
+ # => []
242
+ # v.set_valid_data
243
+ # v.valid_data
244
+ # => [2,3]
245
+ def set_valid_data
246
+ @valid_data.clear
247
+ @missing_data.clear
248
+ @data_with_nils.clear
249
+ @date_data_with_nils.clear
250
+ @gsl=nil
251
+ set_valid_data_intern
252
+ set_scale_data if(@type==:scale)
253
+ set_date_data if(@type==:date)
254
+ end
255
+
256
+ if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
257
+ def set_valid_data_intern #:nodoc:
258
+ Statsample::STATSAMPLE__.set_valid_data_intern(self)
259
+ end
260
+ else
261
+ def set_valid_data_intern #:nodoc:
262
+ _set_valid_data_intern
263
+ end
264
+ end
265
+ def _set_valid_data_intern #:nodoc:
266
+ @data.each do |n|
267
+ if is_valid? n
268
+ @valid_data.push(n)
269
+ @data_with_nils.push(n)
270
+ else
271
+ @data_with_nils.push(nil)
272
+ @missing_data.push(n)
273
+ end
274
+ end
275
+ @has_missing_data=@missing_data.size>0
276
+ end
277
+
278
+ # Retrieves true if data has one o more missing values
279
+ def has_missing_data?
280
+ @has_missing_data
281
+ end
282
+ # Retrieves label for value x. Retrieves x if
283
+ # no label defined.
284
+ def labeling(x)
285
+ @labels.has_key?(x) ? @labels[x].to_s : x.to_s
286
+ end
287
+ # Returns a Vector with data with labels replaced by the label.
288
+ def vector_labeled
289
+ d=@data.collect{|x|
290
+ if @labels.has_key? x
291
+ @labels[x]
292
+ else
293
+ x
294
+ end
295
+ }
296
+ Vector.new(d,@type)
297
+ end
298
+ # Size of total data
299
+ def size
300
+ @data.size
301
+ end
302
+ alias_method :n, :size
303
+
304
+ # Retrieves i element of data
305
+ def [](i)
306
+ @data[i]
307
+ end
308
+ # Set i element of data.
309
+ # Note: Use set_valid_data if you include missing values
310
+ def []=(i,v)
311
+ @data[i]=v
312
+ end
313
+ # Return true if a value is valid (not nil and not included on missing values)
314
+ def is_valid?(x)
315
+ !(x.nil? or @missing_values.include? x)
316
+ end
317
+ # Set missing_values.
318
+ # if update_valid = false, you should use
319
+ # set_valid_data after all changes
320
+ def missing_values=(vals)
321
+ @missing_values = vals
322
+ set_valid_data
323
+ end
324
+ def today_values=(vals)
325
+ @today_values = vals
326
+ set_valid_data
327
+ end
328
+ # Set level of measurement.
329
+ def type=(t)
330
+ @type=t
331
+ set_scale_data if(t==:scale)
332
+ set_date_data if (t==:date)
333
+ end
334
+ def to_a
335
+ @data.dup
336
+ end
337
+ alias_method :to_ary, :to_a
338
+
339
+ # Vector sum.
340
+ # - If v is a scalar, add this value to all elements
341
+ # - If v is a Array or a Vector, should be of the same size of this vector
342
+ # every item of this vector will be added to the value of the
343
+ # item at the same position on the other vector
344
+ def +(v)
345
+ _vector_ari("+",v)
346
+ end
347
+ # Vector rest.
348
+ # - If v is a scalar, rest this value to all elements
349
+ # - If v is a Array or a Vector, should be of the same
350
+ # size of this vector
351
+ # every item of this vector will be rested to the value of the
352
+ # item at the same position on the other vector
353
+
354
+ def -(v)
355
+ _vector_ari("-",v)
356
+ end
357
+ # Reports all values that doesn't comply with a condition.
358
+ # Returns a hash with the index of data and the invalid data.
359
+ def verify
360
+ h={}
361
+ (0...@data.size).to_a.each{|i|
362
+ if !(yield @data[i])
363
+ h[i]=@data[i]
364
+ end
365
+ }
366
+ h
367
+ end
368
+ def _vector_ari(method,v) # :nodoc:
369
+ if(v.is_a? Vector or v.is_a? Array)
370
+ if v.size==@data.size
371
+ # i=0
372
+ sum=[]
373
+ 0.upto(v.size-1) {|i|
374
+ if((v.is_a? Vector and v.is_valid?(v[i]) and is_valid?(@data[i])) or (v.is_a? Array and !v[i].nil? and !data[i].nil?))
375
+ sum.push(@data[i].send(method,v[i]))
376
+ else
377
+ sum.push(nil)
378
+ end
379
+ }
380
+ Statsample::Vector.new(sum)
381
+ else
382
+ raise ArgumentError, "The array/vector parameter should be of the same size of the original vector"
383
+ end
384
+ elsif(v.respond_to? method )
385
+ Statsample::Vector.new(
386
+ @data.collect {|x|
387
+ if(!x.nil?)
388
+ x.send(method,v)
389
+ else
150
390
  nil
151
- end
152
- }.to_vector(:scale)
153
- end
154
-
155
- # Vector equality.
156
- # Two vector will be the same if their data, missing values, type, labels are equals
157
- def ==(v2)
158
- raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
159
- @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
160
- end
161
-
162
- def _dump(i) # :nodoc:
163
- Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type})
164
- end
165
-
166
- def self._load(data) # :nodoc:
167
- h=Marshal.load(data)
168
- Vector.new(h['data'], h['type'],:missing_values=> h['missing_values'], :labels=>h['labels'])
169
- end
170
- # Returns a new vector, with data modified by block.
171
- # Equivalent to create a Vector after #collect on data
172
- def recode
173
- @data.collect{|x|
174
- yield x
175
- }.to_vector(@type)
176
- end
177
- # Modifies current vector, with data modified by block.
178
- # Equivalent to #collect! on @data
179
- def recode!
180
- @data.collect!{|x|
181
- yield x
182
- }
183
- set_valid_data
184
- end
185
- # Dicotomize the vector with 0 and 1, based on lowest value
186
- # If parameter if defined, this value and lower
187
- # will be 0 and higher, 1
188
- def dichotomize(low=nil)
189
- fs=factors
190
- low||=factors.min
191
- @data_with_nils.collect{|x|
192
- if x.nil?
193
- nil
194
- elsif x>low
195
- 1
196
- else
197
- 0
198
- end
199
- }.to_scale
200
- end
201
- # Iterate on each item.
202
- # Equivalent to
203
- # @data.each{|x| yield x}
204
- def each
205
- @data.each{|x| yield(x) }
206
- end
207
-
208
- # Iterate on each item, retrieving index
209
- def each_index
210
- (0...@data.size).each {|i|
211
- yield(i)
391
+ end
212
392
  }
213
- end
214
- # Add a value at the end of the vector.
215
- # If second argument set to false, you should update the Vector usign
216
- # Vector.set_valid_data at the end of your insertion cycle
217
- #
218
- def add(v,update_valid=true)
219
- @data.push(v)
220
- set_valid_data if update_valid
221
- end
222
- # Update valid_data, missing_data, data_with_nils and gsl
223
- # at the end of an insertion.
224
- #
225
- # Use after Vector.add(v,false)
226
- # Usage:
227
- # v=Statsample::Vector.new
228
- # v.add(2,false)
229
- # v.add(4,false)
230
- # v.data
231
- # => [2,3]
232
- # v.valid_data
233
- # => []
234
- # v.set_valid_data
235
- # v.valid_data
236
- # => [2,3]
237
- def set_valid_data
238
- @valid_data.clear
239
- @missing_data.clear
240
- @data_with_nils.clear
241
- @gsl=nil
242
- set_valid_data_intern
243
- set_scale_data if(@type==:scale)
244
- end
245
-
246
- if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
247
- def set_valid_data_intern #:nodoc:
248
- Statsample::STATSAMPLE__.set_valid_data_intern(self)
249
- end
393
+ )
250
394
  else
251
- def set_valid_data_intern #:nodoc:
252
- _set_valid_data_intern
253
- end
254
- end
255
- def _set_valid_data_intern #:nodoc:
256
- @data.each do |n|
257
- if is_valid? n
258
- @valid_data.push(n)
259
- @data_with_nils.push(n)
260
- else
261
- @data_with_nils.push(nil)
262
- @missing_data.push(n)
263
- end
264
- end
265
- @has_missing_data=@missing_data.size>0
395
+ raise TypeError,"You should pass a scalar or a array/vector"
266
396
  end
267
-
268
- # Retrieves true if data has one o more missing values
269
- def has_missing_data?
270
- @has_missing_data
271
- end
272
- # Retrieves label for value x. Retrieves x if
273
- # no label defined.
274
- def labeling(x)
275
- @labels.has_key?(x) ? @labels[x].to_s : x.to_s
276
- end
277
- # Returns a Vector with data with labels replaced by the label.
278
- def vector_labeled
279
- d=@data.collect{|x|
280
- if @labels.has_key? x
281
- @labels[x]
282
- else
283
- x
284
- end
285
- }
286
- Vector.new(d,@type)
287
- end
288
- # Size of total data
289
- def size
290
- @data.size
291
- end
292
- alias_method :n, :size
293
-
294
- # Retrieves i element of data
295
- def [](i)
296
- @data[i]
297
- end
298
- # Set i element of data.
299
- # Note: Use set_valid_data if you include missing values
300
- def []=(i,v)
301
- @data[i]=v
302
- end
303
- # Return true if a value is valid (not nil and not included on missing values)
304
- def is_valid?(x)
305
- !(x.nil? or @missing_values.include? x)
306
- end
307
- # Set missing_values.
308
- # if update_valid = false, you should use
309
- # set_valid_data after all changes
310
- def missing_values=(vals)
311
- @missing_values = vals
312
- set_valid_data
313
- end
314
- # Set level of measurement.
315
- def type=(t)
316
- @type=t
317
- set_scale_data if(t==:scale)
318
- end
319
- def to_a
320
- @data.dup
321
- end
322
- alias_method :to_ary, :to_a
323
-
324
- # Vector sum.
325
- # - If v is a scalar, add this value to all elements
326
- # - If v is a Array or a Vector, should be of the same size of this vector
327
- # every item of this vector will be added to the value of the
328
- # item at the same position on the other vector
329
- def +(v)
330
- _vector_ari("+",v)
331
- end
332
- # Vector rest.
333
- # - If v is a scalar, rest this value to all elements
334
- # - If v is a Array or a Vector, should be of the same
335
- # size of this vector
336
- # every item of this vector will be rested to the value of the
337
- # item at the same position on the other vector
338
397
 
339
- def -(v)
340
- _vector_ari("-",v)
341
- end
342
- # Reports all values that doesn't comply with a condition.
343
- # Returns a hash with the index of data and the invalid data.
344
- def verify
345
- h={}
346
- (0...@data.size).to_a.each{|i|
347
- if !(yield @data[i])
348
- h[i]=@data[i]
349
- end
350
- }
351
- h
352
- end
353
- def _vector_ari(method,v) # :nodoc:
354
- if(v.is_a? Vector or v.is_a? Array)
355
- if v.size==@data.size
356
- # i=0
357
- sum=[]
358
- 0.upto(v.size-1) {|i|
359
- if((v.is_a? Vector and v.is_valid?(v[i]) and is_valid?(@data[i])) or (v.is_a? Array and !v[i].nil? and !data[i].nil?))
360
- sum.push(@data[i].send(method,v[i]))
361
- else
362
- sum.push(nil)
363
- end
364
- }
365
- Statsample::Vector.new(sum)
366
- else
367
- raise ArgumentError, "The array/vector parameter should be of the same size of the original vector"
368
- end
369
- elsif(v.respond_to? method )
370
- Statsample::Vector.new(
371
- @data.collect {|x|
372
- if(!x.nil?)
373
- x.send(method,v)
374
- else
375
- nil
376
- end
377
- }
378
- )
379
- else
380
- raise TypeError,"You should pass a scalar or a array/vector"
381
- end
382
-
383
- end
384
- # Return an array with the data splitted by a separator.
385
- # a=Vector.new(["a,b","c,d","a,b","d"])
386
- # a.splitted
387
- # =>
388
- # [["a","b"],["c","d"],["a","b"],["d"]]
389
- def splitted(sep=Statsample::SPLIT_TOKEN)
390
- @data.collect{|x|
391
- if x.nil?
392
- nil
393
- elsif (x.respond_to? :split)
394
- x.split(sep)
395
- else
396
- [x]
397
- end
398
- }
399
- end
400
- # Returns a hash of Vectors, defined by the different values
401
- # defined on the fields
402
- # Example:
403
- #
404
- # a=Vector.new(["a,b","c,d","a,b"])
405
- # a.split_by_separator
406
- # => {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
407
- # @data=[1, 0, 1]>,
408
- # "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
409
- # @data=[1, 1, 0]>,
410
- # "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
411
- # @data=[0, 1, 1]>}
412
- #
413
- def split_by_separator(sep=Statsample::SPLIT_TOKEN)
414
- split_data=splitted(sep)
415
- factors=split_data.flatten.uniq.compact
416
- out=factors.inject({}) {|a,x|
417
- a[x]=[]
418
- a
419
- }
420
- split_data.each{|r|
421
- if r.nil?
422
- factors.each{|f|
423
- out[f].push(nil)
424
- }
425
- else
398
+ end
399
+ # Return an array with the data splitted by a separator.
400
+ # a=Vector.new(["a,b","c,d","a,b","d"])
401
+ # a.splitted
402
+ # =>
403
+ # [["a","b"],["c","d"],["a","b"],["d"]]
404
+ def splitted(sep=Statsample::SPLIT_TOKEN)
405
+ @data.collect{|x|
406
+ if x.nil?
407
+ nil
408
+ elsif (x.respond_to? :split)
409
+ x.split(sep)
410
+ else
411
+ [x]
412
+ end
413
+ }
414
+ end
415
+ # Returns a hash of Vectors, defined by the different values
416
+ # defined on the fields
417
+ # Example:
418
+ #
419
+ # a=Vector.new(["a,b","c,d","a,b"])
420
+ # a.split_by_separator
421
+ # => {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
422
+ # @data=[1, 0, 1]>,
423
+ # "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
424
+ # @data=[1, 1, 0]>,
425
+ # "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
426
+ # @data=[0, 1, 1]>}
427
+ #
428
+ def split_by_separator(sep=Statsample::SPLIT_TOKEN)
429
+ split_data=splitted(sep)
430
+ factors=split_data.flatten.uniq.compact
431
+ out=factors.inject({}) {|a,x|
432
+ a[x]=[]
433
+ a
434
+ }
435
+ split_data.each{|r|
436
+ if r.nil?
426
437
  factors.each{|f|
427
- out[f].push(r.include?(f) ? 1:0)
438
+ out[f].push(nil)
428
439
  }
429
- end
430
- }
431
- out.inject({}){|s,v|
432
- s[v[0]]=Vector.new(v[1],:nominal)
433
- s
440
+ else
441
+ factors.each{|f|
442
+ out[f].push(r.include?(f) ? 1:0)
434
443
  }
444
+ end
445
+ }
446
+ out.inject({}){|s,v|
447
+ s[v[0]]=Vector.new(v[1],:nominal)
448
+ s
449
+ }
450
+ end
451
+ def split_by_separator_freq(sep=Statsample::SPLIT_TOKEN)
452
+ split_by_separator(sep).inject({}) {|a,v|
453
+ a[v[0]]=v[1].inject {|s,x| s+x.to_i}
454
+ a
455
+ }
456
+ end
457
+
458
+ # Returns an random sample of size n, with replacement,
459
+ # only with valid data.
460
+ #
461
+ # In all the trails, every item have the same probability
462
+ # of been selected.
463
+ def sample_with_replacement(sample=1)
464
+ if(@type!=:scale or !HAS_GSL)
465
+ vds=@valid_data.size
466
+ (0...sample).collect{ @valid_data[rand(vds)] }
467
+ else
468
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
469
+ r.sample(@gsl, sample).to_a
470
+ end
471
+ end
472
+ # Returns an random sample of size n, without replacement,
473
+ # only with valid data.
474
+ #
475
+ # Every element could only be selected once.
476
+ #
477
+ # A sample of the same size of the vector is the vector itself.
478
+
479
+ def sample_without_replacement(sample=1)
480
+ if(@type!=:scale or !HAS_GSL)
481
+ raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
482
+ out=[]
483
+ size=@valid_data.size
484
+ while out.size<sample
485
+ value=rand(size)
486
+ out.push(value) if !out.include?value
487
+ end
488
+ out.collect{|i|@data[i]}
489
+ else
490
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
491
+ r.choose(@gsl, sample).to_a
492
+ end
493
+ end
494
+ # Retrieves number of cases which comply condition.
495
+ # If block given, retrieves number of instances where
496
+ # block returns true.
497
+ # If other values given, retrieves the frequency for
498
+ # this value.
499
+ def count(x=false)
500
+ if block_given?
501
+ r=@data.inject(0) {|s, i|
502
+ r=yield i
503
+ s+(r ? 1 : 0)
504
+ }
505
+ r.nil? ? 0 : r
506
+ else
507
+ frequencies[x].nil? ? 0 : frequencies[x]
508
+ end
509
+ end
510
+
511
+ # Returns the database type for the vector, according to its content
512
+
513
+ def db_type(dbs='mysql')
514
+ # first, detect any character not number
515
+ if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/} or @data.find {|v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/}
516
+ return "DATE"
517
+ elsif @data.find {|v| v.to_s=~/[^0-9e.-]/ }
518
+ return "VARCHAR (255)"
519
+ elsif @data.find {|v| v.to_s=~/\./}
520
+ return "DOUBLE"
521
+ else
522
+ return "INTEGER"
523
+ end
524
+ end
525
+ # Return true if all data is Date, "today" values or nil
526
+ def can_be_date?
527
+ if @data.find {|v|
528
+ !v.nil? and !v.is_a? Date and !v.is_a? Time and (v.is_a? String and !@today_values.include? v) and (v.is_a? String and !(v=~/\d{4,4}[-\/]\d{1,2}[-\/]\d{1,2}/))}
529
+ false
530
+ else
531
+ true
435
532
  end
436
- def split_by_separator_freq(sep=Statsample::SPLIT_TOKEN)
437
- split_by_separator(sep).inject({}) {|a,v|
438
- a[v[0]]=v[1].inject {|s,x| s+x.to_i}
533
+ end
534
+ # Return true if all data is Numeric or nil
535
+ def can_be_scale?
536
+ if @data.find {|v| !v.nil? and !v.is_a? Numeric and !@missing_values.include? v}
537
+ false
538
+ else
539
+ true
540
+ end
541
+ end
542
+
543
+ def to_s
544
+ sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
545
+ end
546
+ # Ugly name. Really, create a Vector for standard 'matrix' package.
547
+ # <tt>dir</tt> could be :horizontal or :vertical
548
+ def to_matrix(dir=:horizontal)
549
+ case dir
550
+ when :horizontal
551
+ Matrix[@data]
552
+ when :vertical
553
+ Matrix.columns([@data])
554
+ end
555
+ end
556
+ def inspect
557
+ self.to_s
558
+ end
559
+ # Retrieves uniques values for data.
560
+ def factors
561
+ if @type==:scale
562
+ @scale_data.uniq.sort
563
+ elsif @type==:date
564
+ @date_data_with_nils.uniq.sort
565
+ else
566
+ @valid_data.uniq.sort
567
+ end
568
+ end
569
+ if Statsample::STATSAMPLE__.respond_to?(:frequencies)
570
+ # Returns a hash with the distribution of frecuencies for
571
+ # the sample
572
+ def frequencies
573
+ Statsample::STATSAMPLE__.frequencies(@valid_data)
574
+ end
575
+ else
576
+ def frequencies #:nodoc:
577
+ _frequencies
578
+ end
579
+ end
580
+ def _frequencies #:nodoc:
581
+ @valid_data.inject(Hash.new) {|a,x|
582
+ a[x]||=0
583
+ a[x]=a[x]+1
584
+ a
585
+ }
586
+ end
587
+ # Plot frequencies on a chart, using gnuplot
588
+ def plot_frequencies
589
+ require 'gnuplot'
590
+ x=[]
591
+ y=[]
592
+ self.frequencies.sort.each{|k,v|
593
+ x.push(k)
594
+ y.push(v)
595
+ }
596
+ Gnuplot.open do |gp|
597
+ Gnuplot::Plot.new( gp ) do |plot|
598
+ plot.boxwidth("0.9 absolute")
599
+ plot.yrange("[0:#{y.max}]")
600
+ plot.style("fill solid 1.00 border -1")
601
+ plot.set("xtics border in scale 1,0.5 nomirror rotate by -45 offset character 0, 0, 0")
602
+ plot.style("histogram")
603
+ plot.style("data histogram")
604
+ i=-1
605
+ plot.set("xtics","("+x.collect{|v| i+=1; sprintf("\"%s\" %d",v,i)}.join(",")+")")
606
+ plot.data << Gnuplot::DataSet.new( [y] ) do |ds|
607
+ end
608
+ end
609
+ end
610
+
611
+ end
612
+
613
+
614
+ # Returns the most frequent item.
615
+ def mode
616
+ frequencies.max{|a,b| a[1]<=>b[1]}[0]
617
+ end
618
+ # The numbers of item with valid data.
619
+ def n_valid
620
+ @valid_data.size
621
+ end
622
+ # Returns a hash with the distribution of proportions of
623
+ # the sample.
624
+ def proportions
625
+ frequencies.inject({}){|a,v|
626
+ a[v[0]] = v[1].quo(n_valid)
439
627
  a
440
628
  }
441
629
  end
442
-
443
- # Returns an random sample of size n, with replacement,
444
- # only with valid data.
445
- #
446
- # In all the trails, every item have the same probability
447
- # of been selected.
448
- def sample_with_replacement(sample=1)
449
- if(@type!=:scale or !HAS_GSL)
450
- vds=@valid_data.size
451
- (0...sample).collect{ @valid_data[rand(vds)] }
452
- else
453
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
454
- r.sample(@gsl, sample).to_a
455
- end
456
- end
457
- # Returns an random sample of size n, without replacement,
458
- # only with valid data.
459
- #
460
- # Every element could only be selected once.
461
- #
462
- # A sample of the same size of the vector is the vector itself.
463
-
464
- def sample_without_replacement(sample=1)
465
- if(@type!=:scale or !HAS_GSL)
466
- raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
467
- out=[]
468
- size=@valid_data.size
469
- while out.size<sample
470
- value=rand(size)
471
- out.push(value) if !out.include?value
472
- end
473
- out.collect{|i|@data[i]}
474
- else
475
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
476
- r.choose(@gsl, sample).to_a
630
+ # Proportion of a given value.
631
+ def proportion(v=1)
632
+ frequencies[v].quo(@valid_data.size)
633
+ end
634
+ def summary(out="")
635
+ out << sprintf("n valid:%d\n",n_valid)
636
+ out << sprintf("factors:%s\n",factors.join(","))
637
+ out << "mode:"+mode.to_s+"\n"
638
+ out << "Distribution:\n"
639
+ frequencies.sort.each{|k,v|
640
+ key=labels.has_key?(k) ? labels[k]:k
641
+ out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
642
+ }
643
+ if(@type==:ordinal)
644
+ out << "median:"+median.to_s+"\n"
477
645
  end
478
- end
479
- # Retrieves number of cases which comply condition.
480
- # If block given, retrieves number of instances where
481
- # block returns true.
482
- # If other values given, retrieves the frequency for
483
- # this value.
484
- def count(x=false)
485
- if block_given?
486
- r=@data.inject(0) {|s, i|
487
- r=yield i
488
- s+(r ? 1 : 0)
489
- }
490
- r.nil? ? 0 : r
491
- else
492
- frequencies[x].nil? ? 0 : frequencies[x]
646
+ if(@type==:scale)
647
+ out << "mean:"+mean.to_s+"\n"
648
+ out << "sd:"+sd.to_s+"\n"
649
+
493
650
  end
651
+ out
494
652
  end
495
-
496
- # Returns the database type for the vector, according to its content
497
-
498
- def db_type(dbs='mysql')
499
- # first, detect any character not number
500
- if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/} or @data.find {|v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/}
501
- return "DATE"
502
- elsif @data.find {|v| v.to_s=~/[^0-9e.-]/ }
503
- return "VARCHAR (255)"
504
- elsif @data.find {|v| v.to_s=~/\./}
505
- return "DOUBLE"
506
- else
507
- return "INTEGER"
653
+
654
+ # Variance of p, according to poblation size
655
+ def variance_proportion(n_poblation, v=1)
656
+ Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
657
+ end
658
+ # Variance of p, according to poblation size
659
+ def variance_total(n_poblation, v=1)
660
+ Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
661
+ end
662
+ def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
663
+ Statsample::proportion_confidence_interval_t(proportion(v), @valid_data.size, n_poblation, margin)
664
+ end
665
+ def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
666
+ Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
667
+ end
668
+
669
+ self.instance_methods.find_all{|met| met=~/_slow$/}.each do |met|
670
+ met_or=met.gsub("_slow","")
671
+ if !self.method_defined?(met_or)
672
+ alias_method met_or, met
673
+ end
674
+ end
675
+ ######
676
+ ### Ordinal Methods
677
+ ######
678
+
679
+ # Return the value of the percentil q
680
+ def percentil(q)
681
+ check_type :ordinal
682
+ sorted=@valid_data.sort
683
+ v= (n_valid * q).quo(100)
684
+ if(v.to_i!=v)
685
+ sorted[v.to_i]
686
+ else
687
+ (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
688
+ end
689
+ end
690
+ # Returns a ranked vector.
691
+ def ranked(type=:ordinal)
692
+ check_type :ordinal
693
+ i=0
694
+ r=frequencies.sort.inject({}){|a,v|
695
+ a[v[0]]=(i+1 + i+v[1]).quo(2)
696
+ i+=v[1]
697
+ a
698
+ }
699
+ @data.collect {|c| r[c] }.to_vector(type)
700
+ end
701
+ # Return the median (percentil 50)
702
+ def median
703
+ check_type :ordinal
704
+ if HAS_GSL and @type==:scale
705
+ sorted=GSL::Vector.alloc(@scale_data.sort)
706
+ GSL::Stats::median_from_sorted_data(sorted)
707
+ else
708
+ percentil(50)
709
+ end
710
+ end
711
+ # Minimun value
712
+ def min;
713
+ check_type :ordinal
714
+ @valid_data.min;
715
+ end
716
+ # Maximum value
717
+ def max;
718
+ check_type :ordinal
719
+ @valid_data.max;
720
+ end
721
+ def set_date_data # :nodoc:
722
+ @date_data_with_nils=@data.collect do|x|
723
+ if x.is_a? Date
724
+ x
725
+ elsif x.is_a? Time
726
+ Date.new(x.year, x.month, x.day)
727
+ elsif x.is_a? String and x=~/(\d{4,4})[-\/](\d{1,2})[-\/](\d{1,2})/
728
+ Date.new($1.to_i,$2.to_i,$3.to_i)
729
+ elsif @today_values.include? x
730
+ Date.today()
731
+ elsif @missing_values.include? x or x.nil?
732
+ nil
508
733
  end
734
+ end
509
735
  end
510
- # Return true if all data is Numeric or nil
511
- def can_be_scale?
512
- if @data.find {|v| !v.nil? and !v.is_a? Numeric}
513
- false
736
+ def set_scale_data # :nodoc
737
+ @scale_data=@valid_data.collect do|x|
738
+ if x.is_a? Numeric
739
+ x
740
+ elsif x.is_a? String and x.to_i==x.to_f
741
+ x.to_i
514
742
  else
515
- true
516
- end
743
+ x.to_f
744
+ end
745
+ end
746
+ if HAS_GSL
747
+ @gsl=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
748
+ end
749
+ end
750
+ private :set_scale_data
751
+
752
+ # The range of the data (max - min)
753
+ def range;
754
+ check_type :scale
755
+ @scale_data.max - @scale_data.min
756
+ end
757
+ # The sum of values for the data
758
+ def sum
759
+ check_type :scale
760
+ @scale_data.inject(0){|a,x|x+a} ;
761
+ end
762
+ # The arithmetical mean of data
763
+ def mean
764
+ check_type :scale
765
+ sum.to_f.quo(n_valid)
766
+ end
767
+ # Sum of squares for the data around a value.
768
+ # By default, this value is the mean
769
+ # ss= sum{(xi-m)^2}
770
+ #
771
+ def sum_of_squares(m=nil)
772
+ check_type :scale
773
+ m||=mean
774
+ @scale_data.inject(0){|a,x| a+(x-m).square}
517
775
  end
518
776
 
519
- def to_s
520
- sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
521
- end
522
- # Ugly name. Really, create a Vector for standard 'matrix' package.
523
- # <tt>dir</tt> could be :horizontal or :vertical
524
- def to_matrix(dir=:horizontal)
525
- case dir
526
- when :horizontal
527
- Matrix[@data]
528
- when :vertical
529
- Matrix.columns([@data])
530
- end
531
- end
532
- def inspect
533
- self.to_s
777
+ # Sum of squared deviation
778
+ def sum_of_squared_deviation
779
+ check_type :scale
780
+ @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
534
781
  end
535
- # Retrieves uniques values for data.
536
- def factors
537
- if @type==:scale
538
- @scale_data.uniq.sort
539
- else
540
- @valid_data.uniq.sort
541
- end
782
+
783
+ # Population variance (denominator N)
784
+ def variance_population(m=nil)
785
+ check_type :scale
786
+ m||=mean
787
+ squares=@scale_data.inject(0){|a,x| x.square+a}
788
+ squares.quo(n_valid) - m.square
542
789
  end
543
- if Statsample::STATSAMPLE__.respond_to?(:frequencies)
544
- # Returns a hash with the distribution of frecuencies for
545
- # the sample
546
- def frequencies
547
- Statsample::STATSAMPLE__.frequencies(@valid_data)
548
- end
549
- else
550
- def frequencies #:nodoc:
551
- _frequencies
552
- end
790
+
791
+
792
+ # Population Standard deviation (denominator N)
793
+ def standard_deviation_population(m=nil)
794
+ check_type :scale
795
+ Math::sqrt( variance_population(m) )
553
796
  end
554
- def _frequencies #:nodoc:
555
- @valid_data.inject(Hash.new) {|a,x|
556
- a[x]||=0
557
- a[x]=a[x]+1
558
- a
559
- }
797
+ # Sample Variance (denominator n-1)
798
+
799
+ def variance_sample(m=nil)
800
+ check_type :scale
801
+ m||=mean
802
+ sum_of_squares(m).quo(n_valid - 1)
560
803
  end
561
- # Plot frequencies on a chart, using gnuplot
562
- def plot_frequencies
563
- require 'gnuplot'
564
- x=[]
565
- y=[]
566
- self.frequencies.sort.each{|k,v|
567
- x.push(k)
568
- y.push(v)
569
- }
570
- Gnuplot.open do |gp|
571
- Gnuplot::Plot.new( gp ) do |plot|
572
- plot.boxwidth("0.9 absolute")
573
- plot.yrange("[0:#{y.max}]")
574
- plot.style("fill solid 1.00 border -1")
575
- plot.set("xtics border in scale 1,0.5 nomirror rotate by -45 offset character 0, 0, 0")
576
- plot.style("histogram")
577
- plot.style("data histogram")
578
- i=-1
579
- plot.set("xtics","("+x.collect{|v| i+=1; sprintf("\"%s\" %d",v,i)}.join(",")+")")
580
- plot.data << Gnuplot::DataSet.new( [y] ) do |ds|
581
- end
582
- end
583
- end
584
804
 
585
- end
586
-
587
-
588
- # Returns the most frequent item.
589
- def mode
590
- frequencies.max{|a,b| a[1]<=>b[1]}[0]
591
- end
592
- # The numbers of item with valid data.
593
- def n_valid
594
- @valid_data.size
595
- end
596
- # Returns a hash with the distribution of proportions of
597
- # the sample.
598
- def proportions
599
- frequencies.inject({}){|a,v|
600
- a[v[0]] = v[1].quo(n_valid)
601
- a
602
- }
603
- end
604
- # Proportion of a given value.
605
- def proportion(v=1)
606
- frequencies[v].quo(@valid_data.size)
607
- end
608
- def summary(out="")
609
- out << sprintf("n valid:%d\n",n_valid)
610
- out << sprintf("factors:%s\n",factors.join(","))
611
- out << "mode:"+mode.to_s+"\n"
612
- out << "Distribution:\n"
613
- frequencies.sort.each{|k,v|
614
- key=labels.has_key?(k) ? labels[k]:k
615
- out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
616
- }
617
- if(@type==:ordinal)
618
- out << "median:"+median.to_s+"\n"
619
- end
620
- if(@type==:scale)
621
- out << "mean:"+mean.to_s+"\n"
622
- out << "sd:"+sd.to_s+"\n"
623
-
624
- end
625
- out
626
- end
805
+ # Sample Standard deviation (denominator n-1)
806
+
807
+ def standard_deviation_sample(m=nil)
808
+ check_type :scale
627
809
 
628
- # Variance of p, according to poblation size
629
- def variance_proportion(n_poblation, v=1)
630
- Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
631
- end
632
- # Variance of p, according to poblation size
633
- def variance_total(n_poblation, v=1)
634
- Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
635
- end
636
- def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
637
- Statsample::proportion_confidence_interval_t(proportion(v), @valid_data.size, n_poblation, margin)
638
- end
639
- def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
640
- Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
641
- end
810
+ m||=mean
811
+ Math::sqrt(variance_sample(m))
812
+ end
813
+ # Skewness of the sample
814
+ def skew(m=nil)
815
+ check_type :scale
816
+ m||=mean
817
+ th=@scale_data.inject(0){|a,x| a+((x-m)**3)}
818
+ th.quo((@scale_data.size)*sd(m)**3)
819
+ end
820
+ # Kurtosis of the sample
821
+ def kurtosis(m=nil)
822
+ check_type :scale
823
+ m||=mean
824
+ fo=@scale_data.inject(0){|a,x| a+((x-m)**4)}
825
+ fo.quo((@scale_data.size)*sd(m)**4)-3
642
826
 
643
- self.instance_methods.find_all{|met| met=~/_slow$/}.each do |met|
644
- met_or=met.gsub("_slow","")
645
- if !self.method_defined?(met_or)
646
- alias_method met_or, met
647
- end
648
- end
649
- ######
650
- ### Ordinal Methods
651
- ######
652
-
653
- # Return the value of the percentil q
654
- def percentil(q)
655
- check_type :ordinal
656
- sorted=@valid_data.sort
657
- v= (n_valid * q).quo(100)
658
- if(v.to_i!=v)
659
- sorted[v.to_i]
660
- else
661
- (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
662
- end
663
- end
664
- # Returns a ranked vector.
665
- def ranked(type=:ordinal)
666
- check_type :ordinal
667
- i=0
668
- r=frequencies.sort.inject({}){|a,v|
669
- a[v[0]]=(i+1 + i+v[1]).quo(2)
670
- i+=v[1]
671
- a
672
- }
673
- @data.collect {|c|
674
- r[c]
675
- }.to_vector(type)
676
- end
677
- # Return the median (percentil 50)
678
- def median
679
- check_type :ordinal
680
- if HAS_GSL and @type==:scale
681
- GSL::Stats::median_from_sorted_data(@gsl)
682
- else
683
- percentil(50)
684
- end
685
- end
686
- # Minimun value
687
- def min;
688
- check_type :ordinal
689
- @valid_data.min;
690
- end
691
- # Maximum value
692
- def max;
693
- check_type :ordinal
694
- @valid_data.max;
695
- end
696
-
697
- def set_scale_data # :nodoc
698
- @scale_data=@valid_data.collect do|x|
699
- if x.is_a? Numeric
700
- x
701
- elsif x.is_a? String and x.to_i==x.to_f
702
- x.to_i
703
- else
704
- x.to_f
705
- end
706
- end
707
- if HAS_GSL
708
- @gsl=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
709
- end
710
- end
711
- private :set_scale_data
827
+ end
828
+ # Product of all values on the sample
829
+ #
830
+ def product
831
+ check_type :scale
832
+ @scale_data.inject(1){|a,x| a*x }
833
+ end
834
+ if HAS_GSL
835
+ %w{skew kurtosis variance_sample standard_deviation_sample variance_population standard_deviation_population mean sum}.each{|m|
836
+ m_nuevo=(m+"_slow").intern
837
+ alias_method m_nuevo, m.intern
838
+ }
839
+ def sum # :nodoc:
840
+ check_type :scale
841
+
842
+ @gsl.sum
843
+ end
844
+ def mean # :nodoc:
845
+ check_type :scale
846
+
847
+ @gsl.mean
848
+ end
849
+ def variance_sample(m=nil) # :nodoc:
850
+ check_type :scale
851
+
852
+ m||=mean
853
+ @gsl.variance_m
854
+ end
855
+ def standard_deviation_sample(m=nil) # :nodoc:
856
+ check_type :scale
857
+ m||=mean
858
+ @gsl.sd(m)
859
+ end
712
860
 
713
- # The range of the data (max - min)
714
- def range;
715
- check_type :scale
716
- @scale_data.max - @scale_data.min
717
- end
718
- # The sum of values for the data
719
- def sum
720
- check_type :scale
721
- @scale_data.inject(0){|a,x|x+a} ;
722
- end
723
- # The arithmetical mean of data
724
- def mean
725
- check_type :scale
726
- sum.to_f.quo(n_valid)
727
- end
728
- # Sum of squares for the data around a value.
729
- # By default, this value is the mean
730
- # ss= sum{(xi-m)^2}
731
- #
732
- def sum_of_squares(m=nil)
733
- check_type :scale
734
- m||=mean
735
- @scale_data.inject(0){|a,x| a+(x-m).square}
736
- end
737
-
738
- # Sum of squared deviation
739
- def sum_of_squared_deviation
740
- check_type :scale
741
- @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
742
- end
861
+ def variance_population(m=nil) # :nodoc:
862
+ check_type :scale
863
+ m||=mean
864
+ @gsl.variance_with_fixed_mean(m)
865
+ end
866
+ def standard_deviation_population(m=nil) # :nodoc:
867
+ check_type :scale
868
+ m||=mean
869
+ @gsl.sd_with_fixed_mean(m)
870
+ end
871
+ def skew # :nodoc:
872
+ check_type :scale
873
+ @gsl.skew
874
+ end
875
+ def kurtosis # :nodoc:
876
+ check_type :scale
877
+ @gsl.kurtosis
878
+ end
879
+ # Create a GSL::Histogram
880
+ # With a fixnum, creates X bins within the range of data
881
+ # With an Array, each value will be a cut point
882
+ def histogram(bins=10)
883
+ check_type :scale
743
884
 
744
- # Population variance (denominator N)
745
- def variance_population(m=nil)
746
- check_type :scale
747
- m||=mean
748
- squares=@scale_data.inject(0){|a,x| x.square+a}
749
- squares.quo(n_valid) - m.square
885
+ if bins.is_a? Array
886
+ #h=Statsample::Histogram.new(self, bins)
887
+ h=GSL::Histogram.alloc(bins)
888
+ else
889
+ # ugly patch. The upper limit for a bin has the form
890
+ # x < range
891
+ #h=Statsample::Histogram.new(self, bins)
892
+ h=GSL::Histogram.alloc(bins,[@valid_data.min,@valid_data.max+0.0001])
750
893
  end
751
-
894
+ h.increment(@gsl)
895
+ h
896
+ end
897
+ def plot_histogram(bins=10,options="")
898
+ check_type :scale
899
+ self.histogram(bins).graph(options)
900
+ end
752
901
 
753
- # Population Standard deviation (denominator N)
754
- def standard_deviation_population(m=nil)
755
- check_type :scale
756
-
757
- Math::sqrt( variance_population(m) )
758
- end
759
- # Sample Variance (denominator n-1)
760
-
761
- def variance_sample(m=nil)
762
- check_type :scale
763
-
764
- m||=mean
765
- sum_of_squares(m).quo(n_valid - 1)
766
- end
767
-
768
- # Sample Standard deviation (denominator n-1)
769
-
770
- def standard_deviation_sample(m=nil)
771
- check_type :scale
772
-
773
- m||=m
774
- Math::sqrt(variance_sample(m))
775
- end
776
- # Skewness of the sample
777
- def skew
778
- check_type :scale
779
- m=mean
780
- thirds=@scale_data.inject(0){|a,x| a+((x-mean)**3)}
781
- thirds.quo((@scale_data.size-1)*sd**3)
782
- end
783
- # Kurtosis of the sample
784
- def kurtosis
785
- check_type :scale
786
-
787
- m=mean
788
- thirds=@scale_data.inject(0){|a,x| a+((x-mean)**4)}
789
- thirds.quo((@scale_data.size-1)*sd**4)
790
-
791
- end
792
- # Product of all values on the sample
793
- #
794
- def product
795
- check_type :scale
796
- @scale_data.inject(1){|a,x| a*x }
797
- end
798
- if HAS_GSL
799
- %w{skew kurtosis variance_sample standard_deviation_sample variance_population standard_deviation_population mean sum}.each{|m|
800
- m_nuevo=(m+"_slow").intern
801
- alias_method m_nuevo, m.intern
802
- }
803
- def sum # :nodoc:
804
- check_type :scale
805
-
806
- @gsl.sum
807
- end
808
- def mean # :nodoc:
809
- check_type :scale
810
-
811
- @gsl.mean
812
- end
813
- def variance_sample(m=nil) # :nodoc:
814
- check_type :scale
815
-
816
- m||=mean
817
- @gsl.variance_m
818
- end
819
- def standard_deviation_sample(m=nil) # :nodoc:
820
- check_type :scale
821
- m||=mean
822
- @gsl.sd(m)
823
- end
824
-
825
- def variance_population(m=nil) # :nodoc:
826
- check_type :scale
827
- m||=mean
828
- @gsl.variance_with_fixed_mean(m)
829
- end
830
- def standard_deviation_population(m=nil) # :nodoc:
831
- check_type :scale
832
- m||=mean
833
- @gsl.sd_with_fixed_mean(m)
834
- end
835
- def skew # :nodoc:
836
- check_type :scale
837
- @gsl.skew
838
- end
839
- def kurtosis # :nodoc:
840
- check_type :scale
841
- @gsl.kurtosis
842
- end
843
- # Create a GSL::Histogram
844
- # With a fixnum, creates X bins within the range of data
845
- # With an Array, each value will be a cut point
846
- def histogram(bins=10)
847
- check_type :scale
848
- if bins.is_a? Array
849
- h=GSL::Histogram.alloc(bins)
850
- else
851
- # ugly patch. The upper limit for a bin has the form
852
- # x < range
853
- h=GSL::Histogram.alloc(bins,[@valid_data.min,@valid_data.max+0.0001])
854
- end
855
- h.increment(@gsl)
856
- h
857
- end
858
- def plot_histogram(bins=10,options="")
859
- check_type :scale
860
- self.histogram(bins).graph(options)
861
- end
862
-
863
- end
864
-
865
- # Coefficient of variation
866
- # Calculed with the sample standard deviation
867
- def coefficient_of_variation
868
- check_type :scale
869
- standard_deviation_sample.quo(mean)
870
- end
871
-
872
- alias_method :sdp, :standard_deviation_population
873
- alias_method :sds, :standard_deviation_sample
874
- alias_method :cov, :coefficient_of_variation
875
- alias_method :variance, :variance_sample
876
- alias_method :sd, :standard_deviation_sample
877
- alias_method :ss, :sum_of_squares
878
902
  end
903
+
904
+ # Coefficient of variation
905
+ # Calculed with the sample standard deviation
906
+ def coefficient_of_variation
907
+ check_type :scale
908
+ standard_deviation_sample.quo(mean)
909
+ end
910
+
911
+ alias_method :sdp, :standard_deviation_population
912
+ alias_method :sds, :standard_deviation_sample
913
+ alias_method :cov, :coefficient_of_variation
914
+ alias_method :variance, :variance_sample
915
+ alias_method :sd, :standard_deviation_sample
916
+ alias_method :ss, :sum_of_squares
917
+ end
879
918
  end