statsample 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. data/History.txt +12 -0
  2. data/Manifest.txt +13 -0
  3. data/README.txt +2 -1
  4. data/demo/pca.rb +29 -0
  5. data/demo/umann.rb +8 -0
  6. data/lib/distribution.rb +0 -1
  7. data/lib/matrix_extension.rb +35 -21
  8. data/lib/statsample.rb +31 -28
  9. data/lib/statsample/anova.rb +7 -2
  10. data/lib/statsample/bivariate.rb +17 -11
  11. data/lib/statsample/codification.rb +136 -87
  12. data/lib/statsample/combination.rb +0 -2
  13. data/lib/statsample/converter/csv18.rb +1 -1
  14. data/lib/statsample/converter/csv19.rb +1 -1
  15. data/lib/statsample/converters.rb +176 -171
  16. data/lib/statsample/crosstab.rb +227 -154
  17. data/lib/statsample/dataset.rb +94 -12
  18. data/lib/statsample/dominanceanalysis.rb +69 -62
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +25 -21
  20. data/lib/statsample/factor.rb +18 -0
  21. data/lib/statsample/factor/pca.rb +128 -0
  22. data/lib/statsample/factor/principalaxis.rb +133 -0
  23. data/lib/statsample/factor/rotation.rb +125 -0
  24. data/lib/statsample/histogram.rb +99 -0
  25. data/lib/statsample/mle.rb +125 -126
  26. data/lib/statsample/mle/logit.rb +91 -91
  27. data/lib/statsample/mle/probit.rb +84 -85
  28. data/lib/statsample/multiset.rb +1 -1
  29. data/lib/statsample/permutation.rb +96 -0
  30. data/lib/statsample/regression.rb +1 -1
  31. data/lib/statsample/regression/binomial.rb +89 -89
  32. data/lib/statsample/regression/binomial/logit.rb +9 -9
  33. data/lib/statsample/regression/binomial/probit.rb +9 -9
  34. data/lib/statsample/regression/multiple.rb +8 -14
  35. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  36. data/lib/statsample/regression/multiple/rubyengine.rb +55 -55
  37. data/lib/statsample/resample.rb +12 -17
  38. data/lib/statsample/srs.rb +4 -1
  39. data/lib/statsample/test.rb +23 -22
  40. data/lib/statsample/test/umannwhitney.rb +182 -0
  41. data/lib/statsample/vector.rb +854 -815
  42. data/test/test_bivariate.rb +132 -132
  43. data/test/test_codification.rb +71 -50
  44. data/test/test_dataset.rb +19 -1
  45. data/test/test_factor.rb +44 -0
  46. data/test/test_histogram.rb +26 -0
  47. data/test/test_permutation.rb +37 -0
  48. data/test/test_statistics.rb +74 -63
  49. data/test/test_umannwhitney.rb +17 -0
  50. data/test/test_vector.rb +46 -30
  51. metadata +31 -4
@@ -1,20 +1,15 @@
1
1
  module Statsample
2
- module Resample
3
- class << self
4
- def repeat_and_save(times,&action)
5
- (1..times).inject([]) {|a,x|
6
- a.push(action.call)
7
- a
8
- }
9
- end
10
-
11
- def generate (size,low,upper)
12
- range=upper-low+1
13
- Vector.new((0...size).collect {|x|
14
- rand(range)+low
15
- },:scale)
16
- end
17
-
18
- end
2
+ module Resample
3
+ class << self
4
+ def repeat_and_save(times,&action)
5
+ (1..times).inject([]) {|a,x| a.push(action.call); a}
6
+ end
7
+
8
+ def generate (size,low,upper)
9
+ range=upper-low+1
10
+ Vector.new((0...size).collect {|x| rand(range)+low },:scale)
11
+ end
12
+
19
13
  end
14
+ end
20
15
  end
@@ -35,6 +35,8 @@ module Statsample
35
35
  n0=estimation_n0(d,prop,margin)
36
36
  n0.quo( 1 + ((n0 - 1).quo(n_pobl)))
37
37
  end
38
+
39
+
38
40
  # Proportion confidence interval with t values
39
41
  # Uses estimated proportion, sample without replacement.
40
42
 
@@ -42,6 +44,7 @@ module Statsample
42
44
  t = Distribution::T.p_value(1-((1-margin).quo(2)) , n_sample-1)
43
45
  proportion_confidence_interval(prop,n_sample,n_population, t)
44
46
  end
47
+
45
48
  # Proportion confidence interval with z values
46
49
  # Uses estimated proportion, sample without replacement.
47
50
  def proportion_confidence_interval_z(p, n_sample, n_population, margin=0.95)
@@ -53,7 +56,7 @@ module Statsample
53
56
 
54
57
  def proportion_confidence_interval(p, sam,pop , x)
55
58
  f=sam.quo(pop)
56
- one_range=x * Math::sqrt((qf(sam, pop) * p * (1-p)) / (sam-1)) + (1.quo(sam * 2.0))
59
+ one_range=x * Math::sqrt((qf(sam, pop) * p * (1-p)).quo (sam-1)) + (1.quo(sam * 2.0))
57
60
  [p-one_range, p+one_range]
58
61
  end
59
62
  # Standard deviation for sample distribution of a proportion
@@ -1,25 +1,26 @@
1
1
  module Statsample
2
- # module for several statistical tests
3
- module Test
4
- # Calculate chi square for two Matrix
5
- class << self
6
- def chi_square(real,expected)
7
- raise TypeError, "Both argument should be Matrix" unless real.is_a? Matrix and expected.is_a?Matrix
8
- sum=0
9
- (0...real.row_size).each {|row_i|
10
- (0...real.column_size).each {|col_i|
11
-
12
- val=((real[row_i,col_i].to_f - expected[row_i,col_i].to_f)**2) / expected[row_i,col_i].to_f
13
- # puts "Real: #{real[row_i,col_i].to_f} ; esperado: #{expected[row_i,col_i].to_f}"
14
- # puts "Diferencial al cuadrado: #{(real[row_i,col_i].to_f - expected[row_i,col_i].to_f)**2}"
15
- sum+=val
16
- }
17
- }
18
- sum
19
- end
20
- def t_significance
21
-
22
- end
23
- end
2
+ # Module for several statistical tests
3
+
4
+ module Test
5
+ autoload(:UMannWhitney, 'statsample/test/umannwhitney')
6
+ # Calculate chi square for two Matrix
7
+ class << self
8
+ def chi_square(real,expected)
9
+ raise TypeError, "Both argument should be Matrix" unless real.is_a? Matrix and expected.is_a?Matrix
10
+ sum=0
11
+ (0...real.row_size).each {|row_i|
12
+ (0...real.column_size).each {|col_i|
13
+ val=((real[row_i,col_i].to_f - expected[row_i,col_i].to_f)**2) / expected[row_i,col_i].to_f
14
+ # puts "Real: #{real[row_i,col_i].to_f} ; esperado: #{expected[row_i,col_i].to_f}"
15
+ # puts "Diferencial al cuadrado: #{(real[row_i,col_i].to_f - expected[row_i,col_i].to_f)**2}"
16
+ sum+=val
17
+ }
18
+ }
19
+ sum
20
+ end
21
+ def u_mannwhitney(v1p,v2p)
22
+ Statsample::Test::UMannWhitney.new(v1p,v2p)
23
+ end
24
24
  end
25
+ end
25
26
  end
@@ -0,0 +1,182 @@
1
+ module Statsample
2
+ module Test
3
+ #
4
+ # = U Mann-Whitney test
5
+ #
6
+ # Non-parametric test for assessing whether two independent samples
7
+ # of observations come from the same distribution.
8
+ #
9
+ # == Assumptions
10
+ #
11
+ # * The two samples under investigation in the test are independent of each other and the observations within each sample are independent.
12
+ # * The observations are comparable (i.e., for any two observations, one can assess whether they are equal or, if not, which one is greater).
13
+ # * The variances in the two groups are approximately equal.
14
+ #
15
+ # Higher differences of distributions correspond to
16
+ # lower values of U.
17
+ #
18
+ class UMannWhitney
19
+ # Max for m*n allowed for exact calculation of probability
20
+ MAX_MN_EXACT=10000
21
+
22
+ # Exact probability based on Dinneen & Blakesley (1973) algorithm
23
+ # This is the algorithm used on SPSS
24
+ #
25
+ # Reference: Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann-Whitney U Statistic. Journal of the Royal Statistical Society. Series C (Applied Statistics), 22(2), 269-273.
26
+ #
27
+ def self.exact_probability_distribution_as62(n1,n2)
28
+
29
+ freq=[]
30
+ work=[]
31
+ mn1=n1*n2+1
32
+ max_u=n1*n2
33
+ minmn=n1<n2 ? n1 : n2
34
+ maxmn=n1>n2 ? n1 : n2
35
+ n1=maxmn+1
36
+ (1..n1).each{|i| freq[i]=1}
37
+ n1+=1
38
+ (n1..mn1).each{|i| freq[i]=0}
39
+ work[1]=0
40
+ xin=maxmn
41
+ (2..minmn).each do |i|
42
+ work[i]=0
43
+ xin=xin+maxmn
44
+ n1=xin+2
45
+ l=1+xin.quo(2)
46
+ k=i
47
+ (1..l).each do |j|
48
+ k=k+1
49
+ n1=n1-1
50
+ sum=freq[j]+work[j]
51
+ freq[j]=sum
52
+ work[k]=sum-freq[n1]
53
+ freq[n1]=sum
54
+ end
55
+ end
56
+
57
+ # Generate percentages for normal U
58
+ dist=(1+max_u/2).to_i
59
+ freq.shift
60
+ total=freq.inject(0) {|a,v| a+v }
61
+ (0...dist).collect {|i|
62
+ if i!=max_u-i
63
+ ues=freq[i]*2
64
+ else
65
+ ues=freq[i]
66
+ end
67
+ ues.quo(total)
68
+ }
69
+ end
70
+
71
+ # Generate distribution for permutations
72
+
73
+ def self.distribution_permutations(n1,n2)
74
+ base=[0]*n1+[1]*n2
75
+ po=Statsample::Permutation.new(base)
76
+ upper=0
77
+ total=n1*n2
78
+ req={}
79
+ po.each do |perm|
80
+ r0,s0=0,0
81
+ perm.each_index {|c_i|
82
+ if perm[c_i]==0
83
+ r0+=c_i+1
84
+ s0+=1
85
+ end
86
+ }
87
+ u1=r0-((s0*(s0+1)).quo(2))
88
+ u2=total-u1
89
+ temp_u= (u1 <= u2) ? u1 : u2
90
+ req[perm]=temp_u
91
+ end
92
+ req
93
+ end
94
+ # Sample 1 Rank sum
95
+ attr_reader :r1
96
+ # Sample 2 Rank sum
97
+ attr_reader :r2
98
+ # Sample 1 U
99
+ attr_reader :u1
100
+ # Sample 2 U
101
+ attr_reader :u2
102
+ # U Value
103
+ attr_reader :u
104
+ # Compensation for ties
105
+ attr_reader :t
106
+ def initialize(v1,v2)
107
+ @n1=v1.valid_data.size
108
+ @n2=v2.valid_data.size
109
+
110
+ data=(v1.valid_data+v2.valid_data).to_scale
111
+ groups=(([0]*@n1)+([1]*@n2)).to_vector
112
+ ds={'g'=>groups, 'data'=>data}.to_dataset
113
+ @t=nil
114
+ @ties=data.data.size!=data.data.uniq.size
115
+ if(@ties)
116
+ adjust_for_ties(ds['data'])
117
+ end
118
+ ds['ranked']=ds['data'].ranked(:scale)
119
+
120
+ @n=ds.cases
121
+
122
+ @r1=ds.filter{|r| r['g']==0}['ranked'].sum
123
+ @r2=((ds.cases*(ds.cases+1)).quo(2))-r1
124
+ @u1=r1-((@n1*(@n1+1)).quo(2))
125
+ @u2=r2-((@n2*(@n2+1)).quo(2))
126
+ @u=(u1<u2) ? u1 : u2
127
+ end
128
+ def summary
129
+ out=<<-HEREDOC
130
+ Mann-Whitney U
131
+ Sum of ranks v1: #{@r1.to_f}
132
+ Sum of ranks v1: #{@r2.to_f}
133
+ U Value: #{@u.to_f}
134
+ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
135
+ HEREDOC
136
+ if @n1*@n2<MAX_MN_EXACT
137
+ out+="Exact p (Dinneen & Blakesley): #{sprintf("%0.3f",exact_probability)}"
138
+ end
139
+ out
140
+ end
141
+ # Exact probability of finding values of U lower than or equal to the sample's U on the U distribution. Use with caution with m*n>100000
142
+ # Reference: Dinneen & Blakesley (1973)
143
+ def exact_probability
144
+ dist=UMannWhitney.exact_probability_distribution_as62(@n1,@n2)
145
+ sum=0
146
+ (0..@u.to_i).each {|i|
147
+ sum+=dist[i]
148
+ }
149
+ sum
150
+ end
151
+ # Reference: http://europe.isixsigma.com/library/content/c080806a.asp
152
+ def adjust_for_ties(data)
153
+ @t=data.frequencies.find_all{|k,v| v>1}.inject(0) {|a,v|
154
+ a+(v[1]**3-v[1]).quo(12)
155
+ }
156
+ end
157
+ # Z value for U, adjusted for ties.
158
+ # For large samples, U is approximately normally distributed.
159
+ # In that case, you can use z to obtain the probability for U.
160
+ # Reference: SPSS Manual
161
+ def z
162
+ mu=(@n1*@n2).quo(2)
163
+ if(!@ties)
164
+ ou=Math::sqrt(((@n1*@n2)*(@n1+@n2+1)).quo(12))
165
+ else
166
+ n=@n1+@n2
167
+ first=(@n1*@n2).quo(n*(n-1))
168
+ second=((n**3-n).quo(12))-@t
169
+ ou=Math::sqrt(first*second)
170
+ end
171
+ (@u-mu).quo(ou)
172
+ end
173
+ # Assuming H_0, the proportion of cdf with values of U lower
174
+ # than the sample.
175
+ # Use with more than 30 cases per group.
176
+ def z_probability
177
+ (1-Distribution::Normal.cdf(z.abs()))*2
178
+ end
179
+ end
180
+
181
+ end
182
+ end
@@ -1,3 +1,4 @@
1
+ require 'date'
1
2
  class Array
2
3
  # Creates a new Statsample::Vector object
3
4
  # Argument should be equal to Vector.new
@@ -9,25 +10,24 @@ class Array
9
10
  Statsample::Vector.new(self,:scale,*args)
10
11
  end
11
12
  end
12
-
13
13
  module Statsample
14
- class << self
15
- # Create a matrix using vectors as columns.
16
- # Use:
17
- #
18
- # matrix=Statsample.vector_cols_matrix(v1,v2)
19
- def vector_cols_matrix(*vs)
20
- # test
21
- size=vs[0].size
22
- vs.each{|v|
23
- raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
24
- raise ArgumentError,"Vectors size should be the same" if v.size!=size
25
- }
26
- Matrix.rows((0...size).to_a.collect() {|i|
27
- vs.collect{|v| v[i]}
28
- })
29
- end
30
- end
14
+ class << self
15
+ # Create a matrix using vectors as columns.
16
+ # Use:
17
+ #
18
+ # matrix=Statsample.vector_cols_matrix(v1,v2)
19
+ def vector_cols_matrix(*vs)
20
+ # test
21
+ size=vs[0].size
22
+ vs.each{|v|
23
+ raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
24
+ raise ArgumentError,"Vectors size should be the same" if v.size!=size
25
+ }
26
+ Matrix.rows((0...size).to_a.collect() {|i|
27
+ vs.collect{|v| v[i]}
28
+ })
29
+ end
30
+ end
31
31
  # Returns a duplicate of the input vectors, without missing data
32
32
  # for any of the vectors.
33
33
  #
@@ -46,834 +46,873 @@ module Statsample
46
46
  ds.vectors.values
47
47
  end
48
48
 
49
- class Vector
50
- include Enumerable
51
- include Writable
52
- DEFAULT_OPTIONS={
53
- :missing_values=>[],
54
- :labels=>{}
49
+ class Vector
50
+ include Enumerable
51
+ include Writable
52
+ DEFAULT_OPTIONS={
53
+ :missing_values=>[],
54
+ :today_values=>['NOW','TODAY', :NOW, :TODAY],
55
+ :labels=>{}
56
+ }
57
+ # Level of measurement. Could be :nominal, :ordinal or :scale
58
+ attr_reader :type
59
+ # Original data.
60
+ attr_reader :data
61
+ # Valid data. Equal to data, minus values assigned as missing values
62
+ attr_reader :valid_data
63
+ # Array of values considered as missing. Nil is a missing value, by default
64
+ attr_reader :missing_values
65
+ # Array of values considered as "Today", with date type. "NOW", "TODAY", :NOW and :TODAY are 'today' values, by default
66
+ attr_reader :today_values
67
+ # Missing values array
68
+ attr_reader :missing_data
69
+ # Original data, with all missing values replaced by nils
70
+ attr_reader :data_with_nils
71
+ # Date data, with all missing values replaced by nils
72
+ attr_reader :date_data_with_nils
73
+ # GSL Object, only available with rbgsl extension and type==:scale
74
+ attr_reader :gsl
75
+ # Change label for specific values
76
+ attr_accessor :labels
77
+ # Creates a new Vector object.
78
+ # [data] Array of data.
79
+ # [type] Level of meausurement. See Vector#type
80
+ # [opts] Options
81
+ # [:missing_values] Array of missing values. See Vector#missing_values
82
+ # [:today_values] Array of 'today' values. See Vector#today_values
83
+ # [:labels] Labels for data values
84
+ #
85
+ # The fast way to create a vector uses Array.to_vector or Array.to_scale.
86
+ #
87
+ # v=[1,2,3,4].to_vector(:scale)
88
+ # v=[1,2,3,4].to_scale
89
+ #
90
+
91
+ def initialize(data=[], t=:nominal, opts=Hash.new)
92
+ raise "Data should be an array" unless data.is_a? Array
93
+ @data=data
94
+ @type=t
95
+ opts=DEFAULT_OPTIONS.merge(opts)
96
+ @missing_values=opts[:missing_values]
97
+ @labels=opts[:labels]
98
+ @today_values=opts[:today_values]
99
+ @valid_data=[]
100
+ @data_with_nils=[]
101
+ @date_data_with_nils=[]
102
+ @missing_data=[]
103
+ @has_missing_data=nil
104
+ @scale_data=nil
105
+ set_valid_data_intern
106
+ self.type=t
107
+ end
108
+ # Creates a duplicate of the Vector.
109
+ # Note: data, missing_values and labels are duplicated, so
110
+ # changes on original vector doesn't propages to copies.
111
+ def dup
112
+ Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
113
+ end
114
+ # Returns an empty duplicate of the vector. Maintains the type,
115
+ # missing values and labels.
116
+ def dup_empty
117
+ Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
118
+ end
119
+ # Raises an exception if type of vector is inferior to t type
120
+ def check_type(t)
121
+ raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal) or (t==:date)
122
+ end
123
+ private :check_type
124
+
125
+ # Return a vector usign the standarized values for data
126
+ # with sd with denominator N
127
+ def vector_standarized_pop
128
+ vector_standarized(true)
129
+ end
130
+ # Return a vector usign the standarized values for data
131
+ # with sd with denominator n-1
132
+
133
+ def vector_standarized(use_population=false)
134
+ raise "Should be a scale" unless @type==:scale
135
+ m=mean
136
+ sd=use_population ? sdp : sds
137
+ @data_with_nils.collect{|x|
138
+ if !x.nil?
139
+ (x.to_f - m).quo(sd)
140
+ else
141
+ nil
142
+ end
143
+ }.to_vector(:scale)
144
+ end
145
+
146
+ alias_method :standarized, :vector_standarized
147
+
148
+ def box_cox_transformation(lambda) # :nodoc:
149
+ raise "Should be a scale" unless @type==:scale
150
+ @data_with_nils.collect{|x|
151
+ if !x.nil?
152
+ if(lambda==0)
153
+ Math.log(x)
154
+ else
155
+ (x**lambda-1).quo(lambda)
156
+ end
157
+ else
158
+ nil
159
+ end
160
+ }.to_vector(:scale)
161
+ end
162
+
163
+ # Vector equality.
164
+ # Two vector will be the same if their data, missing values, type, labels are equals
165
+ def ==(v2)
166
+ raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
167
+ @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
168
+ end
169
+
170
+ def _dump(i) # :nodoc:
171
+ Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type})
172
+ end
173
+
174
+ def self._load(data) # :nodoc:
175
+ h=Marshal.load(data)
176
+ Vector.new(h['data'], h['type'],:missing_values=> h['missing_values'], :labels=>h['labels'])
177
+ end
178
+ # Returns a new vector, with data modified by block.
179
+ # Equivalent to create a Vector after #collect on data
180
+ def recode
181
+ @data.collect{|x|
182
+ yield x
183
+ }.to_vector(@type)
184
+ end
185
+ # Modifies current vector, with data modified by block.
186
+ # Equivalent to #collect! on @data
187
+ def recode!
188
+ @data.collect!{|x|
189
+ yield x
55
190
  }
56
- # Level of measurement. Could be :nominal, :ordinal or :scale
57
- attr_reader :type
58
- # Original data.
59
- attr_reader :data
60
- # Valid data. Equal to data, minus values assigned as missing values
61
- attr_reader :valid_data
62
- # Array of values considered as missing. Nil is a missing value, by default
63
- attr_reader :missing_values
64
- # Missing values array
65
- attr_reader :missing_data
66
- # Original data, with all missing values replaced by nils
67
- attr_reader :data_with_nils
68
- # GSL Object, only available with rbgsl extension and type==:scale
69
- attr_reader :gsl
70
- # Change label for specific values
71
- attr_accessor :labels
72
- # Creates a new Vector object.
73
- # [data] Array of data.
74
- # [type] Level of meausurement. See Vector#type
75
- # [opts] Options
76
- # [:missing_values] Array of missing values. See Vector#missing_values
77
- # [:labels] Labels for data values
78
- #
79
- # The fast way to create a vector uses Array.to_vector or Array.to_scale.
80
- #
81
- # v=[1,2,3,4].to_vector(:scale)
82
- # v=[1,2,3,4].to_scale
83
- #
84
-
85
- def initialize(data=[], t=:nominal, opts=Hash.new)
86
- raise "Data should be an array" unless data.is_a? Array
87
- @data=data
88
- @type=t
89
- opts=DEFAULT_OPTIONS.merge(opts)
90
- @missing_values=opts[:missing_values]
91
- @labels=opts[:labels]
92
- @valid_data=[]
93
- @data_with_nils=[]
94
- @missing_data=[]
95
- @has_missing_data=nil
96
- @scale_data=nil
97
- set_valid_data_intern
98
- self.type=t
99
- end
100
- # Creates a duplicate of the Vector.
101
- # Note: data, missing_values and labels are duplicated, so
102
- # changes on original vector doesn't propages to copies.
103
- def dup
104
- Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
105
- end
106
- # Returns an empty duplicate of the vector. Maintains the type,
107
- # missing values and labels.
108
- def dup_empty
109
- Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
110
- end
111
- # Raises an exception if type of vector is inferior to t type
112
- def check_type(t)
113
- raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal)
114
- end
115
- private :check_type
116
-
117
- # Return a vector usign the standarized values for data
118
- # with sd with denominator N
119
- def vector_standarized_pop
120
- vector_standarized(true)
121
- end
122
- # Return a vector usign the standarized values for data
123
- # with sd with denominator n-1
124
-
125
- def vector_standarized(use_population=false)
126
- raise "Should be a scale" unless @type==:scale
127
- m=mean
128
- sd=use_population ? sdp : sds
129
- @data_with_nils.collect{|x|
130
- if !x.nil?
131
- (x.to_f - m).quo(sd)
132
- else
133
- nil
134
- end
135
- }.to_vector(:scale)
136
- end
137
-
138
- alias_method :standarized, :vector_standarized
139
-
140
- def box_cox_transformation(lambda) # :nodoc:
141
- raise "Should be a scale" unless @type==:scale
142
- @data_with_nils.collect{|x|
143
- if !x.nil?
144
- if(lambda==0)
145
- Math.log(x)
146
- else
147
- (x**lambda-1).quo(lambda)
148
- end
149
- else
191
+ set_valid_data
192
+ end
193
+ # Dicotomize the vector with 0 and 1, based on lowest value
194
+ # If parameter if defined, this value and lower
195
+ # will be 0 and higher, 1
196
+ def dichotomize(low=nil)
197
+ fs=factors
198
+ low||=factors.min
199
+ @data_with_nils.collect{|x|
200
+ if x.nil?
201
+ nil
202
+ elsif x>low
203
+ 1
204
+ else
205
+ 0
206
+ end
207
+ }.to_scale
208
+ end
209
+ # Iterate on each item.
210
+ # Equivalent to
211
+ # @data.each{|x| yield x}
212
+ def each
213
+ @data.each{|x| yield(x) }
214
+ end
215
+
216
+ # Iterate on each item, retrieving index
217
+ def each_index
218
+ (0...@data.size).each {|i|
219
+ yield(i)
220
+ }
221
+ end
222
+ # Add a value at the end of the vector.
223
+ # If second argument set to false, you should update the Vector usign
224
+ # Vector.set_valid_data at the end of your insertion cycle
225
+ #
226
+ def add(v,update_valid=true)
227
+ @data.push(v)
228
+ set_valid_data if update_valid
229
+ end
230
+ # Update valid_data, missing_data, data_with_nils and gsl
231
+ # at the end of an insertion.
232
+ #
233
+ # Use after Vector.add(v,false)
234
+ # Usage:
235
+ # v=Statsample::Vector.new
236
+ # v.add(2,false)
237
+ # v.add(3,false)
238
+ # v.data
239
+ # => [2,3]
240
+ # v.valid_data
241
+ # => []
242
+ # v.set_valid_data
243
+ # v.valid_data
244
+ # => [2,3]
245
+ def set_valid_data
246
+ @valid_data.clear
247
+ @missing_data.clear
248
+ @data_with_nils.clear
249
+ @date_data_with_nils.clear
250
+ @gsl=nil
251
+ set_valid_data_intern
252
+ set_scale_data if(@type==:scale)
253
+ set_date_data if(@type==:date)
254
+ end
255
+
256
+ if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
257
+ def set_valid_data_intern #:nodoc:
258
+ Statsample::STATSAMPLE__.set_valid_data_intern(self)
259
+ end
260
+ else
261
+ def set_valid_data_intern #:nodoc:
262
+ _set_valid_data_intern
263
+ end
264
+ end
265
+ def _set_valid_data_intern #:nodoc:
266
+ @data.each do |n|
267
+ if is_valid? n
268
+ @valid_data.push(n)
269
+ @data_with_nils.push(n)
270
+ else
271
+ @data_with_nils.push(nil)
272
+ @missing_data.push(n)
273
+ end
274
+ end
275
+ @has_missing_data=@missing_data.size>0
276
+ end
277
+
278
+ # Retrieves true if data has one o more missing values
279
+ def has_missing_data?
280
+ @has_missing_data
281
+ end
282
+ # Retrieves label for value x. Retrieves x if
283
+ # no label defined.
284
+ def labeling(x)
285
+ @labels.has_key?(x) ? @labels[x].to_s : x.to_s
286
+ end
287
+ # Returns a Vector with data with labels replaced by the label.
288
+ def vector_labeled
289
+ d=@data.collect{|x|
290
+ if @labels.has_key? x
291
+ @labels[x]
292
+ else
293
+ x
294
+ end
295
+ }
296
+ Vector.new(d,@type)
297
+ end
298
+ # Size of total data
299
+ def size
300
+ @data.size
301
+ end
302
+ alias_method :n, :size
303
+
304
+ # Retrieves i element of data
305
+ def [](i)
306
+ @data[i]
307
+ end
308
+ # Set i element of data.
309
+ # Note: Use set_valid_data if you include missing values
310
+ def []=(i,v)
311
+ @data[i]=v
312
+ end
313
+ # Return true if a value is valid (not nil and not included on missing values)
314
+ def is_valid?(x)
315
+ !(x.nil? or @missing_values.include? x)
316
+ end
317
+ # Set missing_values.
318
+ # if update_valid = false, you should use
319
+ # set_valid_data after all changes
320
+ def missing_values=(vals)
321
+ @missing_values = vals
322
+ set_valid_data
323
+ end
324
+ def today_values=(vals)
325
+ @today_values = vals
326
+ set_valid_data
327
+ end
328
+ # Set level of measurement.
329
+ def type=(t)
330
+ @type=t
331
+ set_scale_data if(t==:scale)
332
+ set_date_data if (t==:date)
333
+ end
334
+ def to_a
335
+ @data.dup
336
+ end
337
+ alias_method :to_ary, :to_a
338
+
339
+ # Vector sum.
340
+ # - If v is a scalar, add this value to all elements
341
+ # - If v is a Array or a Vector, should be of the same size of this vector
342
+ # every item of this vector will be added to the value of the
343
+ # item at the same position on the other vector
344
+ def +(v)
345
+ _vector_ari("+",v)
346
+ end
347
+ # Vector rest.
348
+ # - If v is a scalar, rest this value to all elements
349
+ # - If v is a Array or a Vector, should be of the same
350
+ # size of this vector
351
+ # every item of this vector will be rested to the value of the
352
+ # item at the same position on the other vector
353
+
354
+ def -(v)
355
+ _vector_ari("-",v)
356
+ end
357
+ # Reports all values that doesn't comply with a condition.
358
+ # Returns a hash with the index of data and the invalid data.
359
+ def verify
360
+ h={}
361
+ (0...@data.size).to_a.each{|i|
362
+ if !(yield @data[i])
363
+ h[i]=@data[i]
364
+ end
365
+ }
366
+ h
367
+ end
368
+ def _vector_ari(method,v) # :nodoc:
369
+ if(v.is_a? Vector or v.is_a? Array)
370
+ if v.size==@data.size
371
+ # i=0
372
+ sum=[]
373
+ 0.upto(v.size-1) {|i|
374
+ if((v.is_a? Vector and v.is_valid?(v[i]) and is_valid?(@data[i])) or (v.is_a? Array and !v[i].nil? and !data[i].nil?))
375
+ sum.push(@data[i].send(method,v[i]))
376
+ else
377
+ sum.push(nil)
378
+ end
379
+ }
380
+ Statsample::Vector.new(sum)
381
+ else
382
+ raise ArgumentError, "The array/vector parameter should be of the same size of the original vector"
383
+ end
384
+ elsif(v.respond_to? method )
385
+ Statsample::Vector.new(
386
+ @data.collect {|x|
387
+ if(!x.nil?)
388
+ x.send(method,v)
389
+ else
150
390
  nil
151
- end
152
- }.to_vector(:scale)
153
- end
154
-
155
- # Vector equality.
156
- # Two vector will be the same if their data, missing values, type, labels are equals
157
- def ==(v2)
158
- raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
159
- @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
160
- end
161
-
162
- def _dump(i) # :nodoc:
163
- Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type})
164
- end
165
-
166
- def self._load(data) # :nodoc:
167
- h=Marshal.load(data)
168
- Vector.new(h['data'], h['type'],:missing_values=> h['missing_values'], :labels=>h['labels'])
169
- end
170
- # Returns a new vector, with data modified by block.
171
- # Equivalent to create a Vector after #collect on data
172
- def recode
173
- @data.collect{|x|
174
- yield x
175
- }.to_vector(@type)
176
- end
177
- # Modifies current vector, with data modified by block.
178
- # Equivalent to #collect! on @data
179
- def recode!
180
- @data.collect!{|x|
181
- yield x
182
- }
183
- set_valid_data
184
- end
185
- # Dicotomize the vector with 0 and 1, based on lowest value
186
- # If parameter if defined, this value and lower
187
- # will be 0 and higher, 1
188
- def dichotomize(low=nil)
189
- fs=factors
190
- low||=factors.min
191
- @data_with_nils.collect{|x|
192
- if x.nil?
193
- nil
194
- elsif x>low
195
- 1
196
- else
197
- 0
198
- end
199
- }.to_scale
200
- end
201
- # Iterate on each item.
202
- # Equivalent to
203
- # @data.each{|x| yield x}
204
- def each
205
- @data.each{|x| yield(x) }
206
- end
207
-
208
- # Iterate on each item, retrieving index
209
- def each_index
210
- (0...@data.size).each {|i|
211
- yield(i)
391
+ end
212
392
  }
213
- end
214
- # Add a value at the end of the vector.
215
- # If second argument set to false, you should update the Vector usign
216
- # Vector.set_valid_data at the end of your insertion cycle
217
- #
218
- def add(v,update_valid=true)
219
- @data.push(v)
220
- set_valid_data if update_valid
221
- end
222
- # Update valid_data, missing_data, data_with_nils and gsl
223
- # at the end of an insertion.
224
- #
225
- # Use after Vector.add(v,false)
226
- # Usage:
227
- # v=Statsample::Vector.new
228
- # v.add(2,false)
229
- # v.add(4,false)
230
- # v.data
231
- # => [2,3]
232
- # v.valid_data
233
- # => []
234
- # v.set_valid_data
235
- # v.valid_data
236
- # => [2,3]
237
- def set_valid_data
238
- @valid_data.clear
239
- @missing_data.clear
240
- @data_with_nils.clear
241
- @gsl=nil
242
- set_valid_data_intern
243
- set_scale_data if(@type==:scale)
244
- end
245
-
246
- if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
247
- def set_valid_data_intern #:nodoc:
248
- Statsample::STATSAMPLE__.set_valid_data_intern(self)
249
- end
393
+ )
250
394
  else
251
- def set_valid_data_intern #:nodoc:
252
- _set_valid_data_intern
253
- end
254
- end
255
- def _set_valid_data_intern #:nodoc:
256
- @data.each do |n|
257
- if is_valid? n
258
- @valid_data.push(n)
259
- @data_with_nils.push(n)
260
- else
261
- @data_with_nils.push(nil)
262
- @missing_data.push(n)
263
- end
264
- end
265
- @has_missing_data=@missing_data.size>0
395
+ raise TypeError,"You should pass a scalar or a array/vector"
266
396
  end
267
-
268
- # Retrieves true if data has one o more missing values
269
- def has_missing_data?
270
- @has_missing_data
271
- end
272
- # Retrieves label for value x. Retrieves x if
273
- # no label defined.
274
- def labeling(x)
275
- @labels.has_key?(x) ? @labels[x].to_s : x.to_s
276
- end
277
- # Returns a Vector with data with labels replaced by the label.
278
- def vector_labeled
279
- d=@data.collect{|x|
280
- if @labels.has_key? x
281
- @labels[x]
282
- else
283
- x
284
- end
285
- }
286
- Vector.new(d,@type)
287
- end
288
- # Size of total data
289
- def size
290
- @data.size
291
- end
292
- alias_method :n, :size
293
-
294
- # Retrieves i element of data
295
- def [](i)
296
- @data[i]
297
- end
298
- # Set i element of data.
299
- # Note: Use set_valid_data if you include missing values
300
- def []=(i,v)
301
- @data[i]=v
302
- end
303
- # Return true if a value is valid (not nil and not included on missing values)
304
- def is_valid?(x)
305
- !(x.nil? or @missing_values.include? x)
306
- end
307
- # Set missing_values.
308
- # if update_valid = false, you should use
309
- # set_valid_data after all changes
310
- def missing_values=(vals)
311
- @missing_values = vals
312
- set_valid_data
313
- end
314
- # Set level of measurement.
315
- def type=(t)
316
- @type=t
317
- set_scale_data if(t==:scale)
318
- end
319
- def to_a
320
- @data.dup
321
- end
322
- alias_method :to_ary, :to_a
323
-
324
- # Vector sum.
325
- # - If v is a scalar, add this value to all elements
326
- # - If v is a Array or a Vector, should be of the same size of this vector
327
- # every item of this vector will be added to the value of the
328
- # item at the same position on the other vector
329
- def +(v)
330
- _vector_ari("+",v)
331
- end
332
- # Vector rest.
333
- # - If v is a scalar, rest this value to all elements
334
- # - If v is a Array or a Vector, should be of the same
335
- # size of this vector
336
- # every item of this vector will be rested to the value of the
337
- # item at the same position on the other vector
338
397
 
339
- def -(v)
340
- _vector_ari("-",v)
341
- end
342
- # Reports all values that doesn't comply with a condition.
343
- # Returns a hash with the index of data and the invalid data.
344
- def verify
345
- h={}
346
- (0...@data.size).to_a.each{|i|
347
- if !(yield @data[i])
348
- h[i]=@data[i]
349
- end
350
- }
351
- h
352
- end
353
- def _vector_ari(method,v) # :nodoc:
354
- if(v.is_a? Vector or v.is_a? Array)
355
- if v.size==@data.size
356
- # i=0
357
- sum=[]
358
- 0.upto(v.size-1) {|i|
359
- if((v.is_a? Vector and v.is_valid?(v[i]) and is_valid?(@data[i])) or (v.is_a? Array and !v[i].nil? and !data[i].nil?))
360
- sum.push(@data[i].send(method,v[i]))
361
- else
362
- sum.push(nil)
363
- end
364
- }
365
- Statsample::Vector.new(sum)
366
- else
367
- raise ArgumentError, "The array/vector parameter should be of the same size of the original vector"
368
- end
369
- elsif(v.respond_to? method )
370
- Statsample::Vector.new(
371
- @data.collect {|x|
372
- if(!x.nil?)
373
- x.send(method,v)
374
- else
375
- nil
376
- end
377
- }
378
- )
379
- else
380
- raise TypeError,"You should pass a scalar or a array/vector"
381
- end
382
-
383
- end
384
- # Return an array with the data splitted by a separator.
385
- # a=Vector.new(["a,b","c,d","a,b","d"])
386
- # a.splitted
387
- # =>
388
- # [["a","b"],["c","d"],["a","b"],["d"]]
389
- def splitted(sep=Statsample::SPLIT_TOKEN)
390
- @data.collect{|x|
391
- if x.nil?
392
- nil
393
- elsif (x.respond_to? :split)
394
- x.split(sep)
395
- else
396
- [x]
397
- end
398
- }
399
- end
400
- # Returns a hash of Vectors, defined by the different values
401
- # defined on the fields
402
- # Example:
403
- #
404
- # a=Vector.new(["a,b","c,d","a,b"])
405
- # a.split_by_separator
406
- # => {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
407
- # @data=[1, 0, 1]>,
408
- # "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
409
- # @data=[1, 1, 0]>,
410
- # "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
411
- # @data=[0, 1, 1]>}
412
- #
413
- def split_by_separator(sep=Statsample::SPLIT_TOKEN)
414
- split_data=splitted(sep)
415
- factors=split_data.flatten.uniq.compact
416
- out=factors.inject({}) {|a,x|
417
- a[x]=[]
418
- a
419
- }
420
- split_data.each{|r|
421
- if r.nil?
422
- factors.each{|f|
423
- out[f].push(nil)
424
- }
425
- else
398
+ end
399
+ # Return an array with the data splitted by a separator.
400
+ # a=Vector.new(["a,b","c,d","a,b","d"])
401
+ # a.splitted
402
+ # =>
403
+ # [["a","b"],["c","d"],["a","b"],["d"]]
404
+ def splitted(sep=Statsample::SPLIT_TOKEN)
405
+ @data.collect{|x|
406
+ if x.nil?
407
+ nil
408
+ elsif (x.respond_to? :split)
409
+ x.split(sep)
410
+ else
411
+ [x]
412
+ end
413
+ }
414
+ end
415
+ # Returns a hash of Vectors, defined by the different values
416
+ # defined on the fields
417
+ # Example:
418
+ #
419
+ # a=Vector.new(["a,b","c,d","a,b"])
420
+ # a.split_by_separator
421
+ # => {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
422
+ # @data=[1, 0, 1]>,
423
+ # "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
424
+ # @data=[1, 1, 0]>,
425
+ # "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
426
+ # @data=[0, 1, 1]>}
427
+ #
428
+ def split_by_separator(sep=Statsample::SPLIT_TOKEN)
429
+ split_data=splitted(sep)
430
+ factors=split_data.flatten.uniq.compact
431
+ out=factors.inject({}) {|a,x|
432
+ a[x]=[]
433
+ a
434
+ }
435
+ split_data.each{|r|
436
+ if r.nil?
426
437
  factors.each{|f|
427
- out[f].push(r.include?(f) ? 1:0)
438
+ out[f].push(nil)
428
439
  }
429
- end
430
- }
431
- out.inject({}){|s,v|
432
- s[v[0]]=Vector.new(v[1],:nominal)
433
- s
440
+ else
441
+ factors.each{|f|
442
+ out[f].push(r.include?(f) ? 1:0)
434
443
  }
444
+ end
445
+ }
446
+ out.inject({}){|s,v|
447
+ s[v[0]]=Vector.new(v[1],:nominal)
448
+ s
449
+ }
450
+ end
451
+ def split_by_separator_freq(sep=Statsample::SPLIT_TOKEN)
452
+ split_by_separator(sep).inject({}) {|a,v|
453
+ a[v[0]]=v[1].inject {|s,x| s+x.to_i}
454
+ a
455
+ }
456
+ end
457
+
458
+ # Returns an random sample of size n, with replacement,
459
+ # only with valid data.
460
+ #
461
+ # In all the trails, every item have the same probability
462
+ # of been selected.
463
+ def sample_with_replacement(sample=1)
464
+ if(@type!=:scale or !HAS_GSL)
465
+ vds=@valid_data.size
466
+ (0...sample).collect{ @valid_data[rand(vds)] }
467
+ else
468
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
469
+ r.sample(@gsl, sample).to_a
470
+ end
471
+ end
472
+ # Returns an random sample of size n, without replacement,
473
+ # only with valid data.
474
+ #
475
+ # Every element could only be selected once.
476
+ #
477
+ # A sample of the same size of the vector is the vector itself.
478
+
479
+ def sample_without_replacement(sample=1)
480
+ if(@type!=:scale or !HAS_GSL)
481
+ raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
482
+ out=[]
483
+ size=@valid_data.size
484
+ while out.size<sample
485
+ value=rand(size)
486
+ out.push(value) if !out.include?value
487
+ end
488
+ out.collect{|i|@data[i]}
489
+ else
490
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
491
+ r.choose(@gsl, sample).to_a
492
+ end
493
+ end
494
+ # Retrieves number of cases which comply condition.
495
+ # If block given, retrieves number of instances where
496
+ # block returns true.
497
+ # If other values given, retrieves the frequency for
498
+ # this value.
499
+ def count(x=false)
500
+ if block_given?
501
+ r=@data.inject(0) {|s, i|
502
+ r=yield i
503
+ s+(r ? 1 : 0)
504
+ }
505
+ r.nil? ? 0 : r
506
+ else
507
+ frequencies[x].nil? ? 0 : frequencies[x]
508
+ end
509
+ end
510
+
511
+ # Returns the database type for the vector, according to its content
512
+
513
+ def db_type(dbs='mysql')
514
+ # first, detect any character not number
515
+ if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/} or @data.find {|v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/}
516
+ return "DATE"
517
+ elsif @data.find {|v| v.to_s=~/[^0-9e.-]/ }
518
+ return "VARCHAR (255)"
519
+ elsif @data.find {|v| v.to_s=~/\./}
520
+ return "DOUBLE"
521
+ else
522
+ return "INTEGER"
523
+ end
524
+ end
525
+ # Return true if all data is Date, "today" values or nil
526
+ def can_be_date?
527
+ if @data.find {|v|
528
+ !v.nil? and !v.is_a? Date and !v.is_a? Time and (v.is_a? String and !@today_values.include? v) and (v.is_a? String and !(v=~/\d{4,4}[-\/]\d{1,2}[-\/]\d{1,2}/))}
529
+ false
530
+ else
531
+ true
435
532
  end
436
- def split_by_separator_freq(sep=Statsample::SPLIT_TOKEN)
437
- split_by_separator(sep).inject({}) {|a,v|
438
- a[v[0]]=v[1].inject {|s,x| s+x.to_i}
533
+ end
534
+ # Return true if all data is Numeric or nil
535
+ def can_be_scale?
536
+ if @data.find {|v| !v.nil? and !v.is_a? Numeric and !@missing_values.include? v}
537
+ false
538
+ else
539
+ true
540
+ end
541
+ end
542
+
543
+ def to_s
544
+ sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
545
+ end
546
+ # Ugly name. Really, create a Vector for standard 'matrix' package.
547
+ # <tt>dir</tt> could be :horizontal or :vertical
548
+ def to_matrix(dir=:horizontal)
549
+ case dir
550
+ when :horizontal
551
+ Matrix[@data]
552
+ when :vertical
553
+ Matrix.columns([@data])
554
+ end
555
+ end
556
+ def inspect
557
+ self.to_s
558
+ end
559
+ # Retrieves uniques values for data.
560
+ def factors
561
+ if @type==:scale
562
+ @scale_data.uniq.sort
563
+ elsif @type==:date
564
+ @date_data_with_nils.uniq.sort
565
+ else
566
+ @valid_data.uniq.sort
567
+ end
568
+ end
569
+ if Statsample::STATSAMPLE__.respond_to?(:frequencies)
570
+ # Returns a hash with the distribution of frecuencies for
571
+ # the sample
572
+ def frequencies
573
+ Statsample::STATSAMPLE__.frequencies(@valid_data)
574
+ end
575
+ else
576
+ def frequencies #:nodoc:
577
+ _frequencies
578
+ end
579
+ end
580
+ def _frequencies #:nodoc:
581
+ @valid_data.inject(Hash.new) {|a,x|
582
+ a[x]||=0
583
+ a[x]=a[x]+1
584
+ a
585
+ }
586
+ end
587
+ # Plot frequencies on a chart, using gnuplot
588
+ def plot_frequencies
589
+ require 'gnuplot'
590
+ x=[]
591
+ y=[]
592
+ self.frequencies.sort.each{|k,v|
593
+ x.push(k)
594
+ y.push(v)
595
+ }
596
+ Gnuplot.open do |gp|
597
+ Gnuplot::Plot.new( gp ) do |plot|
598
+ plot.boxwidth("0.9 absolute")
599
+ plot.yrange("[0:#{y.max}]")
600
+ plot.style("fill solid 1.00 border -1")
601
+ plot.set("xtics border in scale 1,0.5 nomirror rotate by -45 offset character 0, 0, 0")
602
+ plot.style("histogram")
603
+ plot.style("data histogram")
604
+ i=-1
605
+ plot.set("xtics","("+x.collect{|v| i+=1; sprintf("\"%s\" %d",v,i)}.join(",")+")")
606
+ plot.data << Gnuplot::DataSet.new( [y] ) do |ds|
607
+ end
608
+ end
609
+ end
610
+
611
+ end
612
+
613
+
614
+ # Returns the most frequent item.
615
+ def mode
616
+ frequencies.max{|a,b| a[1]<=>b[1]}[0]
617
+ end
618
+ # The numbers of item with valid data.
619
+ def n_valid
620
+ @valid_data.size
621
+ end
622
+ # Returns a hash with the distribution of proportions of
623
+ # the sample.
624
+ def proportions
625
+ frequencies.inject({}){|a,v|
626
+ a[v[0]] = v[1].quo(n_valid)
439
627
  a
440
628
  }
441
629
  end
442
-
443
- # Returns an random sample of size n, with replacement,
444
- # only with valid data.
445
- #
446
- # In all the trails, every item have the same probability
447
- # of been selected.
448
- def sample_with_replacement(sample=1)
449
- if(@type!=:scale or !HAS_GSL)
450
- vds=@valid_data.size
451
- (0...sample).collect{ @valid_data[rand(vds)] }
452
- else
453
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
454
- r.sample(@gsl, sample).to_a
455
- end
456
- end
457
- # Returns an random sample of size n, without replacement,
458
- # only with valid data.
459
- #
460
- # Every element could only be selected once.
461
- #
462
- # A sample of the same size of the vector is the vector itself.
463
-
464
- def sample_without_replacement(sample=1)
465
- if(@type!=:scale or !HAS_GSL)
466
- raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
467
- out=[]
468
- size=@valid_data.size
469
- while out.size<sample
470
- value=rand(size)
471
- out.push(value) if !out.include?value
472
- end
473
- out.collect{|i|@data[i]}
474
- else
475
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
476
- r.choose(@gsl, sample).to_a
630
+ # Proportion of a given value.
631
+ def proportion(v=1)
632
+ frequencies[v].quo(@valid_data.size)
633
+ end
634
+ def summary(out="")
635
+ out << sprintf("n valid:%d\n",n_valid)
636
+ out << sprintf("factors:%s\n",factors.join(","))
637
+ out << "mode:"+mode.to_s+"\n"
638
+ out << "Distribution:\n"
639
+ frequencies.sort.each{|k,v|
640
+ key=labels.has_key?(k) ? labels[k]:k
641
+ out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
642
+ }
643
+ if(@type==:ordinal)
644
+ out << "median:"+median.to_s+"\n"
477
645
  end
478
- end
479
- # Retrieves number of cases which comply condition.
480
- # If block given, retrieves number of instances where
481
- # block returns true.
482
- # If other values given, retrieves the frequency for
483
- # this value.
484
- def count(x=false)
485
- if block_given?
486
- r=@data.inject(0) {|s, i|
487
- r=yield i
488
- s+(r ? 1 : 0)
489
- }
490
- r.nil? ? 0 : r
491
- else
492
- frequencies[x].nil? ? 0 : frequencies[x]
646
+ if(@type==:scale)
647
+ out << "mean:"+mean.to_s+"\n"
648
+ out << "sd:"+sd.to_s+"\n"
649
+
493
650
  end
651
+ out
494
652
  end
495
-
496
- # Returns the database type for the vector, according to its content
497
-
498
- def db_type(dbs='mysql')
499
- # first, detect any character not number
500
- if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/} or @data.find {|v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/}
501
- return "DATE"
502
- elsif @data.find {|v| v.to_s=~/[^0-9e.-]/ }
503
- return "VARCHAR (255)"
504
- elsif @data.find {|v| v.to_s=~/\./}
505
- return "DOUBLE"
506
- else
507
- return "INTEGER"
653
+
654
+ # Variance of p, according to poblation size
655
+ def variance_proportion(n_poblation, v=1)
656
+ Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
657
+ end
658
+ # Variance of p, according to poblation size
659
+ def variance_total(n_poblation, v=1)
660
+ Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
661
+ end
662
+ def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
663
+ Statsample::proportion_confidence_interval_t(proportion(v), @valid_data.size, n_poblation, margin)
664
+ end
665
+ def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
666
+ Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
667
+ end
668
+
669
+ self.instance_methods.find_all{|met| met=~/_slow$/}.each do |met|
670
+ met_or=met.gsub("_slow","")
671
+ if !self.method_defined?(met_or)
672
+ alias_method met_or, met
673
+ end
674
+ end
675
+ ######
676
+ ### Ordinal Methods
677
+ ######
678
+
679
+ # Return the value of the percentil q
680
+ def percentil(q)
681
+ check_type :ordinal
682
+ sorted=@valid_data.sort
683
+ v= (n_valid * q).quo(100)
684
+ if(v.to_i!=v)
685
+ sorted[v.to_i]
686
+ else
687
+ (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
688
+ end
689
+ end
690
+ # Returns a ranked vector.
691
+ def ranked(type=:ordinal)
692
+ check_type :ordinal
693
+ i=0
694
+ r=frequencies.sort.inject({}){|a,v|
695
+ a[v[0]]=(i+1 + i+v[1]).quo(2)
696
+ i+=v[1]
697
+ a
698
+ }
699
+ @data.collect {|c| r[c] }.to_vector(type)
700
+ end
701
+ # Return the median (percentil 50)
702
+ def median
703
+ check_type :ordinal
704
+ if HAS_GSL and @type==:scale
705
+ sorted=GSL::Vector.alloc(@scale_data.sort)
706
+ GSL::Stats::median_from_sorted_data(sorted)
707
+ else
708
+ percentil(50)
709
+ end
710
+ end
711
+ # Minimun value
712
+ def min;
713
+ check_type :ordinal
714
+ @valid_data.min;
715
+ end
716
+ # Maximum value
717
+ def max;
718
+ check_type :ordinal
719
+ @valid_data.max;
720
+ end
721
+ def set_date_data # :nodoc:
722
+ @date_data_with_nils=@data.collect do|x|
723
+ if x.is_a? Date
724
+ x
725
+ elsif x.is_a? Time
726
+ Date.new(x.year, x.month, x.day)
727
+ elsif x.is_a? String and x=~/(\d{4,4})[-\/](\d{1,2})[-\/](\d{1,2})/
728
+ Date.new($1.to_i,$2.to_i,$3.to_i)
729
+ elsif @today_values.include? x
730
+ Date.today()
731
+ elsif @missing_values.include? x or x.nil?
732
+ nil
508
733
  end
734
+ end
509
735
  end
510
- # Return true if all data is Numeric or nil
511
- def can_be_scale?
512
- if @data.find {|v| !v.nil? and !v.is_a? Numeric}
513
- false
736
+ def set_scale_data # :nodoc
737
+ @scale_data=@valid_data.collect do|x|
738
+ if x.is_a? Numeric
739
+ x
740
+ elsif x.is_a? String and x.to_i==x.to_f
741
+ x.to_i
514
742
  else
515
- true
516
- end
743
+ x.to_f
744
+ end
745
+ end
746
+ if HAS_GSL
747
+ @gsl=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
748
+ end
749
+ end
750
+ private :set_scale_data
751
+
752
+ # The range of the data (max - min)
753
+ def range;
754
+ check_type :scale
755
+ @scale_data.max - @scale_data.min
756
+ end
757
+ # The sum of values for the data
758
+ def sum
759
+ check_type :scale
760
+ @scale_data.inject(0){|a,x|x+a} ;
761
+ end
762
+ # The arithmetical mean of data
763
+ def mean
764
+ check_type :scale
765
+ sum.to_f.quo(n_valid)
766
+ end
767
+ # Sum of squares for the data around a value.
768
+ # By default, this value is the mean
769
+ # ss= sum{(xi-m)^2}
770
+ #
771
+ def sum_of_squares(m=nil)
772
+ check_type :scale
773
+ m||=mean
774
+ @scale_data.inject(0){|a,x| a+(x-m).square}
517
775
  end
518
776
 
519
- def to_s
520
- sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
521
- end
522
- # Ugly name. Really, create a Vector for standard 'matrix' package.
523
- # <tt>dir</tt> could be :horizontal or :vertical
524
- def to_matrix(dir=:horizontal)
525
- case dir
526
- when :horizontal
527
- Matrix[@data]
528
- when :vertical
529
- Matrix.columns([@data])
530
- end
531
- end
532
- def inspect
533
- self.to_s
777
+ # Sum of squared deviation
778
+ def sum_of_squared_deviation
779
+ check_type :scale
780
+ @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
534
781
  end
535
- # Retrieves uniques values for data.
536
- def factors
537
- if @type==:scale
538
- @scale_data.uniq.sort
539
- else
540
- @valid_data.uniq.sort
541
- end
782
+
783
+ # Population variance (denominator N)
784
+ def variance_population(m=nil)
785
+ check_type :scale
786
+ m||=mean
787
+ squares=@scale_data.inject(0){|a,x| x.square+a}
788
+ squares.quo(n_valid) - m.square
542
789
  end
543
- if Statsample::STATSAMPLE__.respond_to?(:frequencies)
544
- # Returns a hash with the distribution of frecuencies for
545
- # the sample
546
- def frequencies
547
- Statsample::STATSAMPLE__.frequencies(@valid_data)
548
- end
549
- else
550
- def frequencies #:nodoc:
551
- _frequencies
552
- end
790
+
791
+
792
+ # Population Standard deviation (denominator N)
793
+ def standard_deviation_population(m=nil)
794
+ check_type :scale
795
+ Math::sqrt( variance_population(m) )
553
796
  end
554
- def _frequencies #:nodoc:
555
- @valid_data.inject(Hash.new) {|a,x|
556
- a[x]||=0
557
- a[x]=a[x]+1
558
- a
559
- }
797
+ # Sample Variance (denominator n-1)
798
+
799
+ def variance_sample(m=nil)
800
+ check_type :scale
801
+ m||=mean
802
+ sum_of_squares(m).quo(n_valid - 1)
560
803
  end
561
- # Plot frequencies on a chart, using gnuplot
562
- def plot_frequencies
563
- require 'gnuplot'
564
- x=[]
565
- y=[]
566
- self.frequencies.sort.each{|k,v|
567
- x.push(k)
568
- y.push(v)
569
- }
570
- Gnuplot.open do |gp|
571
- Gnuplot::Plot.new( gp ) do |plot|
572
- plot.boxwidth("0.9 absolute")
573
- plot.yrange("[0:#{y.max}]")
574
- plot.style("fill solid 1.00 border -1")
575
- plot.set("xtics border in scale 1,0.5 nomirror rotate by -45 offset character 0, 0, 0")
576
- plot.style("histogram")
577
- plot.style("data histogram")
578
- i=-1
579
- plot.set("xtics","("+x.collect{|v| i+=1; sprintf("\"%s\" %d",v,i)}.join(",")+")")
580
- plot.data << Gnuplot::DataSet.new( [y] ) do |ds|
581
- end
582
- end
583
- end
584
804
 
585
- end
586
-
587
-
588
- # Returns the most frequent item.
589
- def mode
590
- frequencies.max{|a,b| a[1]<=>b[1]}[0]
591
- end
592
- # The numbers of item with valid data.
593
- def n_valid
594
- @valid_data.size
595
- end
596
- # Returns a hash with the distribution of proportions of
597
- # the sample.
598
- def proportions
599
- frequencies.inject({}){|a,v|
600
- a[v[0]] = v[1].quo(n_valid)
601
- a
602
- }
603
- end
604
- # Proportion of a given value.
605
- def proportion(v=1)
606
- frequencies[v].quo(@valid_data.size)
607
- end
608
- def summary(out="")
609
- out << sprintf("n valid:%d\n",n_valid)
610
- out << sprintf("factors:%s\n",factors.join(","))
611
- out << "mode:"+mode.to_s+"\n"
612
- out << "Distribution:\n"
613
- frequencies.sort.each{|k,v|
614
- key=labels.has_key?(k) ? labels[k]:k
615
- out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
616
- }
617
- if(@type==:ordinal)
618
- out << "median:"+median.to_s+"\n"
619
- end
620
- if(@type==:scale)
621
- out << "mean:"+mean.to_s+"\n"
622
- out << "sd:"+sd.to_s+"\n"
623
-
624
- end
625
- out
626
- end
805
+ # Sample Standard deviation (denominator n-1)
806
+
807
+ def standard_deviation_sample(m=nil)
808
+ check_type :scale
627
809
 
628
- # Variance of p, according to poblation size
629
- def variance_proportion(n_poblation, v=1)
630
- Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
631
- end
632
- # Variance of p, according to poblation size
633
- def variance_total(n_poblation, v=1)
634
- Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
635
- end
636
- def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
637
- Statsample::proportion_confidence_interval_t(proportion(v), @valid_data.size, n_poblation, margin)
638
- end
639
- def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
640
- Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
641
- end
810
+ m||=mean
811
+ Math::sqrt(variance_sample(m))
812
+ end
813
+ # Skewness of the sample
814
+ def skew(m=nil)
815
+ check_type :scale
816
+ m||=mean
817
+ th=@scale_data.inject(0){|a,x| a+((x-m)**3)}
818
+ th.quo((@scale_data.size)*sd(m)**3)
819
+ end
820
+ # Kurtosis of the sample
821
+ def kurtosis(m=nil)
822
+ check_type :scale
823
+ m||=mean
824
+ fo=@scale_data.inject(0){|a,x| a+((x-m)**4)}
825
+ fo.quo((@scale_data.size)*sd(m)**4)-3
642
826
 
643
- self.instance_methods.find_all{|met| met=~/_slow$/}.each do |met|
644
- met_or=met.gsub("_slow","")
645
- if !self.method_defined?(met_or)
646
- alias_method met_or, met
647
- end
648
- end
649
- ######
650
- ### Ordinal Methods
651
- ######
652
-
653
- # Return the value of the percentil q
654
- def percentil(q)
655
- check_type :ordinal
656
- sorted=@valid_data.sort
657
- v= (n_valid * q).quo(100)
658
- if(v.to_i!=v)
659
- sorted[v.to_i]
660
- else
661
- (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
662
- end
663
- end
664
- # Returns a ranked vector.
665
- def ranked(type=:ordinal)
666
- check_type :ordinal
667
- i=0
668
- r=frequencies.sort.inject({}){|a,v|
669
- a[v[0]]=(i+1 + i+v[1]).quo(2)
670
- i+=v[1]
671
- a
672
- }
673
- @data.collect {|c|
674
- r[c]
675
- }.to_vector(type)
676
- end
677
- # Return the median (percentil 50)
678
- def median
679
- check_type :ordinal
680
- if HAS_GSL and @type==:scale
681
- GSL::Stats::median_from_sorted_data(@gsl)
682
- else
683
- percentil(50)
684
- end
685
- end
686
- # Minimun value
687
- def min;
688
- check_type :ordinal
689
- @valid_data.min;
690
- end
691
- # Maximum value
692
- def max;
693
- check_type :ordinal
694
- @valid_data.max;
695
- end
696
-
697
- def set_scale_data # :nodoc
698
- @scale_data=@valid_data.collect do|x|
699
- if x.is_a? Numeric
700
- x
701
- elsif x.is_a? String and x.to_i==x.to_f
702
- x.to_i
703
- else
704
- x.to_f
705
- end
706
- end
707
- if HAS_GSL
708
- @gsl=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
709
- end
710
- end
711
- private :set_scale_data
827
+ end
828
+ # Product of all values on the sample
829
+ #
830
+ def product
831
+ check_type :scale
832
+ @scale_data.inject(1){|a,x| a*x }
833
+ end
834
+ if HAS_GSL
835
+ %w{skew kurtosis variance_sample standard_deviation_sample variance_population standard_deviation_population mean sum}.each{|m|
836
+ m_nuevo=(m+"_slow").intern
837
+ alias_method m_nuevo, m.intern
838
+ }
839
+ def sum # :nodoc:
840
+ check_type :scale
841
+
842
+ @gsl.sum
843
+ end
844
+ def mean # :nodoc:
845
+ check_type :scale
846
+
847
+ @gsl.mean
848
+ end
849
+ def variance_sample(m=nil) # :nodoc:
850
+ check_type :scale
851
+
852
+ m||=mean
853
+ @gsl.variance_m
854
+ end
855
+ def standard_deviation_sample(m=nil) # :nodoc:
856
+ check_type :scale
857
+ m||=mean
858
+ @gsl.sd(m)
859
+ end
712
860
 
713
- # The range of the data (max - min)
714
- def range;
715
- check_type :scale
716
- @scale_data.max - @scale_data.min
717
- end
718
- # The sum of values for the data
719
- def sum
720
- check_type :scale
721
- @scale_data.inject(0){|a,x|x+a} ;
722
- end
723
- # The arithmetical mean of data
724
- def mean
725
- check_type :scale
726
- sum.to_f.quo(n_valid)
727
- end
728
- # Sum of squares for the data around a value.
729
- # By default, this value is the mean
730
- # ss= sum{(xi-m)^2}
731
- #
732
- def sum_of_squares(m=nil)
733
- check_type :scale
734
- m||=mean
735
- @scale_data.inject(0){|a,x| a+(x-m).square}
736
- end
737
-
738
- # Sum of squared deviation
739
- def sum_of_squared_deviation
740
- check_type :scale
741
- @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
742
- end
861
+ def variance_population(m=nil) # :nodoc:
862
+ check_type :scale
863
+ m||=mean
864
+ @gsl.variance_with_fixed_mean(m)
865
+ end
866
+ def standard_deviation_population(m=nil) # :nodoc:
867
+ check_type :scale
868
+ m||=mean
869
+ @gsl.sd_with_fixed_mean(m)
870
+ end
871
+ def skew # :nodoc:
872
+ check_type :scale
873
+ @gsl.skew
874
+ end
875
+ def kurtosis # :nodoc:
876
+ check_type :scale
877
+ @gsl.kurtosis
878
+ end
879
+ # Create a GSL::Histogram
880
+ # With a fixnum, creates X bins within the range of data
881
+ # With an Array, each value will be a cut point
882
+ def histogram(bins=10)
883
+ check_type :scale
743
884
 
744
- # Population variance (denominator N)
745
- def variance_population(m=nil)
746
- check_type :scale
747
- m||=mean
748
- squares=@scale_data.inject(0){|a,x| x.square+a}
749
- squares.quo(n_valid) - m.square
885
+ if bins.is_a? Array
886
+ #h=Statsample::Histogram.new(self, bins)
887
+ h=GSL::Histogram.alloc(bins)
888
+ else
889
+ # ugly patch. The upper limit for a bin has the form
890
+ # x < range
891
+ #h=Statsample::Histogram.new(self, bins)
892
+ h=GSL::Histogram.alloc(bins,[@valid_data.min,@valid_data.max+0.0001])
750
893
  end
751
-
894
+ h.increment(@gsl)
895
+ h
896
+ end
897
+ def plot_histogram(bins=10,options="")
898
+ check_type :scale
899
+ self.histogram(bins).graph(options)
900
+ end
752
901
 
753
- # Population Standard deviation (denominator N)
754
- def standard_deviation_population(m=nil)
755
- check_type :scale
756
-
757
- Math::sqrt( variance_population(m) )
758
- end
759
- # Sample Variance (denominator n-1)
760
-
761
- def variance_sample(m=nil)
762
- check_type :scale
763
-
764
- m||=mean
765
- sum_of_squares(m).quo(n_valid - 1)
766
- end
767
-
768
- # Sample Standard deviation (denominator n-1)
769
-
770
- def standard_deviation_sample(m=nil)
771
- check_type :scale
772
-
773
- m||=m
774
- Math::sqrt(variance_sample(m))
775
- end
776
- # Skewness of the sample
777
- def skew
778
- check_type :scale
779
- m=mean
780
- thirds=@scale_data.inject(0){|a,x| a+((x-mean)**3)}
781
- thirds.quo((@scale_data.size-1)*sd**3)
782
- end
783
- # Kurtosis of the sample
784
- def kurtosis
785
- check_type :scale
786
-
787
- m=mean
788
- thirds=@scale_data.inject(0){|a,x| a+((x-mean)**4)}
789
- thirds.quo((@scale_data.size-1)*sd**4)
790
-
791
- end
792
- # Product of all values on the sample
793
- #
794
- def product
795
- check_type :scale
796
- @scale_data.inject(1){|a,x| a*x }
797
- end
798
- if HAS_GSL
799
- %w{skew kurtosis variance_sample standard_deviation_sample variance_population standard_deviation_population mean sum}.each{|m|
800
- m_nuevo=(m+"_slow").intern
801
- alias_method m_nuevo, m.intern
802
- }
803
- def sum # :nodoc:
804
- check_type :scale
805
-
806
- @gsl.sum
807
- end
808
- def mean # :nodoc:
809
- check_type :scale
810
-
811
- @gsl.mean
812
- end
813
- def variance_sample(m=nil) # :nodoc:
814
- check_type :scale
815
-
816
- m||=mean
817
- @gsl.variance_m
818
- end
819
- def standard_deviation_sample(m=nil) # :nodoc:
820
- check_type :scale
821
- m||=mean
822
- @gsl.sd(m)
823
- end
824
-
825
- def variance_population(m=nil) # :nodoc:
826
- check_type :scale
827
- m||=mean
828
- @gsl.variance_with_fixed_mean(m)
829
- end
830
- def standard_deviation_population(m=nil) # :nodoc:
831
- check_type :scale
832
- m||=mean
833
- @gsl.sd_with_fixed_mean(m)
834
- end
835
- def skew # :nodoc:
836
- check_type :scale
837
- @gsl.skew
838
- end
839
- def kurtosis # :nodoc:
840
- check_type :scale
841
- @gsl.kurtosis
842
- end
843
- # Create a GSL::Histogram
844
- # With a fixnum, creates X bins within the range of data
845
- # With an Array, each value will be a cut point
846
- def histogram(bins=10)
847
- check_type :scale
848
- if bins.is_a? Array
849
- h=GSL::Histogram.alloc(bins)
850
- else
851
- # ugly patch. The upper limit for a bin has the form
852
- # x < range
853
- h=GSL::Histogram.alloc(bins,[@valid_data.min,@valid_data.max+0.0001])
854
- end
855
- h.increment(@gsl)
856
- h
857
- end
858
- def plot_histogram(bins=10,options="")
859
- check_type :scale
860
- self.histogram(bins).graph(options)
861
- end
862
-
863
- end
864
-
865
- # Coefficient of variation
866
- # Calculed with the sample standard deviation
867
- def coefficient_of_variation
868
- check_type :scale
869
- standard_deviation_sample.quo(mean)
870
- end
871
-
872
- alias_method :sdp, :standard_deviation_population
873
- alias_method :sds, :standard_deviation_sample
874
- alias_method :cov, :coefficient_of_variation
875
- alias_method :variance, :variance_sample
876
- alias_method :sd, :standard_deviation_sample
877
- alias_method :ss, :sum_of_squares
878
902
  end
903
+
904
+ # Coefficient of variation
905
+ # Calculed with the sample standard deviation
906
+ def coefficient_of_variation
907
+ check_type :scale
908
+ standard_deviation_sample.quo(mean)
909
+ end
910
+
911
+ alias_method :sdp, :standard_deviation_population
912
+ alias_method :sds, :standard_deviation_sample
913
+ alias_method :cov, :coefficient_of_variation
914
+ alias_method :variance, :variance_sample
915
+ alias_method :sd, :standard_deviation_sample
916
+ alias_method :ss, :sum_of_squares
917
+ end
879
918
  end