statsample 1.4.3 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +8 -0
- data/benchmarks/correlation_matrix_15_variables.rb +1 -1
- data/benchmarks/correlation_matrix_5_variables.rb +1 -1
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +2 -2
- data/examples/dataset.rb +2 -2
- data/examples/icc.rb +1 -1
- data/examples/levene.rb +2 -2
- data/examples/parallel_analysis.rb +1 -1
- data/examples/u_test.rb +2 -2
- data/examples/vector.rb +1 -1
- data/examples/velicer_map_test.rb +1 -1
- data/lib/statsample.rb +30 -4
- data/lib/statsample/anova/oneway.rb +3 -3
- data/lib/statsample/anova/twoway.rb +3 -3
- data/lib/statsample/bivariate.rb +7 -7
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/converter/csv.rb +1 -1
- data/lib/statsample/converters.rb +7 -7
- data/lib/statsample/dataset.rb +8 -8
- data/lib/statsample/dominanceanalysis.rb +4 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +8 -8
- data/lib/statsample/factor.rb +2 -4
- data/lib/statsample/factor/map.rb +2 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +2 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/boxplot.rb +4 -4
- data/lib/statsample/graph/histogram.rb +2 -2
- data/lib/statsample/graph/scatterplot.rb +4 -4
- data/lib/statsample/matrix.rb +20 -6
- data/lib/statsample/regression.rb +2 -2
- data/lib/statsample/regression/multiple.rb +3 -3
- data/lib/statsample/regression/multiple/alglibengine.rb +5 -5
- data/lib/statsample/regression/multiple/baseengine.rb +3 -3
- data/lib/statsample/regression/multiple/gslengine.rb +5 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +4 -4
- data/lib/statsample/reliability/icc.rb +1 -1
- data/lib/statsample/reliability/multiscaleanalysis.rb +4 -4
- data/lib/statsample/reliability/scaleanalysis.rb +6 -6
- data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +1 -1
- data/lib/statsample/test/bartlettsphericity.rb +1 -1
- data/lib/statsample/test/levene.rb +4 -4
- data/lib/statsample/test/t.rb +3 -3
- data/lib/statsample/test/umannwhitney.rb +2 -2
- data/lib/statsample/vector.rb +103 -80
- data/lib/statsample/vector/gsl.rb +16 -16
- data/lib/statsample/version.rb +1 -1
- data/test/test_analysis.rb +1 -1
- data/test/test_anova_contrast.rb +4 -4
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +6 -6
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +3 -3
- data/test/test_bivariate.rb +38 -38
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.rb +6 -6
- data/test/test_dataset.rb +79 -79
- data/test/test_factor.rb +55 -49
- data/test/test_factor_pa.rb +4 -4
- data/test/test_ggobi.rb +3 -3
- data/test/test_gsl.rb +3 -3
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +5 -5
- data/test/test_multiset.rb +19 -19
- data/test/test_regression.rb +27 -27
- data/test/test_reliability.rb +14 -14
- data/test/test_reliability_icc.rb +7 -7
- data/test/test_reliability_skillscale.rb +6 -6
- data/test/test_resample.rb +1 -1
- data/test/test_rserve_extension.rb +4 -4
- data/test/test_statistics.rb +5 -5
- data/test/test_stest.rb +8 -8
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +5 -5
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +153 -119
- data/test/test_wilcoxonsignedrank.rb +4 -4
- data/test/test_xls.rb +6 -6
- metadata +3 -53
@@ -5,8 +5,8 @@ module Statsample
|
|
5
5
|
# <blockquote>Levene's test ( Levene, 1960) is used to test if k samples have equal variances. Equal variances across samples is called homogeneity of variance. Some statistical tests, for example the analysis of variance, assume that variances are equal across groups or samples. The Levene test can be used to verify that assumption.</blockquote>
|
6
6
|
# Use:
|
7
7
|
# require 'statsample'
|
8
|
-
# a=[1,2,3,4,5,6,7,8,100,10].
|
9
|
-
# b=[30,40,50,60,70,80,90,100,110,120].
|
8
|
+
# a=[1,2,3,4,5,6,7,8,100,10].to_numeric
|
9
|
+
# b=[30,40,50,60,70,80,90,100,110,120].to_numeric
|
10
10
|
#
|
11
11
|
# levene=Statsample::Test::Levene.new([a,b])
|
12
12
|
# puts levene.summary
|
@@ -52,12 +52,12 @@ module Statsample
|
|
52
52
|
|
53
53
|
zi=@vectors.collect {|vector|
|
54
54
|
mean=vector.mean
|
55
|
-
vector.collect {|v| (v-mean).abs }.
|
55
|
+
vector.collect {|v| (v-mean).abs }.to_numeric
|
56
56
|
}
|
57
57
|
|
58
58
|
total_mean=zi.inject([]) {|ac,vector|
|
59
59
|
ac+vector.valid_data
|
60
|
-
}.
|
60
|
+
}.to_numeric.mean
|
61
61
|
|
62
62
|
k=@vectors.size
|
63
63
|
|
data/lib/statsample/test/t.rb
CHANGED
@@ -125,7 +125,7 @@ module Statsample
|
|
125
125
|
|
126
126
|
# One Sample t-test
|
127
127
|
# == Usage
|
128
|
-
# a=1000.times.map {rand(100)}.
|
128
|
+
# a=1000.times.map {rand(100)}.to_numeric
|
129
129
|
# t_1=Statsample::Test::T::OneSample.new(a, {:u=>50})
|
130
130
|
# t_1.summary
|
131
131
|
#
|
@@ -196,8 +196,8 @@ module Statsample
|
|
196
196
|
# Two Sample t-test.
|
197
197
|
#
|
198
198
|
# == Usage
|
199
|
-
# a=1000.times.map {rand(100)}.
|
200
|
-
# b=1000.times.map {rand(100)}.
|
199
|
+
# a=1000.times.map {rand(100)}.to_numeric
|
200
|
+
# b=1000.times.map {rand(100)}.to_numeric
|
201
201
|
# t_2=Statsample::Test::T::TwoSamplesIndependent.new(a,b)
|
202
202
|
# t_2.summary
|
203
203
|
# === Output
|
@@ -120,7 +120,7 @@ module Statsample
|
|
120
120
|
@v2=v2
|
121
121
|
@n1=v1.valid_data.size
|
122
122
|
@n2=v2.valid_data.size
|
123
|
-
data=(v1.valid_data+v2.valid_data).
|
123
|
+
data=(v1.valid_data+v2.valid_data).to_numeric
|
124
124
|
groups=(([0]*@n1)+([1]*@n2)).to_vector
|
125
125
|
ds={'g'=>groups, 'data'=>data}.to_dataset
|
126
126
|
@t=nil
|
@@ -128,7 +128,7 @@ module Statsample
|
|
128
128
|
if(@ties)
|
129
129
|
adjust_for_ties(ds['data'])
|
130
130
|
end
|
131
|
-
ds['ranked']=ds['data'].ranked(:
|
131
|
+
ds['ranked']=ds['data'].ranked(:numeric)
|
132
132
|
|
133
133
|
@n=ds.cases
|
134
134
|
|
data/lib/statsample/vector.rb
CHANGED
@@ -8,9 +8,15 @@ module Statsample::VectorShorthands
|
|
8
8
|
Statsample::Vector.new(self,*args)
|
9
9
|
end
|
10
10
|
|
11
|
-
# Creates a new Statsample::Vector object of type :scale
|
11
|
+
# Creates a new Statsample::Vector object of type :scale.
|
12
|
+
# Deprecated. Use to_numeric instead.
|
12
13
|
def to_scale(*args)
|
13
|
-
|
14
|
+
$stderr.puts "WARNING: to_scale has been deprecated. Use to_numeric instead."
|
15
|
+
Statsample::Vector.new(self, :numeric, *args)
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_numeric(*args)
|
19
|
+
Statsample::Vector.new(self, :numeric, *args)
|
14
20
|
end
|
15
21
|
end
|
16
22
|
|
@@ -31,10 +37,10 @@ module Statsample
|
|
31
37
|
# Collection of values on one dimension. Works as a column on a Spreadsheet.
|
32
38
|
#
|
33
39
|
# == Usage
|
34
|
-
# The fast way to create a vector uses Array.to_vector or Array.
|
40
|
+
# The fast way to create a vector uses Array.to_vector or Array.to_numeric.
|
35
41
|
#
|
36
|
-
# v=[1,2,3,4].to_vector(:
|
37
|
-
# v=[1,2,3,4].
|
42
|
+
# v=[1,2,3,4].to_vector(:numeric)
|
43
|
+
# v=[1,2,3,4].to_numeric
|
38
44
|
#
|
39
45
|
class Vector
|
40
46
|
include Enumerable
|
@@ -42,7 +48,7 @@ module Statsample
|
|
42
48
|
include Summarizable
|
43
49
|
include Statsample::VectorShorthands
|
44
50
|
|
45
|
-
# Level of measurement. Could be :
|
51
|
+
# Level of measurement. Could be :object, :numeric
|
46
52
|
attr_reader :type
|
47
53
|
# Original data.
|
48
54
|
attr_reader :data
|
@@ -71,7 +77,17 @@ module Statsample
|
|
71
77
|
# * <tt>:today_values</tt> Array of 'today' values. See Vector#today_values
|
72
78
|
# * <tt>:labels</tt> Labels for data values
|
73
79
|
# * <tt>:name</tt> Name of vector
|
74
|
-
def initialize(data=[], type=:
|
80
|
+
def initialize(data=[], type=:object, opts=Hash.new)
|
81
|
+
if type == :ordinal or type == :scale
|
82
|
+
$stderr.puts "WARNING: #{type} has been deprecated. Use :numeric instead."
|
83
|
+
type = :numeric
|
84
|
+
end
|
85
|
+
|
86
|
+
if type == :nominal
|
87
|
+
$stderr.puts "WARNING: nominal has been deprecated. Use :object instead."
|
88
|
+
type = :object
|
89
|
+
end
|
90
|
+
|
75
91
|
@data=data.is_a?(Array) ? data : data.to_a
|
76
92
|
@type=type
|
77
93
|
opts_default={
|
@@ -95,7 +111,7 @@ module Statsample
|
|
95
111
|
@date_data_with_nils=[]
|
96
112
|
@missing_data=[]
|
97
113
|
@has_missing_data=nil
|
98
|
-
@
|
114
|
+
@numeric_data=nil
|
99
115
|
set_valid_data
|
100
116
|
self.type=type
|
101
117
|
end
|
@@ -119,23 +135,29 @@ module Statsample
|
|
119
135
|
end
|
120
136
|
end
|
121
137
|
vector=new(values)
|
122
|
-
vector.type=:
|
138
|
+
vector.type=:numeric if vector.can_be_numeric?
|
123
139
|
vector
|
124
140
|
end
|
125
|
-
# Create a new
|
141
|
+
# Create a new numeric type vector
|
126
142
|
# Parameters
|
127
143
|
# [n] Size
|
128
144
|
# [val] Value of each value
|
129
145
|
# [&block] If block provided, is used to set the values of vector
|
130
|
-
def self.
|
146
|
+
def self.new_numeric(n,val=nil, &block)
|
131
147
|
if block
|
132
|
-
vector=n.times.map {|i| block.call(i)}.
|
148
|
+
vector=n.times.map {|i| block.call(i)}.to_numeric
|
133
149
|
else
|
134
|
-
vector=n.times.map { val}.
|
150
|
+
vector=n.times.map { val}.to_numeric
|
135
151
|
end
|
136
|
-
vector.type=:
|
152
|
+
vector.type=:numeric
|
137
153
|
vector
|
138
154
|
end
|
155
|
+
|
156
|
+
# Deprecated. Use new_numeric instead.
|
157
|
+
def self.new_scale(n, val=nil,&block)
|
158
|
+
$stderr.puts "WARNING: .new_scale has been deprecated. Use .new_numeric instead."
|
159
|
+
new_numeric n, val, &block
|
160
|
+
end
|
139
161
|
# Creates a duplicate of the Vector.
|
140
162
|
# Note: data, missing_values and labels are duplicated, so
|
141
163
|
# changes on original vector doesn't propages to copies.
|
@@ -161,33 +183,34 @@ module Statsample
|
|
161
183
|
|
162
184
|
|
163
185
|
def _check_type(t) #:nodoc:
|
164
|
-
raise NoMethodError if (t
|
186
|
+
raise NoMethodError if (t == :numeric and @type == :object) or
|
187
|
+
(t == :date) or (:date == @type)
|
165
188
|
end
|
166
189
|
|
167
190
|
def vector_standarized_compute(m,sd) # :nodoc:
|
168
|
-
@data_with_nils.collect{|x| x.nil? ? nil : (x.to_f - m).quo(sd) }.to_vector(:
|
191
|
+
@data_with_nils.collect{|x| x.nil? ? nil : (x.to_f - m).quo(sd) }.to_vector(:numeric)
|
169
192
|
end
|
170
193
|
# Return a vector usign the standarized values for data
|
171
194
|
# with sd with denominator n-1. With variance=0 or mean nil,
|
172
195
|
# returns a vector of equal size full of nils
|
173
196
|
#
|
174
197
|
def vector_standarized(use_population=false)
|
175
|
-
check_type :
|
198
|
+
check_type :numeric
|
176
199
|
m=mean
|
177
200
|
sd=use_population ? sdp : sds
|
178
|
-
return ([nil]*size).
|
201
|
+
return ([nil]*size).to_numeric if mean.nil? or sd==0.0
|
179
202
|
vector=vector_standarized_compute(m,sd)
|
180
203
|
vector.name=_("%s(standarized)") % @name
|
181
204
|
vector
|
182
205
|
end
|
183
206
|
def vector_centered_compute(m) #:nodoc:
|
184
|
-
@data_with_nils.collect {|x| x.nil? ? nil : x.to_f-m }.
|
207
|
+
@data_with_nils.collect {|x| x.nil? ? nil : x.to_f-m }.to_numeric
|
185
208
|
end
|
186
209
|
# Return a centered vector
|
187
210
|
def vector_centered
|
188
|
-
check_type :
|
211
|
+
check_type :numeric
|
189
212
|
m=mean
|
190
|
-
return ([nil]*size).
|
213
|
+
return ([nil]*size).to_numeric if mean.nil?
|
191
214
|
vector=vector_centered_compute(m)
|
192
215
|
vector.name=_("%s(centered)") % @name
|
193
216
|
vector
|
@@ -198,14 +221,14 @@ module Statsample
|
|
198
221
|
# Return a vector with values replaced with the percentiles
|
199
222
|
# of each values
|
200
223
|
def vector_percentil
|
201
|
-
check_type :
|
224
|
+
check_type :numeric
|
202
225
|
c=@valid_data.size
|
203
226
|
vector=ranked.map {|i| i.nil? ? nil : (i.quo(c)*100).to_f }.to_vector(@type)
|
204
227
|
vector.name=_("%s(percentil)") % @name
|
205
228
|
vector
|
206
229
|
end
|
207
230
|
def box_cox_transformation(lambda) # :nodoc:
|
208
|
-
raise "Should be a
|
231
|
+
raise "Should be a numeric" unless @type==:numeric
|
209
232
|
@data_with_nils.collect{|x|
|
210
233
|
if !x.nil?
|
211
234
|
if(lambda==0)
|
@@ -216,7 +239,7 @@ module Statsample
|
|
216
239
|
else
|
217
240
|
nil
|
218
241
|
end
|
219
|
-
}.to_vector(:
|
242
|
+
}.to_vector(:numeric)
|
220
243
|
end
|
221
244
|
|
222
245
|
# Vector equality.
|
@@ -269,7 +292,7 @@ module Statsample
|
|
269
292
|
else
|
270
293
|
0
|
271
294
|
end
|
272
|
-
end.
|
295
|
+
end.to_numeric
|
273
296
|
end
|
274
297
|
# Iterate on each item.
|
275
298
|
# Equivalent to
|
@@ -313,7 +336,7 @@ module Statsample
|
|
313
336
|
@data_with_nils.clear
|
314
337
|
@date_data_with_nils.clear
|
315
338
|
set_valid_data_intern
|
316
|
-
|
339
|
+
set_numeric_data if(@type==:numeric)
|
317
340
|
set_date_data if(@type==:date)
|
318
341
|
end
|
319
342
|
if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
|
@@ -394,7 +417,7 @@ module Statsample
|
|
394
417
|
# Set level of measurement.
|
395
418
|
def type=(t)
|
396
419
|
@type=t
|
397
|
-
|
420
|
+
set_numeric_data if(t==:numeric)
|
398
421
|
set_date_data if (t==:date)
|
399
422
|
end
|
400
423
|
def to_a
|
@@ -450,7 +473,7 @@ module Statsample
|
|
450
473
|
sum.push(nil)
|
451
474
|
end
|
452
475
|
}
|
453
|
-
Statsample::Vector.new(sum, :
|
476
|
+
Statsample::Vector.new(sum, :numeric)
|
454
477
|
elsif(v.respond_to? method )
|
455
478
|
Statsample::Vector.new(
|
456
479
|
@data.collect {|x|
|
@@ -459,7 +482,7 @@ module Statsample
|
|
459
482
|
else
|
460
483
|
nil
|
461
484
|
end
|
462
|
-
} , :
|
485
|
+
} , :numeric)
|
463
486
|
else
|
464
487
|
raise TypeError,"You should pass a scalar or a array/vector"
|
465
488
|
end
|
@@ -487,11 +510,11 @@ module Statsample
|
|
487
510
|
#
|
488
511
|
# a=Vector.new(["a,b","c,d","a,b"])
|
489
512
|
# a.split_by_separator
|
490
|
-
# => {"a"=>#<Statsample::Type::
|
513
|
+
# => {"a"=>#<Statsample::Type::object:0x7f2dbcc09d88
|
491
514
|
# @data=[1, 0, 1]>,
|
492
|
-
# "b"=>#<Statsample::Type::
|
515
|
+
# "b"=>#<Statsample::Type::object:0x7f2dbcc09c48
|
493
516
|
# @data=[1, 1, 0]>,
|
494
|
-
# "c"=>#<Statsample::Type::
|
517
|
+
# "c"=>#<Statsample::Type::object:0x7f2dbcc09b08
|
495
518
|
# @data=[0, 1, 1]>}
|
496
519
|
#
|
497
520
|
def split_by_separator(sep=Statsample::SPLIT_TOKEN)
|
@@ -513,7 +536,7 @@ module Statsample
|
|
513
536
|
end
|
514
537
|
end
|
515
538
|
out.inject({}){|s,v|
|
516
|
-
s[v[0]]=Vector.new(v[1],:
|
539
|
+
s[v[0]]=Vector.new(v[1],:object)
|
517
540
|
s
|
518
541
|
}
|
519
542
|
end
|
@@ -554,8 +577,8 @@ module Statsample
|
|
554
577
|
end
|
555
578
|
|
556
579
|
es.each do |est|
|
557
|
-
bss[est]=bss[est].
|
558
|
-
bss[est].type=:
|
580
|
+
bss[est]=bss[est].to_numeric
|
581
|
+
bss[est].type=:numeric
|
559
582
|
end
|
560
583
|
bss.to_dataset
|
561
584
|
|
@@ -595,7 +618,7 @@ module Statsample
|
|
595
618
|
nb.times do |i|
|
596
619
|
other=@data_with_nils.dup
|
597
620
|
other.slice!(i*k,k)
|
598
|
-
other=other.
|
621
|
+
other=other.to_numeric
|
599
622
|
es.each do |estimator|
|
600
623
|
# Add pseudovalue
|
601
624
|
ps[estimator].push( nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other))
|
@@ -604,8 +627,8 @@ module Statsample
|
|
604
627
|
|
605
628
|
|
606
629
|
es.each do |est|
|
607
|
-
ps[est]=ps[est].
|
608
|
-
ps[est].type=:
|
630
|
+
ps[est]=ps[est].to_numeric
|
631
|
+
ps[est].type=:numeric
|
609
632
|
end
|
610
633
|
ps.to_dataset
|
611
634
|
end
|
@@ -702,7 +725,7 @@ module Statsample
|
|
702
725
|
end
|
703
726
|
end
|
704
727
|
# Return true if all data is Numeric or nil
|
705
|
-
def
|
728
|
+
def can_be_numeric?
|
706
729
|
if @data.find {|v| !v.nil? and !v.is_a? Numeric and !@missing_values.include? v}
|
707
730
|
false
|
708
731
|
else
|
@@ -728,8 +751,8 @@ module Statsample
|
|
728
751
|
end
|
729
752
|
# Retrieves uniques values for data.
|
730
753
|
def factors
|
731
|
-
if @type==:
|
732
|
-
@
|
754
|
+
if @type==:numeric
|
755
|
+
@numeric_data.uniq.sort
|
733
756
|
elsif @type==:date
|
734
757
|
@date_data_with_nils.uniq.sort
|
735
758
|
else
|
@@ -781,7 +804,7 @@ module Statsample
|
|
781
804
|
b.section(:name=>name) do |s|
|
782
805
|
s.text _("n :%d") % n
|
783
806
|
s.text _("n valid:%d") % n_valid
|
784
|
-
if @type==:
|
807
|
+
if @type==:object
|
785
808
|
s.text _("factors:%s") % factors.join(",")
|
786
809
|
s.text _("mode: %s") % mode
|
787
810
|
|
@@ -793,8 +816,8 @@ module Statsample
|
|
793
816
|
end
|
794
817
|
end
|
795
818
|
|
796
|
-
s.text _("median: %s") % median.to_s if(@type==:
|
797
|
-
if(@type==:
|
819
|
+
s.text _("median: %s") % median.to_s if(@type==:numeric or @type==:numeric)
|
820
|
+
if(@type==:numeric)
|
798
821
|
s.text _("mean: %0.4f") % mean
|
799
822
|
if sd
|
800
823
|
s.text _("std.dev.: %0.4f") % sd
|
@@ -829,7 +852,7 @@ module Statsample
|
|
829
852
|
end
|
830
853
|
|
831
854
|
######
|
832
|
-
###
|
855
|
+
### numeric Methods
|
833
856
|
######
|
834
857
|
|
835
858
|
# == Percentil
|
@@ -843,7 +866,7 @@ module Statsample
|
|
843
866
|
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
|
844
867
|
#
|
845
868
|
def percentil(q, strategy = :midpoint)
|
846
|
-
check_type :
|
869
|
+
check_type :numeric
|
847
870
|
sorted=@valid_data.sort
|
848
871
|
|
849
872
|
case strategy
|
@@ -873,8 +896,8 @@ module Statsample
|
|
873
896
|
end
|
874
897
|
|
875
898
|
# Returns a ranked vector.
|
876
|
-
def ranked(type=:
|
877
|
-
check_type :
|
899
|
+
def ranked(type=:numeric)
|
900
|
+
check_type :numeric
|
878
901
|
i=0
|
879
902
|
r=frequencies.sort.inject({}){|a,v|
|
880
903
|
a[v[0]]=(i+1 + i+v[1]).quo(2)
|
@@ -885,17 +908,17 @@ module Statsample
|
|
885
908
|
end
|
886
909
|
# Return the median (percentil 50)
|
887
910
|
def median
|
888
|
-
check_type :
|
911
|
+
check_type :numeric
|
889
912
|
percentil(50)
|
890
913
|
end
|
891
914
|
# Minimun value
|
892
915
|
def min
|
893
|
-
check_type :
|
916
|
+
check_type :numeric
|
894
917
|
@valid_data.min
|
895
918
|
end
|
896
919
|
# Maximum value
|
897
920
|
def max
|
898
|
-
check_type :
|
921
|
+
check_type :numeric
|
899
922
|
@valid_data.max
|
900
923
|
end
|
901
924
|
|
@@ -915,8 +938,8 @@ module Statsample
|
|
915
938
|
end
|
916
939
|
end
|
917
940
|
|
918
|
-
def
|
919
|
-
@
|
941
|
+
def set_numeric_data
|
942
|
+
@numeric_data=@valid_data.collect do|x|
|
920
943
|
if x.is_a? Numeric
|
921
944
|
x
|
922
945
|
elsif x.is_a? String and x.to_i==x.to_f
|
@@ -927,21 +950,21 @@ module Statsample
|
|
927
950
|
end
|
928
951
|
end
|
929
952
|
|
930
|
-
private :set_date_data, :
|
953
|
+
private :set_date_data, :set_numeric_data
|
931
954
|
|
932
955
|
# The range of the data (max - min)
|
933
956
|
def range;
|
934
|
-
check_type :
|
935
|
-
@
|
957
|
+
check_type :numeric
|
958
|
+
@numeric_data.max - @numeric_data.min
|
936
959
|
end
|
937
960
|
# The sum of values for the data
|
938
961
|
def sum
|
939
|
-
check_type :
|
940
|
-
@
|
962
|
+
check_type :numeric
|
963
|
+
@numeric_data.inject(0){|a,x|x+a} ;
|
941
964
|
end
|
942
965
|
# The arithmetical mean of data
|
943
966
|
def mean
|
944
|
-
check_type :
|
967
|
+
check_type :numeric
|
945
968
|
sum.to_f.quo(n_valid)
|
946
969
|
end
|
947
970
|
# Sum of squares for the data around a value.
|
@@ -949,28 +972,28 @@ module Statsample
|
|
949
972
|
# ss= sum{(xi-m)^2}
|
950
973
|
#
|
951
974
|
def sum_of_squares(m=nil)
|
952
|
-
check_type :
|
975
|
+
check_type :numeric
|
953
976
|
m||=mean
|
954
|
-
@
|
977
|
+
@numeric_data.inject(0){|a,x| a+(x-m).square}
|
955
978
|
end
|
956
979
|
# Sum of squared deviation
|
957
980
|
def sum_of_squared_deviation
|
958
|
-
check_type :
|
959
|
-
@
|
981
|
+
check_type :numeric
|
982
|
+
@numeric_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
|
960
983
|
end
|
961
984
|
|
962
985
|
# Population variance (denominator N)
|
963
986
|
def variance_population(m=nil)
|
964
|
-
check_type :
|
987
|
+
check_type :numeric
|
965
988
|
m||=mean
|
966
|
-
squares=@
|
989
|
+
squares=@numeric_data.inject(0){|a,x| x.square+a}
|
967
990
|
squares.quo(n_valid) - m.square
|
968
991
|
end
|
969
992
|
|
970
993
|
|
971
994
|
# Population Standard deviation (denominator N)
|
972
995
|
def standard_deviation_population(m=nil)
|
973
|
-
check_type :
|
996
|
+
check_type :numeric
|
974
997
|
Math::sqrt( variance_population(m) )
|
975
998
|
end
|
976
999
|
|
@@ -978,9 +1001,9 @@ module Statsample
|
|
978
1001
|
# author: Al Chou
|
979
1002
|
|
980
1003
|
def average_deviation_population( m = nil )
|
981
|
-
check_type :
|
1004
|
+
check_type :numeric
|
982
1005
|
m ||= mean
|
983
|
-
( @
|
1006
|
+
( @numeric_data.inject( 0 ) { |a, x| ( x - m ).abs + a } ).quo( n_valid )
|
984
1007
|
end
|
985
1008
|
def median_absolute_deviation
|
986
1009
|
med=median
|
@@ -989,43 +1012,43 @@ module Statsample
|
|
989
1012
|
alias :mad :median_absolute_deviation
|
990
1013
|
# Sample Variance (denominator n-1)
|
991
1014
|
def variance_sample(m=nil)
|
992
|
-
check_type :
|
1015
|
+
check_type :numeric
|
993
1016
|
m||=mean
|
994
1017
|
sum_of_squares(m).quo(n_valid - 1)
|
995
1018
|
end
|
996
1019
|
|
997
1020
|
# Sample Standard deviation (denominator n-1)
|
998
1021
|
def standard_deviation_sample(m=nil)
|
999
|
-
check_type :
|
1022
|
+
check_type :numeric
|
1000
1023
|
m||=mean
|
1001
1024
|
Math::sqrt(variance_sample(m))
|
1002
1025
|
end
|
1003
1026
|
# Skewness of the sample
|
1004
1027
|
def skew(m=nil)
|
1005
|
-
check_type :
|
1028
|
+
check_type :numeric
|
1006
1029
|
m||=mean
|
1007
|
-
th=@
|
1008
|
-
th.quo((@
|
1030
|
+
th=@numeric_data.inject(0){|a,x| a+((x-m)**3)}
|
1031
|
+
th.quo((@numeric_data.size)*sd(m)**3)
|
1009
1032
|
end
|
1010
1033
|
# Kurtosis of the sample
|
1011
1034
|
def kurtosis(m=nil)
|
1012
|
-
check_type :
|
1035
|
+
check_type :numeric
|
1013
1036
|
m||=mean
|
1014
|
-
fo=@
|
1015
|
-
fo.quo((@
|
1037
|
+
fo=@numeric_data.inject(0){|a,x| a+((x-m)**4)}
|
1038
|
+
fo.quo((@numeric_data.size)*sd(m)**4)-3
|
1016
1039
|
|
1017
1040
|
end
|
1018
1041
|
# Product of all values on the sample
|
1019
1042
|
#
|
1020
1043
|
def product
|
1021
|
-
check_type :
|
1022
|
-
@
|
1044
|
+
check_type :numeric
|
1045
|
+
@numeric_data.inject(1){|a,x| a*x }
|
1023
1046
|
end
|
1024
1047
|
|
1025
1048
|
# With a fixnum, creates X bins within the range of data
|
1026
1049
|
# With an Array, each value will be a cut point
|
1027
1050
|
def histogram(bins=10)
|
1028
|
-
check_type :
|
1051
|
+
check_type :numeric
|
1029
1052
|
|
1030
1053
|
if bins.is_a? Array
|
1031
1054
|
#h=Statsample::Histogram.new(self, bins)
|
@@ -1050,7 +1073,7 @@ module Statsample
|
|
1050
1073
|
# Coefficient of variation
|
1051
1074
|
# Calculed with the sample standard deviation
|
1052
1075
|
def coefficient_of_variation
|
1053
|
-
check_type :
|
1076
|
+
check_type :numeric
|
1054
1077
|
standard_deviation_sample.quo(mean)
|
1055
1078
|
end
|
1056
1079
|
# Standard error of the distribution mean
|