statsample 1.4.3 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +8 -0
- data/benchmarks/correlation_matrix_15_variables.rb +1 -1
- data/benchmarks/correlation_matrix_5_variables.rb +1 -1
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +2 -2
- data/examples/dataset.rb +2 -2
- data/examples/icc.rb +1 -1
- data/examples/levene.rb +2 -2
- data/examples/parallel_analysis.rb +1 -1
- data/examples/u_test.rb +2 -2
- data/examples/vector.rb +1 -1
- data/examples/velicer_map_test.rb +1 -1
- data/lib/statsample.rb +30 -4
- data/lib/statsample/anova/oneway.rb +3 -3
- data/lib/statsample/anova/twoway.rb +3 -3
- data/lib/statsample/bivariate.rb +7 -7
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/converter/csv.rb +1 -1
- data/lib/statsample/converters.rb +7 -7
- data/lib/statsample/dataset.rb +8 -8
- data/lib/statsample/dominanceanalysis.rb +4 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +8 -8
- data/lib/statsample/factor.rb +2 -4
- data/lib/statsample/factor/map.rb +2 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +2 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/boxplot.rb +4 -4
- data/lib/statsample/graph/histogram.rb +2 -2
- data/lib/statsample/graph/scatterplot.rb +4 -4
- data/lib/statsample/matrix.rb +20 -6
- data/lib/statsample/regression.rb +2 -2
- data/lib/statsample/regression/multiple.rb +3 -3
- data/lib/statsample/regression/multiple/alglibengine.rb +5 -5
- data/lib/statsample/regression/multiple/baseengine.rb +3 -3
- data/lib/statsample/regression/multiple/gslengine.rb +5 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +4 -4
- data/lib/statsample/reliability/icc.rb +1 -1
- data/lib/statsample/reliability/multiscaleanalysis.rb +4 -4
- data/lib/statsample/reliability/scaleanalysis.rb +6 -6
- data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +1 -1
- data/lib/statsample/test/bartlettsphericity.rb +1 -1
- data/lib/statsample/test/levene.rb +4 -4
- data/lib/statsample/test/t.rb +3 -3
- data/lib/statsample/test/umannwhitney.rb +2 -2
- data/lib/statsample/vector.rb +103 -80
- data/lib/statsample/vector/gsl.rb +16 -16
- data/lib/statsample/version.rb +1 -1
- data/test/test_analysis.rb +1 -1
- data/test/test_anova_contrast.rb +4 -4
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +6 -6
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +3 -3
- data/test/test_bivariate.rb +38 -38
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.rb +6 -6
- data/test/test_dataset.rb +79 -79
- data/test/test_factor.rb +55 -49
- data/test/test_factor_pa.rb +4 -4
- data/test/test_ggobi.rb +3 -3
- data/test/test_gsl.rb +3 -3
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +5 -5
- data/test/test_multiset.rb +19 -19
- data/test/test_regression.rb +27 -27
- data/test/test_reliability.rb +14 -14
- data/test/test_reliability_icc.rb +7 -7
- data/test/test_reliability_skillscale.rb +6 -6
- data/test/test_resample.rb +1 -1
- data/test/test_rserve_extension.rb +4 -4
- data/test/test_statistics.rb +5 -5
- data/test/test_stest.rb +8 -8
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +5 -5
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +153 -119
- data/test/test_wilcoxonsignedrank.rb +4 -4
- data/test/test_xls.rb +6 -6
- metadata +3 -53
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bdb8a5e29a75f62af49b094ac026bbc01597707
|
4
|
+
data.tar.gz: 9459ac7bf01e6d20e81a23286dab63f63bdbcea3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de03429dbc2c7a1bf0e0acfd0afcebcb55f53630688d635bdfa549eca988bfde40f601e056ae10946f064e86e769bc3be0b50888e1a3209ab243d0253e5b9ed6
|
7
|
+
data.tar.gz: dd71de3f239d8202f3716e4e268f5199cce24e6436e1b017d82970a3bf27da1749bc00a0ab35d030fbd1f63b717aab49c513bb51f8728807a565206cf81d30f1
|
data/History.txt
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
=== 1.5.0 / 2015-06-11
|
2
|
+
* Made sure all methods work properly with and without GSL.
|
3
|
+
* Statsample works with either rb-gsl or gsl-nmatrix.
|
4
|
+
* Changed the data types of Statsample::Vector from :ordinal, :scale and
|
5
|
+
:nominal to only :numeric and :object. :numeric replaces :ordinal/:scale
|
6
|
+
and :object replaces :nominal. Methods for creating the older data types still
|
7
|
+
exist, but throw a warning prodding the user to use the new methods.
|
8
|
+
|
1
9
|
=== 1.4.3 / 2015-04-27
|
2
10
|
* Removed rb-gsl dependency.
|
3
11
|
|
@@ -18,7 +18,7 @@ In this test, we test the calculation using #{vars} variables with
|
|
18
18
|
reps 200 #number of repetitions
|
19
19
|
|
20
20
|
ds=vars.times.inject({}) {|ac,v|
|
21
|
-
ac["x#{v}"]=Statsample::Vector.
|
21
|
+
ac["x#{v}"]=Statsample::Vector.new_numeric(cases) {rand()}
|
22
22
|
ac
|
23
23
|
}.to_dataset
|
24
24
|
|
@@ -7,7 +7,7 @@ require 'benchmark'
|
|
7
7
|
def create_dataset(vars,cases)
|
8
8
|
ran=Distribution::Normal.rng
|
9
9
|
ds=vars.times.inject({}) {|ac,v|
|
10
|
-
ac["x#{v}"]=Statsample::Vector.
|
10
|
+
ac["x#{v}"]=Statsample::Vector.new_numeric(cases) {ran.call}
|
11
11
|
ac
|
12
12
|
}.to_dataset
|
13
13
|
end
|
@@ -56,7 +56,7 @@ else
|
|
56
56
|
end
|
57
57
|
|
58
58
|
|
59
|
-
rs.fields.each {|f| rs[f].type=:
|
59
|
+
rs.fields.each {|f| rs[f].type=:numeric}
|
60
60
|
|
61
61
|
rs['c_v']=rs.collect {|row| row['cases']*row['vars']}
|
62
62
|
|
data/examples/dataset.rb
CHANGED
@@ -4,8 +4,8 @@ require 'statsample'
|
|
4
4
|
|
5
5
|
Statsample::Analysis.store(Statsample::Dataset) do
|
6
6
|
samples=1000
|
7
|
-
a=Statsample::Vector.
|
8
|
-
b=Statsample::Vector.
|
7
|
+
a=Statsample::Vector.new_numeric(samples) {r=rand(5); r==4 ? nil: r}
|
8
|
+
b=Statsample::Vector.new_numeric(samples) {r=rand(5); r==4 ? nil: r}
|
9
9
|
|
10
10
|
ds={'a'=>a,'b'=>b}.to_dataset
|
11
11
|
summary(ds)
|
data/examples/icc.rb
CHANGED
@@ -6,7 +6,7 @@ require 'statsample'
|
|
6
6
|
Statsample::Analysis.store(Statsample::Reliability::ICC) do
|
7
7
|
|
8
8
|
size=1000
|
9
|
-
a=Statsample::Vector.
|
9
|
+
a=Statsample::Vector.new_numeric(size) {rand(10)}
|
10
10
|
b=a.recode{|i|i+rand(4)-2}
|
11
11
|
c=a.recode{|i|i+rand(4)-2}
|
12
12
|
d=a.recode{|i|i+rand(4)-2}
|
data/examples/levene.rb
CHANGED
@@ -5,8 +5,8 @@ require 'statsample'
|
|
5
5
|
|
6
6
|
Statsample::Analysis.store(Statsample::Test::Levene) do
|
7
7
|
|
8
|
-
a=[1,2,3,4,5,6,7,8,100,10].
|
9
|
-
b=[30,40,50,60,70,80,90,100,110,120].
|
8
|
+
a=[1,2,3,4,5,6,7,8,100,10].to_numeric
|
9
|
+
b=[30,40,50,60,70,80,90,100,110,120].to_numeric
|
10
10
|
summary(levene([a,b]))
|
11
11
|
end
|
12
12
|
|
@@ -15,7 +15,7 @@ f3=rnorm(samples)
|
|
15
15
|
vectors={}
|
16
16
|
|
17
17
|
variables.times do |i|
|
18
|
-
vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.call}.
|
18
|
+
vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.call}.to_numeric
|
19
19
|
vectors["v#{i}"].name="Vector #{i}"
|
20
20
|
end
|
21
21
|
|
data/examples/u_test.rb
CHANGED
@@ -4,8 +4,8 @@ require 'statsample'
|
|
4
4
|
|
5
5
|
Statsample::Analysis.store(Statsample::Test::UMannWhitney) do
|
6
6
|
|
7
|
-
a=10.times.map {rand(100)}.
|
8
|
-
b=20.times.map {(rand(20))**2+50}.
|
7
|
+
a=10.times.map {rand(100)}.to_numeric
|
8
|
+
b=20.times.map {(rand(20))**2+50}.to_numeric
|
9
9
|
|
10
10
|
u=Statsample::Test::UMannWhitney.new(a,b)
|
11
11
|
summary u
|
data/examples/vector.rb
CHANGED
data/lib/statsample.rb
CHANGED
@@ -87,6 +87,32 @@ class Array
|
|
87
87
|
self
|
88
88
|
end
|
89
89
|
end
|
90
|
+
|
91
|
+
def sum
|
92
|
+
inject(:+)
|
93
|
+
end
|
94
|
+
|
95
|
+
def mean
|
96
|
+
self.sum / size
|
97
|
+
end
|
98
|
+
|
99
|
+
# Calculate sum of squares
|
100
|
+
def sum_of_squares(m=nil)
|
101
|
+
m ||= mean
|
102
|
+
self.inject(0) {|a,x| a + (x-m).square }
|
103
|
+
end
|
104
|
+
|
105
|
+
# Calculate sample variance
|
106
|
+
def variance_sample(m=nil)
|
107
|
+
m ||= mean
|
108
|
+
sum_of_squares(m).quo(size - 1)
|
109
|
+
end
|
110
|
+
|
111
|
+
# Calculate sample standard deviation
|
112
|
+
def sd
|
113
|
+
m ||= mean
|
114
|
+
Math::sqrt(variance_sample(m))
|
115
|
+
end
|
90
116
|
end
|
91
117
|
|
92
118
|
def create_test(*args, &_proc)
|
@@ -141,7 +167,7 @@ module Statsample
|
|
141
167
|
class_variable_get(cv)
|
142
168
|
end
|
143
169
|
end
|
144
|
-
|
170
|
+
|
145
171
|
create_has_library :gsl
|
146
172
|
|
147
173
|
SPLIT_TOKEN = ','
|
@@ -202,9 +228,9 @@ module Statsample
|
|
202
228
|
# Returns a duplicate of the input vectors, without missing data
|
203
229
|
# for any of the vectors.
|
204
230
|
#
|
205
|
-
# a=[1,2,3,6,7,nil,3,5].
|
206
|
-
# b=[nil,nil,5,6,4,5,10,2].
|
207
|
-
# c=[2,4,6,7,4,5,6,7].
|
231
|
+
# a=[1,2,3,6,7,nil,3,5].to_numeric
|
232
|
+
# b=[nil,nil,5,6,4,5,10,2].to_numeric
|
233
|
+
# c=[2,4,6,7,4,5,6,7].to_numeric
|
208
234
|
# a2,b2,c2=Statsample.only_valid(a,b,c)
|
209
235
|
# => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
|
210
236
|
# #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
|
@@ -67,9 +67,9 @@ module Statsample
|
|
67
67
|
|
68
68
|
# One Way Anova with vectors
|
69
69
|
# Example:
|
70
|
-
# v1=[2,3,4,5,6].
|
71
|
-
# v2=[3,3,4,5,6].
|
72
|
-
# v3=[5,3,1,5,6].
|
70
|
+
# v1=[2,3,4,5,6].to_numeric
|
71
|
+
# v2=[3,3,4,5,6].to_numeric
|
72
|
+
# v3=[5,3,1,5,6].to_numeric
|
73
73
|
# anova=Statsample::Anova::OneWayWithVectors.new([v1,v2,v3])
|
74
74
|
# anova.f
|
75
75
|
# => 0.0243902439024391
|
@@ -107,9 +107,9 @@ module Statsample
|
|
107
107
|
|
108
108
|
# Two Way Anova with vectors
|
109
109
|
# Example:
|
110
|
-
# v1=[1,1,2,2].
|
111
|
-
# v2=[1,2,1,2].
|
112
|
-
# v3=[5,3,1,5].
|
110
|
+
# v1=[1,1,2,2].to_numeric
|
111
|
+
# v2=[1,2,1,2].to_numeric
|
112
|
+
# v3=[5,3,1,5].to_numeric
|
113
113
|
# anova=Statsample::Anova::TwoWayWithVectors.new(:a=>v1,:b=>v2, :dependent=>v3)
|
114
114
|
#
|
115
115
|
class TwoWayWithVectors < TwoWay
|
data/lib/statsample/bivariate.rb
CHANGED
@@ -125,7 +125,7 @@ module Statsample
|
|
125
125
|
nv.push(froms[i]-r*dels[i])
|
126
126
|
end
|
127
127
|
end
|
128
|
-
nv.to_vector(:
|
128
|
+
nv.to_vector(:numeric)
|
129
129
|
end
|
130
130
|
# Correlation between v1 and v2, controlling the effect of
|
131
131
|
# control on both.
|
@@ -169,7 +169,7 @@ module Statsample
|
|
169
169
|
def covariance_matrix_pairwise(ds)
|
170
170
|
cache={}
|
171
171
|
matrix=ds.collect_matrix do |row,col|
|
172
|
-
if (ds[row].type!=:
|
172
|
+
if (ds[row].type!=:numeric or ds[col].type!=:numeric)
|
173
173
|
nil
|
174
174
|
elsif row==col
|
175
175
|
ds[row].variance
|
@@ -215,7 +215,7 @@ module Statsample
|
|
215
215
|
cm=ds.collect_matrix do |row,col|
|
216
216
|
if row==col
|
217
217
|
1.0
|
218
|
-
elsif (ds[row].type!=:
|
218
|
+
elsif (ds[row].type!=:numeric or ds[col].type!=:numeric)
|
219
219
|
nil
|
220
220
|
else
|
221
221
|
if cache[[col,row]].nil?
|
@@ -248,7 +248,7 @@ module Statsample
|
|
248
248
|
rows=ds.fields.collect do |row|
|
249
249
|
ds.fields.collect do |col|
|
250
250
|
v1a,v2a=Statsample.only_valid_clone(ds[row],ds[col])
|
251
|
-
(row==col or ds[row].type!=:
|
251
|
+
(row==col or ds[row].type!=:numeric or ds[col].type!=:numeric) ? nil : prop_pearson(t_pearson(ds[row],ds[col]), v1a.size, tails)
|
252
252
|
end
|
253
253
|
end
|
254
254
|
Matrix.rows(rows)
|
@@ -257,7 +257,7 @@ module Statsample
|
|
257
257
|
# Spearman ranked correlation coefficient (rho) between 2 vectors
|
258
258
|
def spearman(v1,v2)
|
259
259
|
v1a,v2a=Statsample.only_valid_clone(v1,v2)
|
260
|
-
v1r,v2r=v1a.ranked(:
|
260
|
+
v1r,v2r=v1a.ranked(:numeric),v2a.ranked(:numeric)
|
261
261
|
pearson(v1r,v2r)
|
262
262
|
end
|
263
263
|
# Calculate Point biserial correlation. Equal to Pearson correlation, with
|
@@ -265,7 +265,7 @@ module Statsample
|
|
265
265
|
def point_biserial(dichotomous,continous)
|
266
266
|
ds={'d'=>dichotomous,'c'=>continous}.to_dataset.dup_only_valid
|
267
267
|
raise(TypeError, "First vector should be dichotomous") if ds['d'].factors.size!=2
|
268
|
-
raise(TypeError, "Second vector should be continous") if ds['c'].type!=:
|
268
|
+
raise(TypeError, "Second vector should be continous") if ds['c'].type!=:numeric
|
269
269
|
f0=ds['d'].factors.sort[0]
|
270
270
|
m0=ds.filter_field('c') {|c| c['d']==f0}
|
271
271
|
m1=ds.filter_field('c') {|c| c['d']!=f0}
|
@@ -276,7 +276,7 @@ module Statsample
|
|
276
276
|
def tau_a(v1,v2)
|
277
277
|
v1a,v2a=Statsample.only_valid_clone(v1,v2)
|
278
278
|
n=v1.size
|
279
|
-
v1r,v2r=v1a.ranked(:
|
279
|
+
v1r,v2r=v1a.ranked(:numeric),v2a.ranked(:numeric)
|
280
280
|
o1=ordered_pairs(v1r)
|
281
281
|
o2=ordered_pairs(v2r)
|
282
282
|
delta= o1.size*2-(o2 & o1).size*2
|
@@ -7,8 +7,8 @@ module Statsample
|
|
7
7
|
# variables.
|
8
8
|
#
|
9
9
|
# == Usage
|
10
|
-
# a = [1,2,3,4,5,6].
|
11
|
-
# b = [2,3,4,5,6,7].
|
10
|
+
# a = [1,2,3,4,5,6].to_numeric
|
11
|
+
# b = [2,3,4,5,6,7].to_numeric
|
12
12
|
# pearson = Statsample::Bivariate::Pearson.new(a,b)
|
13
13
|
# puts pearson.r
|
14
14
|
# puts pearson.t
|
@@ -18,7 +18,7 @@ module Statsample
|
|
18
18
|
sth.column_info.each {|c|
|
19
19
|
vectors[c['name']]=Statsample::Vector.new([])
|
20
20
|
vectors[c['name']].name=c['name']
|
21
|
-
vectors[c['name']].type= (c['type_name']=='INTEGER' or c['type_name']=='DOUBLE') ? :
|
21
|
+
vectors[c['name']].type= (c['type_name']=='INTEGER' or c['type_name']=='DOUBLE') ? :numeric : :object
|
22
22
|
fields.push(c['name'])
|
23
23
|
}
|
24
24
|
ds=Statsample::Dataset.new(vectors,fields)
|
@@ -106,10 +106,10 @@ module Statsample
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
end
|
109
|
-
def
|
109
|
+
def convert_to_numeric_and_date(ds,fields)
|
110
110
|
fields.each do |f|
|
111
|
-
if ds[f].
|
112
|
-
ds[f].type=:
|
111
|
+
if ds[f].can_be_numeric?
|
112
|
+
ds[f].type=:numeric
|
113
113
|
elsif ds[f].can_be_date?
|
114
114
|
ds[f].type=:date
|
115
115
|
end
|
@@ -128,7 +128,7 @@ module Statsample
|
|
128
128
|
next if row==["\x1A"]
|
129
129
|
ds.add_case_array(row)
|
130
130
|
end
|
131
|
-
|
131
|
+
convert_to_numeric_and_date(ds,fields)
|
132
132
|
ds.update_valid_data
|
133
133
|
fields.each {|f|
|
134
134
|
ds[f].name=f
|
@@ -231,7 +231,7 @@ module Statsample
|
|
231
231
|
raise
|
232
232
|
end
|
233
233
|
end
|
234
|
-
|
234
|
+
convert_to_numeric_and_date(ds, fields)
|
235
235
|
ds.update_valid_data
|
236
236
|
fields.each {|f|
|
237
237
|
ds[f].name=f
|
@@ -345,7 +345,7 @@ out
|
|
345
345
|
# nickname = nickname
|
346
346
|
def variable_definition(carrier,v,name,nickname=nil)
|
347
347
|
nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"" )
|
348
|
-
if v.type==:
|
348
|
+
if v.type==:object or v.data.find {|d| d.is_a? String }
|
349
349
|
carrier.categorials.push(name)
|
350
350
|
carrier.conversions[name]={}
|
351
351
|
factors=v.factors
|
data/lib/statsample/dataset.rb
CHANGED
@@ -52,8 +52,8 @@ module Statsample
|
|
52
52
|
#
|
53
53
|
# The fast way to create a dataset uses Hash#to_dataset, with
|
54
54
|
# field order as arguments
|
55
|
-
# v1 = [1,2,3].
|
56
|
-
# v2 = [1,2,3].
|
55
|
+
# v1 = [1,2,3].to_numeric
|
56
|
+
# v2 = [1,2,3].to_numeric
|
57
57
|
# ds = {'v1'=>v1, 'v2'=>v2}.to_dataset(%w{v2 v1})
|
58
58
|
|
59
59
|
class Dataset
|
@@ -477,7 +477,7 @@ module Statsample
|
|
477
477
|
}
|
478
478
|
end
|
479
479
|
|
480
|
-
def vector_by_calculation(type=:
|
480
|
+
def vector_by_calculation(type=:numeric)
|
481
481
|
a=[]
|
482
482
|
each do |row|
|
483
483
|
a.push(yield(row))
|
@@ -547,7 +547,7 @@ module Statsample
|
|
547
547
|
a.push(sum.quo(size-invalids))
|
548
548
|
end
|
549
549
|
end
|
550
|
-
a=a.to_vector(:
|
550
|
+
a=a.to_vector(:numeric)
|
551
551
|
a.name=_("Means from %s") % @name
|
552
552
|
a
|
553
553
|
end
|
@@ -680,7 +680,7 @@ module Statsample
|
|
680
680
|
end
|
681
681
|
# Retrieves a Statsample::Vector, based on the result
|
682
682
|
# of calculation performed on each case.
|
683
|
-
def collect(type=:
|
683
|
+
def collect(type=:numeric)
|
684
684
|
data=[]
|
685
685
|
each {|row|
|
686
686
|
data.push yield(row)
|
@@ -688,7 +688,7 @@ module Statsample
|
|
688
688
|
Statsample::Vector.new(data,type)
|
689
689
|
end
|
690
690
|
# Same as Statsample::Vector.collect, but giving case index as second parameter on yield.
|
691
|
-
def collect_with_index(type=:
|
691
|
+
def collect_with_index(type=:numeric)
|
692
692
|
data=[]
|
693
693
|
each_with_index {|row, i|
|
694
694
|
data.push(yield(row, i))
|
@@ -869,7 +869,7 @@ module Statsample
|
|
869
869
|
# => Vector [4,6]
|
870
870
|
def compute(text)
|
871
871
|
@fields.each{|f|
|
872
|
-
if @vectors[f].type=:
|
872
|
+
if @vectors[f].type=:numeric
|
873
873
|
text.gsub!(f,"row['#{f}'].to_f")
|
874
874
|
else
|
875
875
|
text.gsub!(f,"row['#{f}']")
|
@@ -958,7 +958,7 @@ module Statsample
|
|
958
958
|
max_n=0
|
959
959
|
h=parent_fields.inject({}) {|a,v| a[v]=Statsample::Vector.new([], @vectors[v].type);a }
|
960
960
|
# Adding _col_id
|
961
|
-
h['_col_id']=[].
|
961
|
+
h['_col_id']=[].to_numeric
|
962
962
|
ds_vars.push("_col_id")
|
963
963
|
@fields.each do |f|
|
964
964
|
if f=~re
|
@@ -7,9 +7,9 @@ module Statsample
|
|
7
7
|
#
|
8
8
|
# == Use
|
9
9
|
#
|
10
|
-
# a=1000.times.collect {rand}.
|
11
|
-
# b=1000.times.collect {rand}.
|
12
|
-
# c=1000.times.collect {rand}.
|
10
|
+
# a=1000.times.collect {rand}.to_numeric
|
11
|
+
# b=1000.times.collect {rand}.to_numeric
|
12
|
+
# c=1000.times.collect {rand}.to_numeric
|
13
13
|
# ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
14
14
|
# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
|
15
15
|
# da=Statsample::DominanceAnalysis.new(ds,'y')
|
@@ -279,7 +279,7 @@ module Statsample
|
|
279
279
|
|
280
280
|
def get_averages(averages)
|
281
281
|
out={}
|
282
|
-
averages.each{|key,val| out[key]=val.to_vector(:
|
282
|
+
averages.each{|key,val| out[key]=val.to_vector(:numeric).mean }
|
283
283
|
out
|
284
284
|
end
|
285
285
|
# Hash with average for each k size model.
|
@@ -6,10 +6,10 @@ module Statsample
|
|
6
6
|
# == Usage
|
7
7
|
#
|
8
8
|
# require 'statsample'
|
9
|
-
# a=100.times.collect {rand}.
|
10
|
-
# b=100.times.collect {rand}.
|
11
|
-
# c=100.times.collect {rand}.
|
12
|
-
# d=100.times.collect {rand}.
|
9
|
+
# a=100.times.collect {rand}.to_numeric
|
10
|
+
# b=100.times.collect {rand}.to_numeric
|
11
|
+
# c=100.times.collect {rand}.to_numeric
|
12
|
+
# d=100.times.collect {rand}.to_numeric
|
13
13
|
# ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
14
14
|
# ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
|
15
15
|
# dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y', :debug=>true)
|
@@ -182,7 +182,7 @@ module Statsample
|
|
182
182
|
table.row([_("Complete dominance"),"","","","","","",""])
|
183
183
|
table.hr
|
184
184
|
@pairs.each{|pair|
|
185
|
-
std=@samples_td[pair].to_vector(:
|
185
|
+
std=@samples_td[pair].to_vector(:numeric)
|
186
186
|
ttd=da.total_dominance_pairwise(pair[0],pair[1])
|
187
187
|
table.row(summary_pairs(pair,std,ttd))
|
188
188
|
}
|
@@ -190,7 +190,7 @@ module Statsample
|
|
190
190
|
table.row([_("Conditional dominance"),"","","","","","",""])
|
191
191
|
table.hr
|
192
192
|
@pairs.each{|pair|
|
193
|
-
std=@samples_cd[pair].to_vector(:
|
193
|
+
std=@samples_cd[pair].to_vector(:numeric)
|
194
194
|
ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
|
195
195
|
table.row(summary_pairs(pair,std,ttd))
|
196
196
|
|
@@ -199,7 +199,7 @@ module Statsample
|
|
199
199
|
table.row([_("General Dominance"),"","","","","","",""])
|
200
200
|
table.hr
|
201
201
|
@pairs.each{|pair|
|
202
|
-
std=@samples_gd[pair].to_vector(:
|
202
|
+
std=@samples_gd[pair].to_vector(:numeric)
|
203
203
|
ttd=da.general_dominance_pairwise(pair[0],pair[1])
|
204
204
|
table.row(summary_pairs(pair,std,ttd))
|
205
205
|
}
|
@@ -208,7 +208,7 @@ module Statsample
|
|
208
208
|
table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
|
209
209
|
|
210
210
|
@fields.each{|f|
|
211
|
-
v=@samples_ga[f].to_vector(:
|
211
|
+
v=@samples_ga[f].to_vector(:numeric)
|
212
212
|
row=[@ds[f].name, sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
|
213
213
|
table.row(row)
|
214
214
|
|