statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
@@ -7,13 +7,13 @@ module Statsample
|
|
7
7
|
#
|
8
8
|
# == Use
|
9
9
|
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
10
|
+
# a = Daru::Vector.new(1000.times.collect {rand})
|
11
|
+
# b = Daru::Vector.new(1000.times.collect {rand})
|
12
|
+
# c = Daru::Vector.new(1000.times.collect {rand})
|
13
|
+
# ds= Daru::DataFrame.new({:a => a,:b => b,:c => c})
|
14
|
+
# ds[:y] = ds.collect_rows {|row| row[:a]*5 + row[:b]*3 + row[:c]*2 + rand()}
|
15
|
+
# da=Statsample::DominanceAnalysis.new(ds, :y)
|
16
|
+
# puts da.summary
|
17
17
|
#
|
18
18
|
# === Output:
|
19
19
|
#
|
@@ -115,21 +115,21 @@ module Statsample
|
|
115
115
|
}
|
116
116
|
@dependent=dependent
|
117
117
|
@dependent=[@dependent] unless @dependent.is_a? Array
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
@name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
|
122
|
-
|
123
|
-
if input.is_a? Statsample::Dataset
|
118
|
+
|
119
|
+
if input.kind_of? Daru::DataFrame
|
120
|
+
@predictors ||= input.vectors.to_a - @dependent
|
124
121
|
@ds=input
|
125
122
|
@matrix=Statsample::Bivariate.correlation_matrix(input)
|
126
123
|
@cases=Statsample::Bivariate.min_n_valid(input)
|
127
124
|
elsif input.is_a? ::Matrix
|
125
|
+
@predictors ||= input.fields-@dependent
|
128
126
|
@ds=nil
|
129
127
|
@matrix=input
|
130
128
|
else
|
131
129
|
raise ArgumentError.new("You should use a Matrix or a Dataset")
|
132
130
|
end
|
131
|
+
|
132
|
+
@name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
|
133
133
|
@models=nil
|
134
134
|
@models_data=nil
|
135
135
|
@general_averages=nil
|
@@ -264,22 +264,21 @@ module Statsample
|
|
264
264
|
end
|
265
265
|
|
266
266
|
def md(m)
|
267
|
-
models_data[m.sort {|a,b| a.to_s<=>b.to_s}]
|
267
|
+
models_data[m.sort {|a,b| a.to_s <=> b.to_s}]
|
268
268
|
end
|
269
269
|
# Get all models of size k
|
270
270
|
def md_k(k)
|
271
271
|
out=[]
|
272
|
-
@models.each{|m| out.push(md(m)) if m.size==k }
|
272
|
+
@models.each{ |m| out.push(md(m)) if m.size==k }
|
273
273
|
out
|
274
274
|
end
|
275
275
|
|
276
276
|
# For a hash with arrays of numbers as values
|
277
277
|
# Returns a hash with same keys and
|
278
278
|
# value as the mean of values of original hash
|
279
|
-
|
280
279
|
def get_averages(averages)
|
281
280
|
out={}
|
282
|
-
averages.each{|key,val| out[key]=
|
281
|
+
averages.each{ |key,val| out[key] = Daru::Vector.new(val).mean }
|
283
282
|
out
|
284
283
|
end
|
285
284
|
# Hash with average for each k size model.
|
@@ -5,16 +5,16 @@ module Statsample
|
|
5
5
|
#
|
6
6
|
# == Usage
|
7
7
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
8
|
+
# require 'statsample'
|
9
|
+
# a = Daru::Vector.new(100.times.collect {rand})
|
10
|
+
# b = Daru::Vector.new(100.times.collect {rand})
|
11
|
+
# c = Daru::Vector.new(100.times.collect {rand})
|
12
|
+
# d = Daru::Vector.new(100.times.collect {rand})
|
13
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
|
14
|
+
# ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
|
15
|
+
# dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
|
16
|
+
# dab.bootstrap(100,nil)
|
17
|
+
# puts dab.summary
|
18
18
|
# <strong>Output</strong>
|
19
19
|
# Sample size: 100
|
20
20
|
# t: 1.98421693632958
|
@@ -91,28 +91,28 @@ module Statsample
|
|
91
91
|
ALPHA=0.95
|
92
92
|
# Create a new Dominance Analysis Bootstrap Object
|
93
93
|
#
|
94
|
-
# * ds: A
|
94
|
+
# * ds: A Daru::DataFrame object
|
95
95
|
# * y_var: Name of dependent variable
|
96
96
|
# * opts: Any other attribute of the class
|
97
97
|
def initialize(ds,y_var, opts=Hash.new)
|
98
|
-
@ds=ds
|
99
|
-
@y_var=y_var
|
100
|
-
@n=ds.
|
98
|
+
@ds = ds
|
99
|
+
@y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
|
100
|
+
@n = ds.nrows
|
101
101
|
|
102
102
|
@n_samples=0
|
103
103
|
@alpha=ALPHA
|
104
104
|
@debug=false
|
105
105
|
if y_var.is_a? Array
|
106
|
-
@fields=ds.
|
106
|
+
@fields=ds.vectors.to_a - y_var
|
107
107
|
@regression_class=Regression::Multiple::MultipleDependent
|
108
108
|
|
109
109
|
else
|
110
|
-
@fields=ds.
|
110
|
+
@fields=ds.vectors.to_a - [y_var]
|
111
111
|
@regression_class=Regression::Multiple::MatrixEngine
|
112
112
|
end
|
113
|
-
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
113
|
+
@samples_ga=@fields.inject({}) { |a,v| a[v]=[]; a }
|
114
114
|
|
115
|
-
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.
|
115
|
+
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var]
|
116
116
|
opts.each{|k,v|
|
117
117
|
self.send("#{k}=",v) if self.respond_to? k
|
118
118
|
}
|
@@ -130,15 +130,14 @@ module Statsample
|
|
130
130
|
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
|
131
131
|
#
|
132
132
|
# * number_samples: Number of new samples to add
|
133
|
-
# * n: size of each new sample. If nil, equal to original sample size
|
134
|
-
|
133
|
+
# * n: size of each new sample. If nil, equal to original sample size
|
135
134
|
def bootstrap(number_samples,n=nil)
|
136
135
|
number_samples.times{ |t|
|
137
136
|
@n_samples+=1
|
138
137
|
puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
|
139
|
-
ds_boot=@ds.bootstrap(n)
|
138
|
+
ds_boot=@ds.bootstrap(n)
|
140
139
|
da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
|
141
|
-
|
140
|
+
|
142
141
|
da_1.total_dominance.each{|k,v|
|
143
142
|
@samples_td[k].push(v)
|
144
143
|
}
|
@@ -182,7 +181,7 @@ module Statsample
|
|
182
181
|
table.row([_("Complete dominance"),"","","","","","",""])
|
183
182
|
table.hr
|
184
183
|
@pairs.each{|pair|
|
185
|
-
std
|
184
|
+
std=Daru::Vector.new(@samples_td[pair])
|
186
185
|
ttd=da.total_dominance_pairwise(pair[0],pair[1])
|
187
186
|
table.row(summary_pairs(pair,std,ttd))
|
188
187
|
}
|
@@ -190,7 +189,7 @@ module Statsample
|
|
190
189
|
table.row([_("Conditional dominance"),"","","","","","",""])
|
191
190
|
table.hr
|
192
191
|
@pairs.each{|pair|
|
193
|
-
std
|
192
|
+
std=Daru::Vector.new(@samples_cd[pair])
|
194
193
|
ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
|
195
194
|
table.row(summary_pairs(pair,std,ttd))
|
196
195
|
|
@@ -199,7 +198,7 @@ module Statsample
|
|
199
198
|
table.row([_("General Dominance"),"","","","","","",""])
|
200
199
|
table.hr
|
201
200
|
@pairs.each{|pair|
|
202
|
-
std
|
201
|
+
std=Daru::Vector.new(@samples_gd[pair])
|
203
202
|
ttd=da.general_dominance_pairwise(pair[0],pair[1])
|
204
203
|
table.row(summary_pairs(pair,std,ttd))
|
205
204
|
}
|
@@ -208,10 +207,9 @@ module Statsample
|
|
208
207
|
table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
|
209
208
|
|
210
209
|
@fields.each{|f|
|
211
|
-
v
|
210
|
+
v=Daru::Vector.new(@samples_ga[f])
|
212
211
|
row=[@ds[f].name, sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
|
213
|
-
table.row(row)
|
214
|
-
|
212
|
+
table.row(row)
|
215
213
|
}
|
216
214
|
|
217
215
|
generator.parse_element(table)
|
@@ -22,13 +22,13 @@ module Statsample
|
|
22
22
|
|
23
23
|
class ParallelAnalysis
|
24
24
|
def self.with_random_data(cases,vars,opts=Hash.new)
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
ds.cases=cases
|
25
|
+
ds= Daru::DataFrame.new({},
|
26
|
+
order: vars.times.map {|i| "v#{i+1}".to_sym},
|
27
|
+
index: cases )
|
29
28
|
opts=opts.merge({:bootstrap_method=> :random, :no_data=>true})
|
30
29
|
new(ds, opts)
|
31
30
|
end
|
31
|
+
|
32
32
|
include DirtyMemoize
|
33
33
|
include Summarizable
|
34
34
|
# Number of random sets to produce. 50 by default
|
@@ -61,9 +61,9 @@ module Statsample
|
|
61
61
|
attr_accessor :use_gsl
|
62
62
|
def initialize(ds, opts=Hash.new)
|
63
63
|
@ds=ds
|
64
|
-
@fields=@ds.
|
64
|
+
@fields=@ds.vectors.to_a
|
65
65
|
@n_variables=@fields.size
|
66
|
-
@n_cases=ds.
|
66
|
+
@n_cases=ds.nrows
|
67
67
|
opts_default={
|
68
68
|
:name=>_("Parallel Analysis"),
|
69
69
|
:iterations=>50, # See Liu and Rijmen (2008)
|
@@ -82,7 +82,7 @@ module Statsample
|
|
82
82
|
# Number of factors to retain
|
83
83
|
def number_of_factors
|
84
84
|
total=0
|
85
|
-
ds_eigenvalues.
|
85
|
+
ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
|
86
86
|
if (@original[i]>0 and @original[i]>ds_eigenvalues[f].percentil(percentil))
|
87
87
|
total+=1
|
88
88
|
else
|
@@ -101,7 +101,7 @@ module Statsample
|
|
101
101
|
s.text _("Number of iterations: %d") % @iterations
|
102
102
|
if @no_data
|
103
103
|
s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("generated eigenvalue"), "p.#{percentil}"]) do |t|
|
104
|
-
ds_eigenvalues.
|
104
|
+
ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
|
105
105
|
v=ds_eigenvalues[f]
|
106
106
|
t.row [i+1, "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), ]
|
107
107
|
end
|
@@ -109,7 +109,7 @@ module Statsample
|
|
109
109
|
else
|
110
110
|
s.text _("Number or factors to preserve: %d") % number_of_factors
|
111
111
|
s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("data eigenvalue"), _("generated eigenvalue"),"p.#{percentil}",_("preserve?")]) do |t|
|
112
|
-
ds_eigenvalues.
|
112
|
+
ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
|
113
113
|
v=ds_eigenvalues[f]
|
114
114
|
t.row [i+1, "%0.4f" % @original[i], "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), (v.percentil(percentil)>0 and @original[i] > v.percentil(percentil)) ? "Yes":""]
|
115
115
|
end
|
@@ -120,11 +120,9 @@ module Statsample
|
|
120
120
|
end
|
121
121
|
# Perform calculation. Shouldn't be called directly by the user
|
122
122
|
def compute
|
123
|
+
@original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
|
124
|
+
@ds_eigenvalues=Daru::DataFrame.new({}, order: (1..@n_variables).map{|v| ("ev_%05d" % v).to_sym})
|
123
125
|
|
124
|
-
|
125
|
-
@original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
|
126
|
-
@ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
|
127
|
-
@ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:numeric}
|
128
126
|
if bootstrap_method==:parameter or bootstrap_method==:random
|
129
127
|
rng = Distribution::Normal.rng
|
130
128
|
end
|
@@ -133,18 +131,18 @@ module Statsample
|
|
133
131
|
begin
|
134
132
|
puts "#{@name}: Iteration #{i}" if $DEBUG or debug
|
135
133
|
# Create a dataset of dummy values
|
136
|
-
ds_bootstrap=
|
134
|
+
ds_bootstrap = Daru::DataFrame.new({}, order: @ds.vectors, index: @n_cases)
|
137
135
|
|
138
136
|
@fields.each do |f|
|
139
137
|
if bootstrap_method==:random
|
140
|
-
ds_bootstrap[f]
|
138
|
+
ds_bootstrap[f] = Daru::Vector.new(@n_cases.times.map {|c| rng.call})
|
141
139
|
elsif bootstrap_method==:data
|
142
|
-
ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases)
|
140
|
+
ds_bootstrap[f] = ds[f].sample_with_replacement(@n_cases)
|
143
141
|
else
|
144
142
|
raise "bootstrap_method doesn't recogniced"
|
145
143
|
end
|
146
144
|
end
|
147
|
-
ds_bootstrap.
|
145
|
+
ds_bootstrap.update
|
148
146
|
|
149
147
|
matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
|
150
148
|
matrix=matrix.to_gsl if @use_gsl
|
@@ -155,13 +153,13 @@ module Statsample
|
|
155
153
|
end
|
156
154
|
end
|
157
155
|
ev=matrix.eigenvalues
|
158
|
-
@ds_eigenvalues.
|
156
|
+
@ds_eigenvalues.add_row(ev)
|
159
157
|
rescue Statsample::Bivariate::Tetrachoric::RequerimentNotMeet => e
|
160
158
|
puts "Error: #{e}" if $DEBUG
|
161
159
|
redo
|
162
160
|
end
|
163
161
|
end
|
164
|
-
@ds_eigenvalues.
|
162
|
+
@ds_eigenvalues.update
|
165
163
|
end
|
166
164
|
dirty_memoize :number_of_factors, :ds_eigenvalues
|
167
165
|
dirty_writer :iterations, :bootstrap_method, :percentil, :smc
|
@@ -13,11 +13,11 @@ module Factor
|
|
13
13
|
#
|
14
14
|
# == Usage:
|
15
15
|
# require 'statsample'
|
16
|
-
# a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1]
|
17
|
-
# b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9]
|
18
|
-
# ds={
|
19
|
-
# cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
|
20
|
-
# pca=Statsample::Factor::PCA.new(cor_matrix)
|
16
|
+
# a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
|
17
|
+
# b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
|
18
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b})
|
19
|
+
# cor_matrix = Statsample::Bivariate.correlation_matrix(ds)
|
20
|
+
# pca= Statsample::Factor::PCA.new(cor_matrix)
|
21
21
|
# pca.m
|
22
22
|
# => 1
|
23
23
|
# pca.eigenvalues
|
@@ -52,11 +52,13 @@ module Factor
|
|
52
52
|
attr_accessor :rotation_type
|
53
53
|
attr_accessor :matrix_type
|
54
54
|
def initialize(matrix, opts=Hash.new)
|
55
|
-
@use_gsl=
|
55
|
+
@use_gsl = opts[:use_gsl]
|
56
|
+
opts.delete :use_gsl
|
57
|
+
|
56
58
|
@name=_("Principal Component Analysis")
|
57
59
|
@matrix=matrix
|
58
60
|
@n_variables=@matrix.column_size
|
59
|
-
@variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i|
|
61
|
+
@variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| "VAR_#{i+1}".to_sym }
|
60
62
|
|
61
63
|
@matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
|
62
64
|
|
@@ -67,13 +69,14 @@ module Factor
|
|
67
69
|
opts.each{|k,v|
|
68
70
|
self.send("#{k}=",v) if self.respond_to? k
|
69
71
|
}
|
72
|
+
|
70
73
|
if @use_gsl.nil?
|
71
74
|
@use_gsl=Statsample.has_gsl?
|
72
75
|
end
|
73
76
|
if @matrix.respond_to? :fields
|
74
77
|
@variables_names=@matrix.fields
|
75
78
|
else
|
76
|
-
@variables_names=@n_variables.times.map {|i| "V#{i+1}"}
|
79
|
+
@variables_names=@n_variables.times.map {|i| "V#{i+1}".to_sym}
|
77
80
|
end
|
78
81
|
calculate_eigenpairs
|
79
82
|
|
@@ -81,7 +84,6 @@ module Factor
|
|
81
84
|
# Set number of factors with eigenvalues > 1
|
82
85
|
@m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
|
83
86
|
end
|
84
|
-
|
85
87
|
end
|
86
88
|
def rotation
|
87
89
|
@rotation_type.new(component_matrix)
|
@@ -92,10 +94,10 @@ module Factor
|
|
92
94
|
def create_centered_ds
|
93
95
|
h={}
|
94
96
|
@original_ds.factors.each {|f|
|
95
|
-
mean
|
96
|
-
h[f]
|
97
|
+
mean = @original_ds[f].mean
|
98
|
+
h[f] = @original_ds[f].recode {|c| c-mean}
|
97
99
|
}
|
98
|
-
@ds=h
|
100
|
+
@ds = Daru::DataFrame.new(h)
|
99
101
|
end
|
100
102
|
|
101
103
|
# Feature matrix for +m+ factors
|
@@ -137,8 +139,8 @@ module Factor
|
|
137
139
|
pcs=(fv.transpose*data_matrix.transpose).transpose
|
138
140
|
|
139
141
|
pcs.extend Statsample::NamedMatrix
|
140
|
-
pcs.fields_y=m.times.map {|i| "PC_
|
141
|
-
pcs.
|
142
|
+
pcs.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
|
143
|
+
pcs.to_dataframe
|
142
144
|
end
|
143
145
|
def component_matrix(m=nil)
|
144
146
|
var="component_matrix_#{matrix_type}"
|
@@ -159,7 +161,7 @@ module Factor
|
|
159
161
|
cm.extend NamedMatrix
|
160
162
|
cm.name=_("Component matrix (from covariance)")
|
161
163
|
cm.fields_x = @variables_names
|
162
|
-
cm.fields_y = m.times.map {|i| "PC_
|
164
|
+
cm.fields_y = m.times.map {|i| "PC_#{i+1}".to_sym }
|
163
165
|
|
164
166
|
cm
|
165
167
|
end
|
@@ -180,17 +182,16 @@ module Factor
|
|
180
182
|
cm.extend CovariateMatrix
|
181
183
|
cm.name=_("Component matrix")
|
182
184
|
cm.fields_x = @variables_names
|
183
|
-
cm.fields_y = m.times.map {|i| "PC_
|
185
|
+
cm.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
|
184
186
|
cm
|
185
187
|
end
|
186
188
|
def communalities(m=nil)
|
187
|
-
|
188
189
|
m||=@m
|
189
190
|
h=[]
|
190
191
|
@n_variables.times do |i|
|
191
192
|
sum=0
|
192
193
|
m.times do |j|
|
193
|
-
sum+=(@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
|
194
|
+
sum += (@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
|
194
195
|
end
|
195
196
|
h.push(sum)
|
196
197
|
end
|
@@ -202,11 +203,11 @@ module Factor
|
|
202
203
|
end
|
203
204
|
def eigenvectors
|
204
205
|
@eigenpairs.collect {|c|
|
205
|
-
@use_gsl ? c[1].to_gsl : c[1]
|
206
|
+
@use_gsl ? c[1].to_gsl : Daru::Vector.new(c[1])
|
206
207
|
}
|
207
208
|
end
|
208
209
|
def calculate_eigenpairs
|
209
|
-
@eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
|
210
|
+
@eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
|
210
211
|
end
|
211
212
|
|
212
213
|
|
@@ -6,9 +6,9 @@ module Factor
|
|
6
6
|
#
|
7
7
|
# == Usage:
|
8
8
|
# require 'statsample'
|
9
|
-
# a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1]
|
10
|
-
# b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9]
|
11
|
-
# ds={
|
9
|
+
# a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
|
10
|
+
# b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
|
11
|
+
# ds= Daru::DataFrame.new({:a => a,:b => b})
|
12
12
|
# cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
|
13
13
|
# pa=Statsample::Factor::PrincipalAxis.new(cor_matrix)
|
14
14
|
# pa.iterate(1)
|
@@ -8,12 +8,12 @@ module Statsample
|
|
8
8
|
#
|
9
9
|
# == Usage
|
10
10
|
# === Svg output
|
11
|
-
# a=[1,2,3,4]
|
12
|
-
# b=[3,4,5,6]
|
13
|
-
#
|
11
|
+
# a = Daru::Vector.new([1,2,3,4])
|
12
|
+
# b = Daru::Vector.new([3,4,5,6])
|
13
|
+
# puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
|
14
14
|
# === Using ReportBuilder
|
15
|
-
# a=[1,2,3,4]
|
16
|
-
# b=[3,4,5,6]
|
15
|
+
# a = Daru::Vector.new([1,2,3,4])
|
16
|
+
# b = Daru::Vector.new([3,4,5,6])
|
17
17
|
# rb=ReportBuilder.new
|
18
18
|
# rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
|
19
19
|
# rb.save_html('boxplot.html')
|
@@ -85,8 +85,6 @@ module Statsample
|
|
85
85
|
min||=@vectors.map {|v| v.min}.min
|
86
86
|
max||=@vectors.map {|v| v.max}.max
|
87
87
|
|
88
|
-
|
89
|
-
|
90
88
|
margin_hor=margin_left + margin_right
|
91
89
|
margin_vert=margin_top + margin_bottom
|
92
90
|
x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
|
@@ -115,12 +113,10 @@ module Statsample
|
|
115
113
|
out[:low_whisker]=min
|
116
114
|
out[:high_whisker]=max
|
117
115
|
# And now, data outside whiskers
|
118
|
-
out[:outliers]=v.
|
116
|
+
out[:outliers]=v.to_a.find_all {|d| d < min or d > max }
|
119
117
|
out
|
120
118
|
}
|
121
|
-
|
122
|
-
|
123
|
-
|
119
|
+
|
124
120
|
vis=Rubyvis::Panel.new do |pan|
|
125
121
|
pan.width width - margin_hor
|
126
122
|
pan.height height - margin_vert
|
@@ -157,7 +153,6 @@ module Statsample
|
|
157
153
|
bp.left {|v| x_scale[index]}
|
158
154
|
bp.width x_scale.range_band
|
159
155
|
|
160
|
-
|
161
156
|
# Bar
|
162
157
|
bp.bar do |b|
|
163
158
|
b.bottom {|v| y_scale[v[:percentil_25]]}
|
@@ -168,9 +163,7 @@ module Statsample
|
|
168
163
|
colors.scale(that.groups[parent.index]).darker
|
169
164
|
else
|
170
165
|
colors.scale(index).darker
|
171
|
-
end
|
172
|
-
|
173
|
-
|
166
|
+
end
|
174
167
|
}
|
175
168
|
b.fill_style {|v|
|
176
169
|
if that.groups
|
@@ -237,7 +230,6 @@ module Statsample
|
|
237
230
|
builder.section(:name=>name) do |b|
|
238
231
|
b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
|
239
232
|
end
|
240
|
-
|
241
233
|
end
|
242
234
|
end
|
243
235
|
end
|