statsample 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
@@ -7,13 +7,13 @@ module Statsample
|
|
7
7
|
#
|
8
8
|
# == Use
|
9
9
|
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
10
|
+
# a = Daru::Vector.new(1000.times.collect {rand})
|
11
|
+
# b = Daru::Vector.new(1000.times.collect {rand})
|
12
|
+
# c = Daru::Vector.new(1000.times.collect {rand})
|
13
|
+
# ds= Daru::DataFrame.new({:a => a,:b => b,:c => c})
|
14
|
+
# ds[:y] = ds.collect_rows {|row| row[:a]*5 + row[:b]*3 + row[:c]*2 + rand()}
|
15
|
+
# da=Statsample::DominanceAnalysis.new(ds, :y)
|
16
|
+
# puts da.summary
|
17
17
|
#
|
18
18
|
# === Output:
|
19
19
|
#
|
@@ -115,21 +115,21 @@ module Statsample
|
|
115
115
|
}
|
116
116
|
@dependent=dependent
|
117
117
|
@dependent=[@dependent] unless @dependent.is_a? Array
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
@name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
|
122
|
-
|
123
|
-
if input.is_a? Statsample::Dataset
|
118
|
+
|
119
|
+
if input.kind_of? Daru::DataFrame
|
120
|
+
@predictors ||= input.vectors.to_a - @dependent
|
124
121
|
@ds=input
|
125
122
|
@matrix=Statsample::Bivariate.correlation_matrix(input)
|
126
123
|
@cases=Statsample::Bivariate.min_n_valid(input)
|
127
124
|
elsif input.is_a? ::Matrix
|
125
|
+
@predictors ||= input.fields-@dependent
|
128
126
|
@ds=nil
|
129
127
|
@matrix=input
|
130
128
|
else
|
131
129
|
raise ArgumentError.new("You should use a Matrix or a Dataset")
|
132
130
|
end
|
131
|
+
|
132
|
+
@name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
|
133
133
|
@models=nil
|
134
134
|
@models_data=nil
|
135
135
|
@general_averages=nil
|
@@ -264,22 +264,21 @@ module Statsample
|
|
264
264
|
end
|
265
265
|
|
266
266
|
def md(m)
|
267
|
-
models_data[m.sort {|a,b| a.to_s<=>b.to_s}]
|
267
|
+
models_data[m.sort {|a,b| a.to_s <=> b.to_s}]
|
268
268
|
end
|
269
269
|
# Get all model of size k
|
270
270
|
def md_k(k)
|
271
271
|
out=[]
|
272
|
-
@models.each{|m| out.push(md(m)) if m.size==k }
|
272
|
+
@models.each{ |m| out.push(md(m)) if m.size==k }
|
273
273
|
out
|
274
274
|
end
|
275
275
|
|
276
276
|
# For a hash with arrays of numbers as values
|
277
277
|
# Returns a hash with same keys and
|
278
278
|
# value as the mean of values of original hash
|
279
|
-
|
280
279
|
def get_averages(averages)
|
281
280
|
out={}
|
282
|
-
averages.each{|key,val| out[key]=
|
281
|
+
averages.each{ |key,val| out[key] = Daru::Vector.new(val).mean }
|
283
282
|
out
|
284
283
|
end
|
285
284
|
# Hash with average for each k size model.
|
@@ -5,16 +5,16 @@ module Statsample
|
|
5
5
|
#
|
6
6
|
# == Usage
|
7
7
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
8
|
+
# require 'statsample'
|
9
|
+
# a = Daru::Vector.new(100.times.collect {rand})
|
10
|
+
# b = Daru::Vector.new(100.times.collect {rand})
|
11
|
+
# c = Daru::Vector.new(100.times.collect {rand})
|
12
|
+
# d = Daru::Vector.new(100.times.collect {rand})
|
13
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
|
14
|
+
# ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
|
15
|
+
# dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
|
16
|
+
# dab.bootstrap(100,nil)
|
17
|
+
# puts dab.summary
|
18
18
|
# <strong>Output</strong>
|
19
19
|
# Sample size: 100
|
20
20
|
# t: 1.98421693632958
|
@@ -91,28 +91,28 @@ module Statsample
|
|
91
91
|
ALPHA=0.95
|
92
92
|
# Create a new Dominance Analysis Bootstrap Object
|
93
93
|
#
|
94
|
-
# * ds: A
|
94
|
+
# * ds: A Daru::DataFrame object
|
95
95
|
# * y_var: Name of dependent variable
|
96
96
|
# * opts: Any other attribute of the class
|
97
97
|
def initialize(ds,y_var, opts=Hash.new)
|
98
|
-
@ds=ds
|
99
|
-
@y_var=y_var
|
100
|
-
@n=ds.
|
98
|
+
@ds = ds
|
99
|
+
@y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
|
100
|
+
@n = ds.nrows
|
101
101
|
|
102
102
|
@n_samples=0
|
103
103
|
@alpha=ALPHA
|
104
104
|
@debug=false
|
105
105
|
if y_var.is_a? Array
|
106
|
-
@fields=ds.
|
106
|
+
@fields=ds.vectors.to_a - y_var
|
107
107
|
@regression_class=Regression::Multiple::MultipleDependent
|
108
108
|
|
109
109
|
else
|
110
|
-
@fields=ds.
|
110
|
+
@fields=ds.vectors.to_a - [y_var]
|
111
111
|
@regression_class=Regression::Multiple::MatrixEngine
|
112
112
|
end
|
113
|
-
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
113
|
+
@samples_ga=@fields.inject({}) { |a,v| a[v]=[]; a }
|
114
114
|
|
115
|
-
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.
|
115
|
+
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var]
|
116
116
|
opts.each{|k,v|
|
117
117
|
self.send("#{k}=",v) if self.respond_to? k
|
118
118
|
}
|
@@ -130,15 +130,14 @@ module Statsample
|
|
130
130
|
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
|
131
131
|
#
|
132
132
|
# * number_samples: Number of new samples to add
|
133
|
-
# * n: size of each new sample. If nil, equal to original sample size
|
134
|
-
|
133
|
+
# * n: size of each new sample. If nil, equal to original sample size
|
135
134
|
def bootstrap(number_samples,n=nil)
|
136
135
|
number_samples.times{ |t|
|
137
136
|
@n_samples+=1
|
138
137
|
puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
|
139
|
-
ds_boot=@ds.bootstrap(n)
|
138
|
+
ds_boot=@ds.bootstrap(n)
|
140
139
|
da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
|
141
|
-
|
140
|
+
|
142
141
|
da_1.total_dominance.each{|k,v|
|
143
142
|
@samples_td[k].push(v)
|
144
143
|
}
|
@@ -182,7 +181,7 @@ module Statsample
|
|
182
181
|
table.row([_("Complete dominance"),"","","","","","",""])
|
183
182
|
table.hr
|
184
183
|
@pairs.each{|pair|
|
185
|
-
std
|
184
|
+
std=Daru::Vector.new(@samples_td[pair])
|
186
185
|
ttd=da.total_dominance_pairwise(pair[0],pair[1])
|
187
186
|
table.row(summary_pairs(pair,std,ttd))
|
188
187
|
}
|
@@ -190,7 +189,7 @@ module Statsample
|
|
190
189
|
table.row([_("Conditional dominance"),"","","","","","",""])
|
191
190
|
table.hr
|
192
191
|
@pairs.each{|pair|
|
193
|
-
std
|
192
|
+
std=Daru::Vector.new(@samples_cd[pair])
|
194
193
|
ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
|
195
194
|
table.row(summary_pairs(pair,std,ttd))
|
196
195
|
|
@@ -199,7 +198,7 @@ module Statsample
|
|
199
198
|
table.row([_("General Dominance"),"","","","","","",""])
|
200
199
|
table.hr
|
201
200
|
@pairs.each{|pair|
|
202
|
-
std
|
201
|
+
std=Daru::Vector.new(@samples_gd[pair])
|
203
202
|
ttd=da.general_dominance_pairwise(pair[0],pair[1])
|
204
203
|
table.row(summary_pairs(pair,std,ttd))
|
205
204
|
}
|
@@ -208,10 +207,9 @@ module Statsample
|
|
208
207
|
table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
|
209
208
|
|
210
209
|
@fields.each{|f|
|
211
|
-
v
|
210
|
+
v=Daru::Vector.new(@samples_ga[f])
|
212
211
|
row=[@ds[f].name, sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
|
213
|
-
table.row(row)
|
214
|
-
|
212
|
+
table.row(row)
|
215
213
|
}
|
216
214
|
|
217
215
|
generator.parse_element(table)
|
@@ -22,13 +22,13 @@ module Statsample
|
|
22
22
|
|
23
23
|
class ParallelAnalysis
|
24
24
|
def self.with_random_data(cases,vars,opts=Hash.new)
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
ds.cases=cases
|
25
|
+
ds= Daru::DataFrame.new({},
|
26
|
+
order: vars.times.map {|i| "v#{i+1}".to_sym},
|
27
|
+
index: cases )
|
29
28
|
opts=opts.merge({:bootstrap_method=> :random, :no_data=>true})
|
30
29
|
new(ds, opts)
|
31
30
|
end
|
31
|
+
|
32
32
|
include DirtyMemoize
|
33
33
|
include Summarizable
|
34
34
|
# Number of random sets to produce. 50 by default
|
@@ -61,9 +61,9 @@ module Statsample
|
|
61
61
|
attr_accessor :use_gsl
|
62
62
|
def initialize(ds, opts=Hash.new)
|
63
63
|
@ds=ds
|
64
|
-
@fields=@ds.
|
64
|
+
@fields=@ds.vectors.to_a
|
65
65
|
@n_variables=@fields.size
|
66
|
-
@n_cases=ds.
|
66
|
+
@n_cases=ds.nrows
|
67
67
|
opts_default={
|
68
68
|
:name=>_("Parallel Analysis"),
|
69
69
|
:iterations=>50, # See Liu and Rijmen (2008)
|
@@ -82,7 +82,7 @@ module Statsample
|
|
82
82
|
# Number of factor to retent
|
83
83
|
def number_of_factors
|
84
84
|
total=0
|
85
|
-
ds_eigenvalues.
|
85
|
+
ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
|
86
86
|
if (@original[i]>0 and @original[i]>ds_eigenvalues[f].percentil(percentil))
|
87
87
|
total+=1
|
88
88
|
else
|
@@ -101,7 +101,7 @@ module Statsample
|
|
101
101
|
s.text _("Number of iterations: %d") % @iterations
|
102
102
|
if @no_data
|
103
103
|
s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("generated eigenvalue"), "p.#{percentil}"]) do |t|
|
104
|
-
ds_eigenvalues.
|
104
|
+
ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
|
105
105
|
v=ds_eigenvalues[f]
|
106
106
|
t.row [i+1, "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), ]
|
107
107
|
end
|
@@ -109,7 +109,7 @@ module Statsample
|
|
109
109
|
else
|
110
110
|
s.text _("Number or factors to preserve: %d") % number_of_factors
|
111
111
|
s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("data eigenvalue"), _("generated eigenvalue"),"p.#{percentil}",_("preserve?")]) do |t|
|
112
|
-
ds_eigenvalues.
|
112
|
+
ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
|
113
113
|
v=ds_eigenvalues[f]
|
114
114
|
t.row [i+1, "%0.4f" % @original[i], "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), (v.percentil(percentil)>0 and @original[i] > v.percentil(percentil)) ? "Yes":""]
|
115
115
|
end
|
@@ -120,11 +120,9 @@ module Statsample
|
|
120
120
|
end
|
121
121
|
# Perform calculation. Shouldn't be called directly for the user
|
122
122
|
def compute
|
123
|
+
@original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
|
124
|
+
@ds_eigenvalues=Daru::DataFrame.new({}, order: (1..@n_variables).map{|v| ("ev_%05d" % v).to_sym})
|
123
125
|
|
124
|
-
|
125
|
-
@original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
|
126
|
-
@ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
|
127
|
-
@ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:numeric}
|
128
126
|
if bootstrap_method==:parameter or bootstrap_method==:random
|
129
127
|
rng = Distribution::Normal.rng
|
130
128
|
end
|
@@ -133,18 +131,18 @@ module Statsample
|
|
133
131
|
begin
|
134
132
|
puts "#{@name}: Iteration #{i}" if $DEBUG or debug
|
135
133
|
# Create a dataset of dummy values
|
136
|
-
ds_bootstrap=
|
134
|
+
ds_bootstrap = Daru::DataFrame.new({}, order: @ds.vectors, index: @n_cases)
|
137
135
|
|
138
136
|
@fields.each do |f|
|
139
137
|
if bootstrap_method==:random
|
140
|
-
ds_bootstrap[f]
|
138
|
+
ds_bootstrap[f] = Daru::Vector.new(@n_cases.times.map {|c| rng.call})
|
141
139
|
elsif bootstrap_method==:data
|
142
|
-
ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases)
|
140
|
+
ds_bootstrap[f] = ds[f].sample_with_replacement(@n_cases)
|
143
141
|
else
|
144
142
|
raise "bootstrap_method doesn't recogniced"
|
145
143
|
end
|
146
144
|
end
|
147
|
-
ds_bootstrap.
|
145
|
+
ds_bootstrap.update
|
148
146
|
|
149
147
|
matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
|
150
148
|
matrix=matrix.to_gsl if @use_gsl
|
@@ -155,13 +153,13 @@ module Statsample
|
|
155
153
|
end
|
156
154
|
end
|
157
155
|
ev=matrix.eigenvalues
|
158
|
-
@ds_eigenvalues.
|
156
|
+
@ds_eigenvalues.add_row(ev)
|
159
157
|
rescue Statsample::Bivariate::Tetrachoric::RequerimentNotMeet => e
|
160
158
|
puts "Error: #{e}" if $DEBUG
|
161
159
|
redo
|
162
160
|
end
|
163
161
|
end
|
164
|
-
@ds_eigenvalues.
|
162
|
+
@ds_eigenvalues.update
|
165
163
|
end
|
166
164
|
dirty_memoize :number_of_factors, :ds_eigenvalues
|
167
165
|
dirty_writer :iterations, :bootstrap_method, :percentil, :smc
|
@@ -13,11 +13,11 @@ module Factor
|
|
13
13
|
#
|
14
14
|
# == Usage:
|
15
15
|
# require 'statsample'
|
16
|
-
# a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1]
|
17
|
-
# b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9]
|
18
|
-
# ds={
|
19
|
-
# cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
|
20
|
-
# pca=Statsample::Factor::PCA.new(cor_matrix)
|
16
|
+
# a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
|
17
|
+
# b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
|
18
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b})
|
19
|
+
# cor_matrix = Statsample::Bivariate.correlation_matrix(ds)
|
20
|
+
# pca= Statsample::Factor::PCA.new(cor_matrix)
|
21
21
|
# pca.m
|
22
22
|
# => 1
|
23
23
|
# pca.eigenvalues
|
@@ -52,11 +52,13 @@ module Factor
|
|
52
52
|
attr_accessor :rotation_type
|
53
53
|
attr_accessor :matrix_type
|
54
54
|
def initialize(matrix, opts=Hash.new)
|
55
|
-
@use_gsl=
|
55
|
+
@use_gsl = opts[:use_gsl]
|
56
|
+
opts.delete :use_gsl
|
57
|
+
|
56
58
|
@name=_("Principal Component Analysis")
|
57
59
|
@matrix=matrix
|
58
60
|
@n_variables=@matrix.column_size
|
59
|
-
@variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i|
|
61
|
+
@variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| "VAR_#{i+1}".to_sym }
|
60
62
|
|
61
63
|
@matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
|
62
64
|
|
@@ -67,13 +69,14 @@ module Factor
|
|
67
69
|
opts.each{|k,v|
|
68
70
|
self.send("#{k}=",v) if self.respond_to? k
|
69
71
|
}
|
72
|
+
|
70
73
|
if @use_gsl.nil?
|
71
74
|
@use_gsl=Statsample.has_gsl?
|
72
75
|
end
|
73
76
|
if @matrix.respond_to? :fields
|
74
77
|
@variables_names=@matrix.fields
|
75
78
|
else
|
76
|
-
@variables_names=@n_variables.times.map {|i| "V#{i+1}"}
|
79
|
+
@variables_names=@n_variables.times.map {|i| "V#{i+1}".to_sym}
|
77
80
|
end
|
78
81
|
calculate_eigenpairs
|
79
82
|
|
@@ -81,7 +84,6 @@ module Factor
|
|
81
84
|
# Set number of factors with eigenvalues > 1
|
82
85
|
@m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
|
83
86
|
end
|
84
|
-
|
85
87
|
end
|
86
88
|
def rotation
|
87
89
|
@rotation_type.new(component_matrix)
|
@@ -92,10 +94,10 @@ module Factor
|
|
92
94
|
def create_centered_ds
|
93
95
|
h={}
|
94
96
|
@original_ds.factors.each {|f|
|
95
|
-
mean
|
96
|
-
h[f]
|
97
|
+
mean = @original_ds[f].mean
|
98
|
+
h[f] = @original_ds[f].recode {|c| c-mean}
|
97
99
|
}
|
98
|
-
@ds=h
|
100
|
+
@ds = Daru::DataFrame.new(h)
|
99
101
|
end
|
100
102
|
|
101
103
|
# Feature matrix for +m+ factors
|
@@ -137,8 +139,8 @@ module Factor
|
|
137
139
|
pcs=(fv.transpose*data_matrix.transpose).transpose
|
138
140
|
|
139
141
|
pcs.extend Statsample::NamedMatrix
|
140
|
-
pcs.fields_y=m.times.map {|i| "PC_
|
141
|
-
pcs.
|
142
|
+
pcs.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
|
143
|
+
pcs.to_dataframe
|
142
144
|
end
|
143
145
|
def component_matrix(m=nil)
|
144
146
|
var="component_matrix_#{matrix_type}"
|
@@ -159,7 +161,7 @@ module Factor
|
|
159
161
|
cm.extend NamedMatrix
|
160
162
|
cm.name=_("Component matrix (from covariance)")
|
161
163
|
cm.fields_x = @variables_names
|
162
|
-
cm.fields_y = m.times.map {|i| "PC_
|
164
|
+
cm.fields_y = m.times.map {|i| "PC_#{i+1}".to_sym }
|
163
165
|
|
164
166
|
cm
|
165
167
|
end
|
@@ -180,17 +182,16 @@ module Factor
|
|
180
182
|
cm.extend CovariateMatrix
|
181
183
|
cm.name=_("Component matrix")
|
182
184
|
cm.fields_x = @variables_names
|
183
|
-
cm.fields_y = m.times.map {|i| "PC_
|
185
|
+
cm.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
|
184
186
|
cm
|
185
187
|
end
|
186
188
|
def communalities(m=nil)
|
187
|
-
|
188
189
|
m||=@m
|
189
190
|
h=[]
|
190
191
|
@n_variables.times do |i|
|
191
192
|
sum=0
|
192
193
|
m.times do |j|
|
193
|
-
sum+=(@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
|
194
|
+
sum += (@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
|
194
195
|
end
|
195
196
|
h.push(sum)
|
196
197
|
end
|
@@ -202,11 +203,11 @@ module Factor
|
|
202
203
|
end
|
203
204
|
def eigenvectors
|
204
205
|
@eigenpairs.collect {|c|
|
205
|
-
@use_gsl ? c[1].to_gsl : c[1]
|
206
|
+
@use_gsl ? c[1].to_gsl : Daru::Vector.new(c[1])
|
206
207
|
}
|
207
208
|
end
|
208
209
|
def calculate_eigenpairs
|
209
|
-
@eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
|
210
|
+
@eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
|
210
211
|
end
|
211
212
|
|
212
213
|
|
@@ -6,9 +6,9 @@ module Factor
|
|
6
6
|
#
|
7
7
|
# == Usage:
|
8
8
|
# require 'statsample'
|
9
|
-
# a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1]
|
10
|
-
# b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9]
|
11
|
-
# ds={
|
9
|
+
# a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
|
10
|
+
# b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
|
11
|
+
# ds= Daru::DataFrame.new({:a => a,:b => b})
|
12
12
|
# cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
|
13
13
|
# pa=Statsample::Factor::PrincipalAxis.new(cor_matrix)
|
14
14
|
# pa.iterate(1)
|
@@ -8,12 +8,12 @@ module Statsample
|
|
8
8
|
#
|
9
9
|
# == Usage
|
10
10
|
# === Svg output
|
11
|
-
# a=[1,2,3,4]
|
12
|
-
# b=[3,4,5,6]
|
13
|
-
#
|
11
|
+
# a = Daru::Vector.new([1,2,3,4])
|
12
|
+
# b = Daru::Vector.new([3,4,5,6])
|
13
|
+
# puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
|
14
14
|
# === Using ReportBuilder
|
15
|
-
# a=[1,2,3,4]
|
16
|
-
# b=[3,4,5,6]
|
15
|
+
# a = Daru::Vector.new([1,2,3,4])
|
16
|
+
# b = Daru::Vector.new([3,4,5,6])
|
17
17
|
# rb=ReportBuilder.new
|
18
18
|
# rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
|
19
19
|
# rb.save_html('boxplot.html')
|
@@ -85,8 +85,6 @@ module Statsample
|
|
85
85
|
min||=@vectors.map {|v| v.min}.min
|
86
86
|
max||=@vectors.map {|v| v.max}.max
|
87
87
|
|
88
|
-
|
89
|
-
|
90
88
|
margin_hor=margin_left + margin_right
|
91
89
|
margin_vert=margin_top + margin_bottom
|
92
90
|
x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
|
@@ -115,12 +113,10 @@ module Statsample
|
|
115
113
|
out[:low_whisker]=min
|
116
114
|
out[:high_whisker]=max
|
117
115
|
# And now, data outside whiskers
|
118
|
-
out[:outliers]=v.
|
116
|
+
out[:outliers]=v.to_a.find_all {|d| d < min or d > max }
|
119
117
|
out
|
120
118
|
}
|
121
|
-
|
122
|
-
|
123
|
-
|
119
|
+
|
124
120
|
vis=Rubyvis::Panel.new do |pan|
|
125
121
|
pan.width width - margin_hor
|
126
122
|
pan.height height - margin_vert
|
@@ -157,7 +153,6 @@ module Statsample
|
|
157
153
|
bp.left {|v| x_scale[index]}
|
158
154
|
bp.width x_scale.range_band
|
159
155
|
|
160
|
-
|
161
156
|
# Bar
|
162
157
|
bp.bar do |b|
|
163
158
|
b.bottom {|v| y_scale[v[:percentil_25]]}
|
@@ -168,9 +163,7 @@ module Statsample
|
|
168
163
|
colors.scale(that.groups[parent.index]).darker
|
169
164
|
else
|
170
165
|
colors.scale(index).darker
|
171
|
-
end
|
172
|
-
|
173
|
-
|
166
|
+
end
|
174
167
|
}
|
175
168
|
b.fill_style {|v|
|
176
169
|
if that.groups
|
@@ -237,7 +230,6 @@ module Statsample
|
|
237
230
|
builder.section(:name=>name) do |b|
|
238
231
|
b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
|
239
232
|
end
|
240
|
-
|
241
233
|
end
|
242
234
|
end
|
243
235
|
end
|