statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -7,13 +7,13 @@ module Statsample
7
7
  #
8
8
  # == Use
9
9
  #
10
- # a=1000.times.collect {rand}.to_numeric
11
- # b=1000.times.collect {rand}.to_numeric
12
- # c=1000.times.collect {rand}.to_numeric
13
- # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
14
- # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
15
- # da=Statsample::DominanceAnalysis.new(ds,'y')
16
- # puts da.summary
10
+ # a = Daru::Vector.new(1000.times.collect {rand})
11
+ # b = Daru::Vector.new(1000.times.collect {rand})
12
+ # c = Daru::Vector.new(1000.times.collect {rand})
13
+ # ds= Daru::DataFrame.new({:a => a,:b => b,:c => c})
14
+ # ds[:y] = ds.collect_rows {|row| row[:a]*5 + row[:b]*3 + row[:c]*2 + rand()}
15
+ # da=Statsample::DominanceAnalysis.new(ds, :y)
16
+ # puts da.summary
17
17
  #
18
18
  # === Output:
19
19
  #
@@ -115,21 +115,21 @@ module Statsample
115
115
  }
116
116
  @dependent=dependent
117
117
  @dependent=[@dependent] unless @dependent.is_a? Array
118
-
119
- @predictors ||= input.fields-@dependent
120
-
121
- @name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
122
-
123
- if input.is_a? Statsample::Dataset
118
+
119
+ if input.kind_of? Daru::DataFrame
120
+ @predictors ||= input.vectors.to_a - @dependent
124
121
  @ds=input
125
122
  @matrix=Statsample::Bivariate.correlation_matrix(input)
126
123
  @cases=Statsample::Bivariate.min_n_valid(input)
127
124
  elsif input.is_a? ::Matrix
125
+ @predictors ||= input.fields-@dependent
128
126
  @ds=nil
129
127
  @matrix=input
130
128
  else
131
129
  raise ArgumentError.new("You should use a Matrix or a Dataset")
132
130
  end
131
+
132
+ @name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
133
133
  @models=nil
134
134
  @models_data=nil
135
135
  @general_averages=nil
@@ -264,22 +264,21 @@ module Statsample
264
264
  end
265
265
 
266
266
  def md(m)
267
- models_data[m.sort {|a,b| a.to_s<=>b.to_s}]
267
+ models_data[m.sort {|a,b| a.to_s <=> b.to_s}]
268
268
  end
269
269
  # Get all model of size k
270
270
  def md_k(k)
271
271
  out=[]
272
- @models.each{|m| out.push(md(m)) if m.size==k }
272
+ @models.each{ |m| out.push(md(m)) if m.size==k }
273
273
  out
274
274
  end
275
275
 
276
276
  # For a hash with arrays of numbers as values
277
277
  # Returns a hash with same keys and
278
278
  # value as the mean of values of original hash
279
-
280
279
  def get_averages(averages)
281
280
  out={}
282
- averages.each{|key,val| out[key]=val.to_vector(:numeric).mean }
281
+ averages.each{ |key,val| out[key] = Daru::Vector.new(val).mean }
283
282
  out
284
283
  end
285
284
  # Hash with average for each k size model.
@@ -5,16 +5,16 @@ module Statsample
5
5
  #
6
6
  # == Usage
7
7
  #
8
- # require 'statsample'
9
- # a=100.times.collect {rand}.to_numeric
10
- # b=100.times.collect {rand}.to_numeric
11
- # c=100.times.collect {rand}.to_numeric
12
- # d=100.times.collect {rand}.to_numeric
13
- # ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
14
- # ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
15
- # dab=Statsample::DominanceAnalysis::Bootstrap.new(ds2, 'y', :debug=>true)
16
- # dab.bootstrap(100,nil)
17
- # puts dab.summary
8
+ # require 'statsample'
9
+ # a = Daru::Vector.new(100.times.collect {rand})
10
+ # b = Daru::Vector.new(100.times.collect {rand})
11
+ # c = Daru::Vector.new(100.times.collect {rand})
12
+ # d = Daru::Vector.new(100.times.collect {rand})
13
+ # ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
14
+ # ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
15
+ # dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
16
+ # dab.bootstrap(100,nil)
17
+ # puts dab.summary
18
18
  # <strong>Output</strong>
19
19
  # Sample size: 100
20
20
  # t: 1.98421693632958
@@ -91,28 +91,28 @@ module Statsample
91
91
  ALPHA=0.95
92
92
  # Create a new Dominance Analysis Bootstrap Object
93
93
  #
94
- # * ds: A Dataset object
94
+ # * ds: A Daru::DataFrame object
95
95
  # * y_var: Name of dependent variable
96
96
  # * opts: Any other attribute of the class
97
97
  def initialize(ds,y_var, opts=Hash.new)
98
- @ds=ds
99
- @y_var=y_var
100
- @n=ds.cases
98
+ @ds = ds
99
+ @y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
100
+ @n = ds.nrows
101
101
 
102
102
  @n_samples=0
103
103
  @alpha=ALPHA
104
104
  @debug=false
105
105
  if y_var.is_a? Array
106
- @fields=ds.fields-y_var
106
+ @fields=ds.vectors.to_a - y_var
107
107
  @regression_class=Regression::Multiple::MultipleDependent
108
108
 
109
109
  else
110
- @fields=ds.fields-[y_var]
110
+ @fields=ds.vectors.to_a - [y_var]
111
111
  @regression_class=Regression::Multiple::MatrixEngine
112
112
  end
113
- @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
113
+ @samples_ga=@fields.inject({}) { |a,v| a[v]=[]; a }
114
114
 
115
- @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
115
+ @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var]
116
116
  opts.each{|k,v|
117
117
  self.send("#{k}=",v) if self.respond_to? k
118
118
  }
@@ -130,15 +130,14 @@ module Statsample
130
130
  # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
131
131
  #
132
132
  # * number_samples: Number of new samples to add
133
- # * n: size of each new sample. If nil, equal to original sample size
134
-
133
+ # * n: size of each new sample. If nil, equal to original sample size
135
134
  def bootstrap(number_samples,n=nil)
136
135
  number_samples.times{ |t|
137
136
  @n_samples+=1
138
137
  puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
139
- ds_boot=@ds.bootstrap(n)
138
+ ds_boot=@ds.bootstrap(n)
140
139
  da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
141
-
140
+
142
141
  da_1.total_dominance.each{|k,v|
143
142
  @samples_td[k].push(v)
144
143
  }
@@ -182,7 +181,7 @@ module Statsample
182
181
  table.row([_("Complete dominance"),"","","","","","",""])
183
182
  table.hr
184
183
  @pairs.each{|pair|
185
- std=@samples_td[pair].to_vector(:numeric)
184
+ std=Daru::Vector.new(@samples_td[pair])
186
185
  ttd=da.total_dominance_pairwise(pair[0],pair[1])
187
186
  table.row(summary_pairs(pair,std,ttd))
188
187
  }
@@ -190,7 +189,7 @@ module Statsample
190
189
  table.row([_("Conditional dominance"),"","","","","","",""])
191
190
  table.hr
192
191
  @pairs.each{|pair|
193
- std=@samples_cd[pair].to_vector(:numeric)
192
+ std=Daru::Vector.new(@samples_cd[pair])
194
193
  ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
195
194
  table.row(summary_pairs(pair,std,ttd))
196
195
 
@@ -199,7 +198,7 @@ module Statsample
199
198
  table.row([_("General Dominance"),"","","","","","",""])
200
199
  table.hr
201
200
  @pairs.each{|pair|
202
- std=@samples_gd[pair].to_vector(:numeric)
201
+ std=Daru::Vector.new(@samples_gd[pair])
203
202
  ttd=da.general_dominance_pairwise(pair[0],pair[1])
204
203
  table.row(summary_pairs(pair,std,ttd))
205
204
  }
@@ -208,10 +207,9 @@ module Statsample
208
207
  table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
209
208
 
210
209
  @fields.each{|f|
211
- v=@samples_ga[f].to_vector(:numeric)
210
+ v=Daru::Vector.new(@samples_ga[f])
212
211
  row=[@ds[f].name, sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
213
- table.row(row)
214
-
212
+ table.row(row)
215
213
  }
216
214
 
217
215
  generator.parse_element(table)
@@ -22,13 +22,13 @@ module Statsample
22
22
 
23
23
  class ParallelAnalysis
24
24
  def self.with_random_data(cases,vars,opts=Hash.new)
25
- require 'ostruct'
26
- ds=OpenStruct.new
27
- ds.fields=vars.times.map {|i| "v#{i+1}"}
28
- ds.cases=cases
25
+ ds= Daru::DataFrame.new({},
26
+ order: vars.times.map {|i| "v#{i+1}".to_sym},
27
+ index: cases )
29
28
  opts=opts.merge({:bootstrap_method=> :random, :no_data=>true})
30
29
  new(ds, opts)
31
30
  end
31
+
32
32
  include DirtyMemoize
33
33
  include Summarizable
34
34
  # Number of random sets to produce. 50 by default
@@ -61,9 +61,9 @@ module Statsample
61
61
  attr_accessor :use_gsl
62
62
  def initialize(ds, opts=Hash.new)
63
63
  @ds=ds
64
- @fields=@ds.fields
64
+ @fields=@ds.vectors.to_a
65
65
  @n_variables=@fields.size
66
- @n_cases=ds.cases
66
+ @n_cases=ds.nrows
67
67
  opts_default={
68
68
  :name=>_("Parallel Analysis"),
69
69
  :iterations=>50, # See Liu and Rijmen (2008)
@@ -82,7 +82,7 @@ module Statsample
82
82
  # Number of factor to retent
83
83
  def number_of_factors
84
84
  total=0
85
- ds_eigenvalues.fields.each_with_index do |f,i|
85
+ ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
86
86
  if (@original[i]>0 and @original[i]>ds_eigenvalues[f].percentil(percentil))
87
87
  total+=1
88
88
  else
@@ -101,7 +101,7 @@ module Statsample
101
101
  s.text _("Number of iterations: %d") % @iterations
102
102
  if @no_data
103
103
  s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("generated eigenvalue"), "p.#{percentil}"]) do |t|
104
- ds_eigenvalues.fields.each_with_index do |f,i|
104
+ ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
105
105
  v=ds_eigenvalues[f]
106
106
  t.row [i+1, "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), ]
107
107
  end
@@ -109,7 +109,7 @@ module Statsample
109
109
  else
110
110
  s.text _("Number or factors to preserve: %d") % number_of_factors
111
111
  s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("data eigenvalue"), _("generated eigenvalue"),"p.#{percentil}",_("preserve?")]) do |t|
112
- ds_eigenvalues.fields.each_with_index do |f,i|
112
+ ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
113
113
  v=ds_eigenvalues[f]
114
114
  t.row [i+1, "%0.4f" % @original[i], "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), (v.percentil(percentil)>0 and @original[i] > v.percentil(percentil)) ? "Yes":""]
115
115
  end
@@ -120,11 +120,9 @@ module Statsample
120
120
  end
121
121
  # Perform calculation. Shouldn't be called directly for the user
122
122
  def compute
123
+ @original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
124
+ @ds_eigenvalues=Daru::DataFrame.new({}, order: (1..@n_variables).map{|v| ("ev_%05d" % v).to_sym})
123
125
 
124
-
125
- @original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
126
- @ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
127
- @ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:numeric}
128
126
  if bootstrap_method==:parameter or bootstrap_method==:random
129
127
  rng = Distribution::Normal.rng
130
128
  end
@@ -133,18 +131,18 @@ module Statsample
133
131
  begin
134
132
  puts "#{@name}: Iteration #{i}" if $DEBUG or debug
135
133
  # Create a dataset of dummy values
136
- ds_bootstrap=Statsample::Dataset.new(@ds.fields)
134
+ ds_bootstrap = Daru::DataFrame.new({}, order: @ds.vectors, index: @n_cases)
137
135
 
138
136
  @fields.each do |f|
139
137
  if bootstrap_method==:random
140
- ds_bootstrap[f]=@n_cases.times.map {|c| rng.call}.to_numeric
138
+ ds_bootstrap[f] = Daru::Vector.new(@n_cases.times.map {|c| rng.call})
141
139
  elsif bootstrap_method==:data
142
- ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases)
140
+ ds_bootstrap[f] = ds[f].sample_with_replacement(@n_cases)
143
141
  else
144
142
  raise "bootstrap_method doesn't recogniced"
145
143
  end
146
144
  end
147
- ds_bootstrap.update_valid_data
145
+ ds_bootstrap.update
148
146
 
149
147
  matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
150
148
  matrix=matrix.to_gsl if @use_gsl
@@ -155,13 +153,13 @@ module Statsample
155
153
  end
156
154
  end
157
155
  ev=matrix.eigenvalues
158
- @ds_eigenvalues.add_case_array(ev)
156
+ @ds_eigenvalues.add_row(ev)
159
157
  rescue Statsample::Bivariate::Tetrachoric::RequerimentNotMeet => e
160
158
  puts "Error: #{e}" if $DEBUG
161
159
  redo
162
160
  end
163
161
  end
164
- @ds_eigenvalues.update_valid_data
162
+ @ds_eigenvalues.update
165
163
  end
166
164
  dirty_memoize :number_of_factors, :ds_eigenvalues
167
165
  dirty_writer :iterations, :bootstrap_method, :percentil, :smc
@@ -13,11 +13,11 @@ module Factor
13
13
  #
14
14
  # == Usage:
15
15
  # require 'statsample'
16
- # a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
17
- # b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_numeric
18
- # ds={'a'=>a,'b'=>b}.to_dataset
19
- # cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
20
- # pca=Statsample::Factor::PCA.new(cor_matrix)
16
+ # a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
17
+ # b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
18
+ # ds = Daru::DataFrame.new({:a => a,:b => b})
19
+ # cor_matrix = Statsample::Bivariate.correlation_matrix(ds)
20
+ # pca= Statsample::Factor::PCA.new(cor_matrix)
21
21
  # pca.m
22
22
  # => 1
23
23
  # pca.eigenvalues
@@ -52,11 +52,13 @@ module Factor
52
52
  attr_accessor :rotation_type
53
53
  attr_accessor :matrix_type
54
54
  def initialize(matrix, opts=Hash.new)
55
- @use_gsl=nil
55
+ @use_gsl = opts[:use_gsl]
56
+ opts.delete :use_gsl
57
+
56
58
  @name=_("Principal Component Analysis")
57
59
  @matrix=matrix
58
60
  @n_variables=@matrix.column_size
59
- @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| _("VAR_%d") % (i+1)}
61
+ @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| "VAR_#{i+1}".to_sym }
60
62
 
61
63
  @matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
62
64
 
@@ -67,13 +69,14 @@ module Factor
67
69
  opts.each{|k,v|
68
70
  self.send("#{k}=",v) if self.respond_to? k
69
71
  }
72
+
70
73
  if @use_gsl.nil?
71
74
  @use_gsl=Statsample.has_gsl?
72
75
  end
73
76
  if @matrix.respond_to? :fields
74
77
  @variables_names=@matrix.fields
75
78
  else
76
- @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
79
+ @variables_names=@n_variables.times.map {|i| "V#{i+1}".to_sym}
77
80
  end
78
81
  calculate_eigenpairs
79
82
 
@@ -81,7 +84,6 @@ module Factor
81
84
  # Set number of factors with eigenvalues > 1
82
85
  @m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
83
86
  end
84
-
85
87
  end
86
88
  def rotation
87
89
  @rotation_type.new(component_matrix)
@@ -92,10 +94,10 @@ module Factor
92
94
  def create_centered_ds
93
95
  h={}
94
96
  @original_ds.factors.each {|f|
95
- mean=@original_ds[f].mean
96
- h[f]=@original_ds[f].recode {|c| c-mean}
97
+ mean = @original_ds[f].mean
98
+ h[f] = @original_ds[f].recode {|c| c-mean}
97
99
  }
98
- @ds=h.to_dataset
100
+ @ds = Daru::DataFrame.new(h)
99
101
  end
100
102
 
101
103
  # Feature matrix for +m+ factors
@@ -137,8 +139,8 @@ module Factor
137
139
  pcs=(fv.transpose*data_matrix.transpose).transpose
138
140
 
139
141
  pcs.extend Statsample::NamedMatrix
140
- pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
141
- pcs.to_dataset
142
+ pcs.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
143
+ pcs.to_dataframe
142
144
  end
143
145
  def component_matrix(m=nil)
144
146
  var="component_matrix_#{matrix_type}"
@@ -159,7 +161,7 @@ module Factor
159
161
  cm.extend NamedMatrix
160
162
  cm.name=_("Component matrix (from covariance)")
161
163
  cm.fields_x = @variables_names
162
- cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
164
+ cm.fields_y = m.times.map {|i| "PC_#{i+1}".to_sym }
163
165
 
164
166
  cm
165
167
  end
@@ -180,17 +182,16 @@ module Factor
180
182
  cm.extend CovariateMatrix
181
183
  cm.name=_("Component matrix")
182
184
  cm.fields_x = @variables_names
183
- cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
185
+ cm.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
184
186
  cm
185
187
  end
186
188
  def communalities(m=nil)
187
-
188
189
  m||=@m
189
190
  h=[]
190
191
  @n_variables.times do |i|
191
192
  sum=0
192
193
  m.times do |j|
193
- sum+=(@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
194
+ sum += (@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
194
195
  end
195
196
  h.push(sum)
196
197
  end
@@ -202,11 +203,11 @@ module Factor
202
203
  end
203
204
  def eigenvectors
204
205
  @eigenpairs.collect {|c|
205
- @use_gsl ? c[1].to_gsl : c[1].to_vector
206
+ @use_gsl ? c[1].to_gsl : Daru::Vector.new(c[1])
206
207
  }
207
208
  end
208
209
  def calculate_eigenpairs
209
- @eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
210
+ @eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
210
211
  end
211
212
 
212
213
 
@@ -6,9 +6,9 @@ module Factor
6
6
  #
7
7
  # == Usage:
8
8
  # require 'statsample'
9
- # a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
10
- # b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_numeric
11
- # ds={'a'=>a,'b'=>b}.to_dataset
9
+ # a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
10
+ # b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
11
+ # ds= Daru::DataFrame.new({:a => a,:b => b})
12
12
  # cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
13
13
  # pa=Statsample::Factor::PrincipalAxis.new(cor_matrix)
14
14
  # pa.iterate(1)
@@ -8,12 +8,12 @@ module Statsample
8
8
  #
9
9
  # == Usage
10
10
  # === Svg output
11
- # a=[1,2,3,4].to_numeric
12
- # b=[3,4,5,6].to_numeric
13
- # puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
11
+ # a = Daru::Vector.new([1,2,3,4])
12
+ # b = Daru::Vector.new([3,4,5,6])
13
+ # puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
14
14
  # === Using ReportBuilder
15
- # a=[1,2,3,4].to_numeric
16
- # b=[3,4,5,6].to_numeric
15
+ # a = Daru::Vector.new([1,2,3,4])
16
+ # b = Daru::Vector.new([3,4,5,6])
17
17
  # rb=ReportBuilder.new
18
18
  # rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
19
19
  # rb.save_html('boxplot.html')
@@ -85,8 +85,6 @@ module Statsample
85
85
  min||=@vectors.map {|v| v.min}.min
86
86
  max||=@vectors.map {|v| v.max}.max
87
87
 
88
-
89
-
90
88
  margin_hor=margin_left + margin_right
91
89
  margin_vert=margin_top + margin_bottom
92
90
  x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
@@ -115,12 +113,10 @@ module Statsample
115
113
  out[:low_whisker]=min
116
114
  out[:high_whisker]=max
117
115
  # And now, data outside whiskers
118
- out[:outliers]=v.data_with_nils.find_all {|d| d < min or d > max }
116
+ out[:outliers]=v.to_a.find_all {|d| d < min or d > max }
119
117
  out
120
118
  }
121
-
122
-
123
-
119
+
124
120
  vis=Rubyvis::Panel.new do |pan|
125
121
  pan.width width - margin_hor
126
122
  pan.height height - margin_vert
@@ -157,7 +153,6 @@ module Statsample
157
153
  bp.left {|v| x_scale[index]}
158
154
  bp.width x_scale.range_band
159
155
 
160
-
161
156
  # Bar
162
157
  bp.bar do |b|
163
158
  b.bottom {|v| y_scale[v[:percentil_25]]}
@@ -168,9 +163,7 @@ module Statsample
168
163
  colors.scale(that.groups[parent.index]).darker
169
164
  else
170
165
  colors.scale(index).darker
171
- end
172
-
173
-
166
+ end
174
167
  }
175
168
  b.fill_style {|v|
176
169
  if that.groups
@@ -237,7 +230,6 @@ module Statsample
237
230
  builder.section(:name=>name) do |b|
238
231
  b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
239
232
  end
240
-
241
233
  end
242
234
  end
243
235
  end