statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -7,13 +7,13 @@ module Statsample
7
7
  #
8
8
  # == Use
9
9
  #
10
- # a=1000.times.collect {rand}.to_numeric
11
- # b=1000.times.collect {rand}.to_numeric
12
- # c=1000.times.collect {rand}.to_numeric
13
- # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
14
- # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
15
- # da=Statsample::DominanceAnalysis.new(ds,'y')
16
- # puts da.summary
10
+ # a = Daru::Vector.new(1000.times.collect {rand})
11
+ # b = Daru::Vector.new(1000.times.collect {rand})
12
+ # c = Daru::Vector.new(1000.times.collect {rand})
13
+ # ds= Daru::DataFrame.new({:a => a,:b => b,:c => c})
14
+ # ds[:y] = ds.collect_rows {|row| row[:a]*5 + row[:b]*3 + row[:c]*2 + rand()}
15
+ # da=Statsample::DominanceAnalysis.new(ds, :y)
16
+ # puts da.summary
17
17
  #
18
18
  # === Output:
19
19
  #
@@ -115,21 +115,21 @@ module Statsample
115
115
  }
116
116
  @dependent=dependent
117
117
  @dependent=[@dependent] unless @dependent.is_a? Array
118
-
119
- @predictors ||= input.fields-@dependent
120
-
121
- @name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
122
-
123
- if input.is_a? Statsample::Dataset
118
+
119
+ if input.kind_of? Daru::DataFrame
120
+ @predictors ||= input.vectors.to_a - @dependent
124
121
  @ds=input
125
122
  @matrix=Statsample::Bivariate.correlation_matrix(input)
126
123
  @cases=Statsample::Bivariate.min_n_valid(input)
127
124
  elsif input.is_a? ::Matrix
125
+ @predictors ||= input.fields-@dependent
128
126
  @ds=nil
129
127
  @matrix=input
130
128
  else
131
129
  raise ArgumentError.new("You should use a Matrix or a Dataset")
132
130
  end
131
+
132
+ @name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
133
133
  @models=nil
134
134
  @models_data=nil
135
135
  @general_averages=nil
@@ -264,22 +264,21 @@ module Statsample
264
264
  end
265
265
 
266
266
  def md(m)
267
- models_data[m.sort {|a,b| a.to_s<=>b.to_s}]
267
+ models_data[m.sort {|a,b| a.to_s <=> b.to_s}]
268
268
  end
269
269
  # Get all model of size k
270
270
  def md_k(k)
271
271
  out=[]
272
- @models.each{|m| out.push(md(m)) if m.size==k }
272
+ @models.each{ |m| out.push(md(m)) if m.size==k }
273
273
  out
274
274
  end
275
275
 
276
276
  # For a hash with arrays of numbers as values
277
277
  # Returns a hash with same keys and
278
278
  # value as the mean of values of original hash
279
-
280
279
  def get_averages(averages)
281
280
  out={}
282
- averages.each{|key,val| out[key]=val.to_vector(:numeric).mean }
281
+ averages.each{ |key,val| out[key] = Daru::Vector.new(val).mean }
283
282
  out
284
283
  end
285
284
  # Hash with average for each k size model.
@@ -5,16 +5,16 @@ module Statsample
5
5
  #
6
6
  # == Usage
7
7
  #
8
- # require 'statsample'
9
- # a=100.times.collect {rand}.to_numeric
10
- # b=100.times.collect {rand}.to_numeric
11
- # c=100.times.collect {rand}.to_numeric
12
- # d=100.times.collect {rand}.to_numeric
13
- # ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
14
- # ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
15
- # dab=Statsample::DominanceAnalysis::Bootstrap.new(ds2, 'y', :debug=>true)
16
- # dab.bootstrap(100,nil)
17
- # puts dab.summary
8
+ # require 'statsample'
9
+ # a = Daru::Vector.new(100.times.collect {rand})
10
+ # b = Daru::Vector.new(100.times.collect {rand})
11
+ # c = Daru::Vector.new(100.times.collect {rand})
12
+ # d = Daru::Vector.new(100.times.collect {rand})
13
+ # ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
14
+ # ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
15
+ # dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
16
+ # dab.bootstrap(100,nil)
17
+ # puts dab.summary
18
18
  # <strong>Output</strong>
19
19
  # Sample size: 100
20
20
  # t: 1.98421693632958
@@ -91,28 +91,28 @@ module Statsample
91
91
  ALPHA=0.95
92
92
  # Create a new Dominance Analysis Bootstrap Object
93
93
  #
94
- # * ds: A Dataset object
94
+ # * ds: A Daru::DataFrame object
95
95
  # * y_var: Name of dependent variable
96
96
  # * opts: Any other attribute of the class
97
97
  def initialize(ds,y_var, opts=Hash.new)
98
- @ds=ds
99
- @y_var=y_var
100
- @n=ds.cases
98
+ @ds = ds
99
+ @y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
100
+ @n = ds.nrows
101
101
 
102
102
  @n_samples=0
103
103
  @alpha=ALPHA
104
104
  @debug=false
105
105
  if y_var.is_a? Array
106
- @fields=ds.fields-y_var
106
+ @fields=ds.vectors.to_a - y_var
107
107
  @regression_class=Regression::Multiple::MultipleDependent
108
108
 
109
109
  else
110
- @fields=ds.fields-[y_var]
110
+ @fields=ds.vectors.to_a - [y_var]
111
111
  @regression_class=Regression::Multiple::MatrixEngine
112
112
  end
113
- @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
113
+ @samples_ga=@fields.inject({}) { |a,v| a[v]=[]; a }
114
114
 
115
- @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
115
+ @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var]
116
116
  opts.each{|k,v|
117
117
  self.send("#{k}=",v) if self.respond_to? k
118
118
  }
@@ -130,15 +130,14 @@ module Statsample
130
130
  # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
131
131
  #
132
132
  # * number_samples: Number of new samples to add
133
- # * n: size of each new sample. If nil, equal to original sample size
134
-
133
+ # * n: size of each new sample. If nil, equal to original sample size
135
134
  def bootstrap(number_samples,n=nil)
136
135
  number_samples.times{ |t|
137
136
  @n_samples+=1
138
137
  puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
139
- ds_boot=@ds.bootstrap(n)
138
+ ds_boot=@ds.bootstrap(n)
140
139
  da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
141
-
140
+
142
141
  da_1.total_dominance.each{|k,v|
143
142
  @samples_td[k].push(v)
144
143
  }
@@ -182,7 +181,7 @@ module Statsample
182
181
  table.row([_("Complete dominance"),"","","","","","",""])
183
182
  table.hr
184
183
  @pairs.each{|pair|
185
- std=@samples_td[pair].to_vector(:numeric)
184
+ std=Daru::Vector.new(@samples_td[pair])
186
185
  ttd=da.total_dominance_pairwise(pair[0],pair[1])
187
186
  table.row(summary_pairs(pair,std,ttd))
188
187
  }
@@ -190,7 +189,7 @@ module Statsample
190
189
  table.row([_("Conditional dominance"),"","","","","","",""])
191
190
  table.hr
192
191
  @pairs.each{|pair|
193
- std=@samples_cd[pair].to_vector(:numeric)
192
+ std=Daru::Vector.new(@samples_cd[pair])
194
193
  ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
195
194
  table.row(summary_pairs(pair,std,ttd))
196
195
 
@@ -199,7 +198,7 @@ module Statsample
199
198
  table.row([_("General Dominance"),"","","","","","",""])
200
199
  table.hr
201
200
  @pairs.each{|pair|
202
- std=@samples_gd[pair].to_vector(:numeric)
201
+ std=Daru::Vector.new(@samples_gd[pair])
203
202
  ttd=da.general_dominance_pairwise(pair[0],pair[1])
204
203
  table.row(summary_pairs(pair,std,ttd))
205
204
  }
@@ -208,10 +207,9 @@ module Statsample
208
207
  table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
209
208
 
210
209
  @fields.each{|f|
211
- v=@samples_ga[f].to_vector(:numeric)
210
+ v=Daru::Vector.new(@samples_ga[f])
212
211
  row=[@ds[f].name, sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
213
- table.row(row)
214
-
212
+ table.row(row)
215
213
  }
216
214
 
217
215
  generator.parse_element(table)
@@ -22,13 +22,13 @@ module Statsample
22
22
 
23
23
  class ParallelAnalysis
24
24
  def self.with_random_data(cases,vars,opts=Hash.new)
25
- require 'ostruct'
26
- ds=OpenStruct.new
27
- ds.fields=vars.times.map {|i| "v#{i+1}"}
28
- ds.cases=cases
25
+ ds= Daru::DataFrame.new({},
26
+ order: vars.times.map {|i| "v#{i+1}".to_sym},
27
+ index: cases )
29
28
  opts=opts.merge({:bootstrap_method=> :random, :no_data=>true})
30
29
  new(ds, opts)
31
30
  end
31
+
32
32
  include DirtyMemoize
33
33
  include Summarizable
34
34
  # Number of random sets to produce. 50 by default
@@ -61,9 +61,9 @@ module Statsample
61
61
  attr_accessor :use_gsl
62
62
  def initialize(ds, opts=Hash.new)
63
63
  @ds=ds
64
- @fields=@ds.fields
64
+ @fields=@ds.vectors.to_a
65
65
  @n_variables=@fields.size
66
- @n_cases=ds.cases
66
+ @n_cases=ds.nrows
67
67
  opts_default={
68
68
  :name=>_("Parallel Analysis"),
69
69
  :iterations=>50, # See Liu and Rijmen (2008)
@@ -82,7 +82,7 @@ module Statsample
82
82
  # Number of factor to retent
83
83
  def number_of_factors
84
84
  total=0
85
- ds_eigenvalues.fields.each_with_index do |f,i|
85
+ ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
86
86
  if (@original[i]>0 and @original[i]>ds_eigenvalues[f].percentil(percentil))
87
87
  total+=1
88
88
  else
@@ -101,7 +101,7 @@ module Statsample
101
101
  s.text _("Number of iterations: %d") % @iterations
102
102
  if @no_data
103
103
  s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("generated eigenvalue"), "p.#{percentil}"]) do |t|
104
- ds_eigenvalues.fields.each_with_index do |f,i|
104
+ ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
105
105
  v=ds_eigenvalues[f]
106
106
  t.row [i+1, "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), ]
107
107
  end
@@ -109,7 +109,7 @@ module Statsample
109
109
  else
110
110
  s.text _("Number or factors to preserve: %d") % number_of_factors
111
111
  s.table(:name=>_("Eigenvalues"), :header=>[_("n"), _("data eigenvalue"), _("generated eigenvalue"),"p.#{percentil}",_("preserve?")]) do |t|
112
- ds_eigenvalues.fields.each_with_index do |f,i|
112
+ ds_eigenvalues.vectors.to_a.each_with_index do |f,i|
113
113
  v=ds_eigenvalues[f]
114
114
  t.row [i+1, "%0.4f" % @original[i], "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), (v.percentil(percentil)>0 and @original[i] > v.percentil(percentil)) ? "Yes":""]
115
115
  end
@@ -120,11 +120,9 @@ module Statsample
120
120
  end
121
121
  # Perform calculation. Shouldn't be called directly for the user
122
122
  def compute
123
+ @original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
124
+ @ds_eigenvalues=Daru::DataFrame.new({}, order: (1..@n_variables).map{|v| ("ev_%05d" % v).to_sym})
123
125
 
124
-
125
- @original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data
126
- @ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
127
- @ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:numeric}
128
126
  if bootstrap_method==:parameter or bootstrap_method==:random
129
127
  rng = Distribution::Normal.rng
130
128
  end
@@ -133,18 +131,18 @@ module Statsample
133
131
  begin
134
132
  puts "#{@name}: Iteration #{i}" if $DEBUG or debug
135
133
  # Create a dataset of dummy values
136
- ds_bootstrap=Statsample::Dataset.new(@ds.fields)
134
+ ds_bootstrap = Daru::DataFrame.new({}, order: @ds.vectors, index: @n_cases)
137
135
 
138
136
  @fields.each do |f|
139
137
  if bootstrap_method==:random
140
- ds_bootstrap[f]=@n_cases.times.map {|c| rng.call}.to_numeric
138
+ ds_bootstrap[f] = Daru::Vector.new(@n_cases.times.map {|c| rng.call})
141
139
  elsif bootstrap_method==:data
142
- ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases)
140
+ ds_bootstrap[f] = ds[f].sample_with_replacement(@n_cases)
143
141
  else
144
142
  raise "bootstrap_method doesn't recogniced"
145
143
  end
146
144
  end
147
- ds_bootstrap.update_valid_data
145
+ ds_bootstrap.update
148
146
 
149
147
  matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
150
148
  matrix=matrix.to_gsl if @use_gsl
@@ -155,13 +153,13 @@ module Statsample
155
153
  end
156
154
  end
157
155
  ev=matrix.eigenvalues
158
- @ds_eigenvalues.add_case_array(ev)
156
+ @ds_eigenvalues.add_row(ev)
159
157
  rescue Statsample::Bivariate::Tetrachoric::RequerimentNotMeet => e
160
158
  puts "Error: #{e}" if $DEBUG
161
159
  redo
162
160
  end
163
161
  end
164
- @ds_eigenvalues.update_valid_data
162
+ @ds_eigenvalues.update
165
163
  end
166
164
  dirty_memoize :number_of_factors, :ds_eigenvalues
167
165
  dirty_writer :iterations, :bootstrap_method, :percentil, :smc
@@ -13,11 +13,11 @@ module Factor
13
13
  #
14
14
  # == Usage:
15
15
  # require 'statsample'
16
- # a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
17
- # b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_numeric
18
- # ds={'a'=>a,'b'=>b}.to_dataset
19
- # cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
20
- # pca=Statsample::Factor::PCA.new(cor_matrix)
16
+ # a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
17
+ # b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
18
+ # ds = Daru::DataFrame.new({:a => a,:b => b})
19
+ # cor_matrix = Statsample::Bivariate.correlation_matrix(ds)
20
+ # pca= Statsample::Factor::PCA.new(cor_matrix)
21
21
  # pca.m
22
22
  # => 1
23
23
  # pca.eigenvalues
@@ -52,11 +52,13 @@ module Factor
52
52
  attr_accessor :rotation_type
53
53
  attr_accessor :matrix_type
54
54
  def initialize(matrix, opts=Hash.new)
55
- @use_gsl=nil
55
+ @use_gsl = opts[:use_gsl]
56
+ opts.delete :use_gsl
57
+
56
58
  @name=_("Principal Component Analysis")
57
59
  @matrix=matrix
58
60
  @n_variables=@matrix.column_size
59
- @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| _("VAR_%d") % (i+1)}
61
+ @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| "VAR_#{i+1}".to_sym }
60
62
 
61
63
  @matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
62
64
 
@@ -67,13 +69,14 @@ module Factor
67
69
  opts.each{|k,v|
68
70
  self.send("#{k}=",v) if self.respond_to? k
69
71
  }
72
+
70
73
  if @use_gsl.nil?
71
74
  @use_gsl=Statsample.has_gsl?
72
75
  end
73
76
  if @matrix.respond_to? :fields
74
77
  @variables_names=@matrix.fields
75
78
  else
76
- @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
79
+ @variables_names=@n_variables.times.map {|i| "V#{i+1}".to_sym}
77
80
  end
78
81
  calculate_eigenpairs
79
82
 
@@ -81,7 +84,6 @@ module Factor
81
84
  # Set number of factors with eigenvalues > 1
82
85
  @m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
83
86
  end
84
-
85
87
  end
86
88
  def rotation
87
89
  @rotation_type.new(component_matrix)
@@ -92,10 +94,10 @@ module Factor
92
94
  def create_centered_ds
93
95
  h={}
94
96
  @original_ds.factors.each {|f|
95
- mean=@original_ds[f].mean
96
- h[f]=@original_ds[f].recode {|c| c-mean}
97
+ mean = @original_ds[f].mean
98
+ h[f] = @original_ds[f].recode {|c| c-mean}
97
99
  }
98
- @ds=h.to_dataset
100
+ @ds = Daru::DataFrame.new(h)
99
101
  end
100
102
 
101
103
  # Feature matrix for +m+ factors
@@ -137,8 +139,8 @@ module Factor
137
139
  pcs=(fv.transpose*data_matrix.transpose).transpose
138
140
 
139
141
  pcs.extend Statsample::NamedMatrix
140
- pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
141
- pcs.to_dataset
142
+ pcs.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
143
+ pcs.to_dataframe
142
144
  end
143
145
  def component_matrix(m=nil)
144
146
  var="component_matrix_#{matrix_type}"
@@ -159,7 +161,7 @@ module Factor
159
161
  cm.extend NamedMatrix
160
162
  cm.name=_("Component matrix (from covariance)")
161
163
  cm.fields_x = @variables_names
162
- cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
164
+ cm.fields_y = m.times.map {|i| "PC_#{i+1}".to_sym }
163
165
 
164
166
  cm
165
167
  end
@@ -180,17 +182,16 @@ module Factor
180
182
  cm.extend CovariateMatrix
181
183
  cm.name=_("Component matrix")
182
184
  cm.fields_x = @variables_names
183
- cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
185
+ cm.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
184
186
  cm
185
187
  end
186
188
  def communalities(m=nil)
187
-
188
189
  m||=@m
189
190
  h=[]
190
191
  @n_variables.times do |i|
191
192
  sum=0
192
193
  m.times do |j|
193
- sum+=(@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
194
+ sum += (@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
194
195
  end
195
196
  h.push(sum)
196
197
  end
@@ -202,11 +203,11 @@ module Factor
202
203
  end
203
204
  def eigenvectors
204
205
  @eigenpairs.collect {|c|
205
- @use_gsl ? c[1].to_gsl : c[1].to_vector
206
+ @use_gsl ? c[1].to_gsl : Daru::Vector.new(c[1])
206
207
  }
207
208
  end
208
209
  def calculate_eigenpairs
209
- @eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
210
+ @eigenpairs= @use_gsl ? @matrix.to_gsl.eigenpairs : @matrix.to_matrix.eigenpairs_ruby
210
211
  end
211
212
 
212
213
 
@@ -6,9 +6,9 @@ module Factor
6
6
  #
7
7
  # == Usage:
8
8
  # require 'statsample'
9
- # a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
10
- # b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_numeric
11
- # ds={'a'=>a,'b'=>b}.to_dataset
9
+ # a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
10
+ # b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
11
+ # ds= Daru::DataFrame.new({:a => a,:b => b})
12
12
  # cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
13
13
  # pa=Statsample::Factor::PrincipalAxis.new(cor_matrix)
14
14
  # pa.iterate(1)
@@ -8,12 +8,12 @@ module Statsample
8
8
  #
9
9
  # == Usage
10
10
  # === Svg output
11
- # a=[1,2,3,4].to_numeric
12
- # b=[3,4,5,6].to_numeric
13
- # puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
11
+ # a = Daru::Vector.new([1,2,3,4])
12
+ # b = Daru::Vector.new([3,4,5,6])
13
+ # puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
14
14
  # === Using ReportBuilder
15
- # a=[1,2,3,4].to_numeric
16
- # b=[3,4,5,6].to_numeric
15
+ # a = Daru::Vector.new([1,2,3,4])
16
+ # b = Daru::Vector.new([3,4,5,6])
17
17
  # rb=ReportBuilder.new
18
18
  # rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
19
19
  # rb.save_html('boxplot.html')
@@ -85,8 +85,6 @@ module Statsample
85
85
  min||=@vectors.map {|v| v.min}.min
86
86
  max||=@vectors.map {|v| v.max}.max
87
87
 
88
-
89
-
90
88
  margin_hor=margin_left + margin_right
91
89
  margin_vert=margin_top + margin_bottom
92
90
  x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
@@ -115,12 +113,10 @@ module Statsample
115
113
  out[:low_whisker]=min
116
114
  out[:high_whisker]=max
117
115
  # And now, data outside whiskers
118
- out[:outliers]=v.data_with_nils.find_all {|d| d < min or d > max }
116
+ out[:outliers]=v.to_a.find_all {|d| d < min or d > max }
119
117
  out
120
118
  }
121
-
122
-
123
-
119
+
124
120
  vis=Rubyvis::Panel.new do |pan|
125
121
  pan.width width - margin_hor
126
122
  pan.height height - margin_vert
@@ -157,7 +153,6 @@ module Statsample
157
153
  bp.left {|v| x_scale[index]}
158
154
  bp.width x_scale.range_band
159
155
 
160
-
161
156
  # Bar
162
157
  bp.bar do |b|
163
158
  b.bottom {|v| y_scale[v[:percentil_25]]}
@@ -168,9 +163,7 @@ module Statsample
168
163
  colors.scale(that.groups[parent.index]).darker
169
164
  else
170
165
  colors.scale(index).darker
171
- end
172
-
173
-
166
+ end
174
167
  }
175
168
  b.fill_style {|v|
176
169
  if that.groups
@@ -237,7 +230,6 @@ module Statsample
237
230
  builder.section(:name=>name) do |b|
238
231
  b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
239
232
  end
240
-
241
233
  end
242
234
  end
243
235
  end