statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,128 @@
1
if HAS_ALGIB
  module Statsample
    module Regression
      module Multiple
        # Class for Multiple Regression Analysis.
        # Requires the Alglib gem and uses a listwise approach: every row that
        # contains one of Daru::MISSING_VALUES is dropped before fitting.
        # Faster than GslEngine on massive prediction use, because process is c-based.
        # Prefer GslEngine if you need good memory use.
        # If you need pairwise, use RubyEngine
        # Example:
        #
        #   @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
        #   @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
        #   @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
        #   @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
        #   ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
        #   lr=Statsample::Regression::Multiple::AlglibEngine.new(ds, :y)
        #
        class AlglibEngine < BaseEngine
          # Fits the regression of +y_var+ on every other vector of +ds+.
          #
          # ds::    data frame holding predictors and the dependent variable
          # y_var:: name of the dependent variable inside +ds+
          # opts::  options forwarded untouched to BaseEngine#initialize
          def initialize(ds, y_var, opts = Hash.new)
            super
            @ds = ds.reject_values(*Daru::MISSING_VALUES)
            @ds_valid = @ds
            # BaseEngine#df_e subtracts from @valid_cases, so it has to be set
            # here (GslEngine does the same).
            @valid_cases = @ds_valid.nrows
            @dy = @ds[@y_var]
            @ds_indep = ds.dup(ds.vectors.to_a - [y_var])

            # Predictor columns, in data frame order, with the dependent
            # variable appended as the last column of the matrix handed to Alglib.
            @fields = []
            predictor_columns = []
            @ds.vectors.each do |field|
              next if field == @y_var
              predictor_columns.push(@ds[field].to_a)
              @fields.push(field)
            end
            @dep_columns = predictor_columns.dup

            matrix = ::Matrix.columns(predictor_columns + [@dy.to_a])
            @lr_s = nil
            @lr = ::Alglib::LinearRegression.build_from_matrix(matrix)
            @coeffs = assign_names(@lr.coeffs)
          end

          # Marshal hook: only the filtered data frame and the name of the
          # dependent variable are serialized.
          def _dump(i)
            Marshal.dump({'ds' => @ds, 'y_var' => @y_var})
          end

          # Marshal hook: rebuilds the engine (and refits the model) from the
          # hash written by #_dump.
          def self._load(data)
            h = Marshal.load(data)
            new(h['ds'], h['y_var'])
          end

          # Hash of unstandardized coefficients, keyed by predictor name.
          def coeffs
            @coeffs
          end

          # Coefficients using a constant
          # Based on http://www.xycoon.com/ols1.htm
          #
          # Returns the column matrix (X'X)^-1 X'y, where X is the predictor
          # matrix with a leading column of ones.
          def matrix_resolution
            columns = @dep_columns.map { |column| column.map { |value| value.to_f } }
            columns.unshift([1.0] * @ds.nrows)
            y = ::Matrix.columns([@dy.to_a.map { |value| value.to_f }])
            x = ::Matrix.columns(columns)
            xt = x.t
            (xt * x).inverse * xt * y
          end

          # R^2, computed as the square of #r.
          def r2
            r**2
          end

          # Multiple R: Pearson correlation between observed and predicted y.
          def r
            Bivariate.pearson(@dy, predicted)
          end

          # Total sum of squares of the dependent variable.
          def sst
            @dy.ss
          end

          # Intercept as reported by the Alglib regression object.
          def constant
            @lr.constant
          end

          # Hash of coefficients of the regression fitted on the standardized
          # data frame, keyed by predictor name.
          def standarized_coeffs
            assign_names(lr_s.coeffs)
          end

          # Alglib regression fitted on standardized data; built on first use.
          def lr_s
            build_standarized if @lr_s.nil?
            @lr_s
          end

          # Standardizes the filtered data frame and fits a second Alglib
          # regression on it, stored in @lr_s.
          def build_standarized
            @ds_s = @ds.standardize
            columns = []
            @ds_s.vectors.each do |field|
              columns.push(@ds_s[field].to_a) unless field == @y_var
            end
            @dep_columns_s = columns.dup
            matrix = ::Matrix.columns(columns + [@ds_s[@y_var].to_a])
            @lr_s = ::Alglib::LinearRegression.build_from_matrix(matrix)
          end

          # Predicted y for +v+, an array of predictor values ordered like @fields.
          def process(v)
            @lr.process(v)
          end

          # Same as #process, using the regression fitted on standardized data.
          def process_s(v)
            lr_s.process(v)
          end

          # ???? Not equal to SPSS output
          #
          # Residuals divided by their sample standard deviation. Missing
          # residuals stay nil instead of raising.
          def standarized_residuals
            res = residuals
            res_sd = res.sds
            Daru::Vector.new(res.collect { |v| v.nil? ? nil : v.quo(res_sd) })
          end
        end
      end
    end
  end # for Statsample
end # for if
126
+
127
+
128
+
@@ -0,0 +1,251 @@
1
module Statsample
  module Regression
    module Multiple
      # Base class for Multiple Regression Engines.
      #
      # Subclasses are expected to provide #r, #sst, #r2, #coeffs, #constant
      # and #standarized_coeffs, and to set @fields, @dep_columns, @dy,
      # @ds_valid and @valid_cases, all of which are read by the methods below.
      class BaseEngine
        include Statsample::Summarizable
        # Name of analysis
        attr_accessor :name
        # Minimum number of valid cases for pairs of correlation
        attr_reader :cases
        # Number of valid cases (listwise)
        attr_reader :valid_cases
        # Number of total cases (dataset.cases)
        attr_reader :total_cases

        # Number of decimals used by #report_building
        attr_accessor :digits

        def self.univariate?
          true
        end

        # ds::    data frame with the predictors and the dependent variable
        # y_var:: name of the dependent variable
        # opts::  hash of options; every key that has a matching writer
        #         (for instance :digits or :name) is assigned, others are ignored
        def initialize(ds, y_var, opts = Hash.new)
          @ds = ds
          @predictors_n = @ds.vectors.size - 1
          @total_cases = @ds.nrows
          @cases = @ds.nrows
          @y_var = y_var
          @r2 = nil
          @name = _("Multiple Regression: %s over %s") % [ds.vectors.to_a.join(","), @y_var]

          @opts = {:digits => 3}.merge(opts)
          @opts.each do |key, value|
            # Test for the writer itself: reader-only attributes such as
            # :cases must not blow up with NoMethodError.
            writer = "#{key}="
            send(writer, value) if respond_to?(writer)
          end
        end

        # Calculate F Test
        def anova
          @anova ||= Statsample::Anova::OneWay.new(
            :ss_num => ssr, :ss_den => sse,
            :df_num => df_r, :df_den => df_e,
            :name_numerator => _("Regression"),
            :name_denominator => _("Error"),
            :name => "ANOVA"
          )
        end

        # Standard error of estimate
        def se_estimate
          Math.sqrt(sse.quo(df_e))
        end

        # Retrieves a vector with predicted values for y.
        # Positions where any predictor is nil yield nil.
        def predicted
          values = Array.new(@total_cases) do |i|
            row = predictor_values_at(i)
            row && process(row)
          end
          Daru::Vector.new(values)
        end

        # Retrieves a vector with standardized values for y
        def standarized_predicted
          predicted.standardize
        end

        # Retrieves a vector with residuals values for y.
        # Positions where a predictor or the observed y is nil yield nil.
        def residuals
          observed = @ds[@y_var]
          values = (0...@total_cases).map do |i|
            row = predictor_values_at(i)
            # The observed value is only looked up for complete predictor rows.
            next nil if row.nil?
            y_i = observed[i]
            y_i.nil? ? nil : y_i - process(row)
          end
          Daru::Vector.new(values)
        end

        # R Multiple
        def r
          raise "You should implement this"
        end

        # Sum of squares Total
        def sst
          raise "You should implement this"
        end

        # R^2 Adjusted.
        # Estimate Population R^2 using the Ezekiel formula.
        # Always lower than sample R^2
        # == Reference:
        # * Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
        def r2_adjusted
          r2 - ((1 - r2) * @predictors_n).quo(df_e)
        end

        # Sum of squares (regression)
        def ssr
          r2 * sst
        end

        # Sum of squares (Error)
        def sse
          sst - ssr
        end

        # T values for coeffs: each coefficient divided by its standard error.
        def coeffs_t
          se = coeffs_se
          coeffs.each_with_object({}) do |(name, value), out|
            out[name] = value / se[name]
          end
        end

        # Mean square Regression
        def msr
          ssr.quo(df_r)
        end

        # Mean Square Error
        def mse
          sse.quo(df_e)
        end

        # Degrees of freedom for regression
        def df_r
          @predictors_n
        end

        # Degrees of freedom for error
        def df_e
          @valid_cases - @predictors_n - 1
        end

        # Fisher for Anova
        def f
          anova.f
        end

        # p-value of Fisher
        def probability
          anova.probability
        end

        # Tolerance for a given variable
        # http://talkstats.com/showthread.php?t=5056
        #
        # Regresses +var+ on the remaining predictors with the same engine
        # class and returns 1 - R^2 of that regression.
        def tolerance(var)
          vectors = assign_names(@dep_columns).each_with_object({}) do |(name, column), h|
            h[name] = Daru::Vector.new(column)
          end
          lr = self.class.new(Daru::DataFrame.new(vectors), var)
          1 - lr.r2
        end

        # Tolerances for each coefficient
        def coeffs_tolerances
          @fields.each_with_object({}) do |field, out|
            out[field] = tolerance(field)
          end
        end

        # Standard Error for coefficients
        def coeffs_se
          mean_square_error = sse.quo(df_e)
          coeffs.each_with_object({}) do |(name, _value), out|
            out[name] = Math.sqrt(mean_square_error / (@ds[name].sum_of_squares * tolerance(name)))
          end
        end

        # Standard error of R^2
        # ????
        def se_r2
          Math.sqrt((4 * r2 * (1 - r2)**2 * (df_e)**2).quo((@cases**2 - 1) * (@cases + 3)))
        end

        # Estimated Variance-Covariance Matrix
        # Used for calculation of se of constant
        #
        # Returns a matrix with the square root of every non-negative entry of
        # (X'X)^-1 * MSE; negative entries become nil. X is the matrix of
        # predictors with a leading column of ones.
        def estimated_variance_covariance_matrix
          columns = @ds_valid.vectors.to_a.reject { |k| k == @y_var }.map { |k| @ds_valid[k].to_a }
          columns.unshift([1.0] * @valid_cases)
          x = ::Matrix.columns(columns)
          matrix = (x.t * x).inverse * mse
          matrix.collect { |i| Math.sqrt(i) if i >= 0 }
        end

        # T for constant.
        # Returns nil when the standard error of the constant is not available.
        def constant_t
          se = constant_se
          se.nil? ? nil : constant.to_f / se
        end

        # Standard error for constant
        def constant_se
          estimated_variance_covariance_matrix[0, 0]
        end

        # Writes the regression summary (fit statistics, equation, ANOVA and
        # coefficient table) into the report builder +b+.
        def report_building(b)
          di = "%0.#{digits}f"
          b.section(:name => @name) do |g|
            c = coeffs
            g.text(_("Engine: %s") % self.class)
            g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
            g.text(_("R=") + (di % r))
            g.text(_("R^2=") + (di % r2))
            g.text(_("R^2 Adj=") + (di % r2_adjusted))
            g.text(_("Std.Error R=") + (di % se_estimate))

            terms = @fields.collect { |k| sprintf("#{di}%s", c[k], k) }.join(' + ')
            g.text(_("Equation") + "=" + sprintf(di, constant) + " + " + terms)

            g.parse_element(anova)
            sc = standarized_coeffs
            cse = coeffs_se

            # Computed once: each call inverts the X'X matrix.
            const_se = constant_se
            const_t = constant_t

            header = %w{coeff b beta se t}.collect { |field| _(field) }
            g.table(:name => _("Beta coefficients"), :header => header) do |t|
              t.row([
                _("Constant"),
                sprintf(di, constant),
                "-",
                const_se.nil? ? "" : sprintf(di, const_se),
                const_t.nil? ? "" : sprintf(di, const_t)
              ])
              @fields.each do |f|
                t.row([f, sprintf(di, c[f]), sprintf(di, sc[f]), sprintf(di, cse[f]), sprintf(di, c[f].quo(cse[f]))])
              end
            end
          end
        end

        # Builds a hash that maps every name in @fields to the element of +c+
        # found at the same position.
        def assign_names(c)
          named = {}
          @fields.each_with_index { |field, i| named[field] = c[i] }
          named
        end

        # Sum of squares of regression
        # using the predicted value minus y mean
        def ssr_direct
          mean = @dy.mean
          (0...@ds.nrows).inject(0) do |acc, i|
            row = predictor_values_at(i)
            row.nil? ? acc : acc + ((process(row) - mean)**2)
          end
        end

        # Sum of squares of error, as total minus regression sum of squares.
        def sse_direct
          sst - ssr
        end

        # Predicted y for +v+, an array of predictor values ordered like
        # @fields: constant + sum(coefficient * value).
        def process(v)
          c = coeffs
          @fields.each_with_index.inject(constant) do |total, (field, i)|
            total + c[field] * v[i]
          end
        end

        private

        # Values of every predictor column at position +i+, or nil when at
        # least one of them is nil.
        def predictor_values_at(i)
          row = @dep_columns.map { |column| column[i] }
          row.any? { |value| value.nil? } ? nil : row
        end
      end
    end
  end
end
@@ -0,0 +1,129 @@
1
if Statsample.has_gsl?
  module Statsample
    module Regression
      module Multiple
        # Class for Multiple Regression Analysis.
        # Requires rbgsl and uses a listwise approach: every row that contains
        # one of Daru::MISSING_VALUES is dropped before fitting.
        # Slower on prediction of values than Alglib, because predict is ruby based.
        # Better memory management on multiple (+1000) series of regression.
        # If you need pairwise, use RubyEngine
        # Example:
        #
        #   @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
        #   @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
        #   @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
        #   @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
        #   ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
        #   lr=Statsample::Regression::Multiple::GslEngine.new(ds,:y)
        #
        class GslEngine < BaseEngine
          # Fits the regression of +y_var+ on every other vector of +ds+.
          #
          # ds::    data frame holding predictors and the dependent variable
          # y_var:: name of the dependent variable inside +ds+
          # opts::  options forwarded untouched to BaseEngine#initialize
          def initialize(ds, y_var, opts = Hash.new)
            super
            @ds = ds.reject_values(*Daru::MISSING_VALUES)
            @ds_valid = @ds
            @valid_cases = @ds_valid.nrows
            @dy = @ds[@y_var]
            @ds_indep = ds.dup(ds.vectors.to_a - [y_var])

            # Design matrix: one column per predictor, followed by a final
            # column of ones for the constant.
            row_count = @ds.nrows
            design = GSL::Matrix.alloc(row_count, @ds.vectors.size)
            constant_col = @ds.vectors.size - 1
            row_count.times { |row| design.set(row, constant_col, 1) }

            columns = []
            @fields = []
            col = 0
            @ds.vectors.each do |field|
              next if field == @y_var
              values = @ds[field].to_a
              # Fill by row position, not by index label: after reject_values
              # the labels are not guaranteed to run from 0 to nrows-1.
              values.each_with_index { |value, row| design.set(row, col, value) }
              columns.push(values)
              @fields.push(field)
              col += 1
            end
            @dep_columns = columns.dup
            @lr_s = nil

            fitted, @cov, @chisq, @status = GSL::MultiFit.linear(design, @dy.to_gsl)
            @constant = fitted[constant_col]
            @coeffs_a = fitted.to_a.slice(0...constant_col)
            @coeffs = assign_names(@coeffs_a)
          end

          # Marshal hook: only the filtered data frame and the name of the
          # dependent variable are serialized.
          def _dump(i)
            Marshal.dump({'ds' => @ds, 'y_var' => @y_var})
          end

          # Marshal hook: rebuilds the engine (and refits the model) from the
          # hash written by #_dump.
          def self._load(data)
            h = Marshal.load(data)
            new(h['ds'], h['y_var'])
          end

          # Hash of unstandardized coefficients, keyed by predictor name.
          def coeffs
            @coeffs
          end

          # Coefficients using a constant
          # Based on http://www.xycoon.com/ols1.htm
          #
          # Returns the column matrix (X'X)^-1 X'y, where X is the predictor
          # matrix with a leading column of ones.
          def matrix_resolution
            columns = @dep_columns.map { |column| column.map { |value| value.to_f } }
            columns.unshift([1.0] * @ds.nrows)
            y = ::Matrix.columns([@dy.to_a.map { |value| value.to_f }])
            x = ::Matrix.columns(columns)
            xt = x.t
            (xt * x).inverse * xt * y
          end

          # R^2, computed as the square of #r.
          def r2
            r**2
          end

          # Multiple R: Pearson correlation between observed and predicted y.
          def r
            Bivariate.pearson(@dy, predicted)
          end

          # Total sum of squares of the dependent variable.
          def sst
            @dy.ss
          end

          # Intercept taken from the GSL fit.
          def constant
            @constant
          end

          # Coefficients of the regression fitted on the standardized data frame.
          def standarized_coeffs
            lr_s.coeffs
          end

          # GslEngine fitted on standardized data; built on first use.
          def lr_s
            build_standarized if @lr_s.nil?
            @lr_s
          end

          # Standardizes the filtered data frame and fits a second GslEngine
          # on it, stored in @lr_s.
          def build_standarized
            @ds_s = @ds.standardize
            @lr_s = GslEngine.new(@ds_s, @y_var)
          end

          # Predicted y for +v+ using the regression fitted on standardized data.
          def process_s(v)
            lr_s.process(v)
          end

          # ???? Not equal to SPSS output
          #
          # Residuals divided by their sample standard deviation. Missing
          # residuals stay nil instead of raising.
          def standarized_residuals
            res = residuals
            res_sd = res.sds
            Daru::Vector.new(res.collect { |v| v.nil? ? nil : v.quo(res_sd) })
          end

          # Standard error for coeffs
          #
          # Row/column 0 of the estimated variance-covariance matrix belongs to
          # the constant and the predictors follow in @fields order, so the
          # lookup is by predictor position rather than by the position of the
          # vector inside the data frame (which also counts the dependent
          # variable).
          def coeffs_se
            evcm = estimated_variance_covariance_matrix
            out = {}
            @fields.each_with_index do |field, i|
              out[field] = evcm[i + 1, i + 1]
            end
            out
          end
        end
      end
    end
  end # for Statsample
end # for if