statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,128 @@
1
if HAS_ALGIB
  module Statsample
    module Regression
      module Multiple
        # Class for Multiple Regression Analysis.
        # Requires the Alglib gem and uses a listwise approach.
        # Faster than GslEngine on massive prediction use, because the process is C-based.
        # Prefer GslEngine if you need good memory use.
        # If you need pairwise, use RubyEngine.
        #
        # Example:
        #
        #   @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
        #   @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
        #   @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
        #   @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
        #   ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
        #   lr=Statsample::Regression::Multiple::AlglibEngine.new(ds, :y)
        #
        class AlglibEngine < BaseEngine
          # Builds the regression from +ds+, using +y_var+ as the criterion
          # and every other vector as a predictor. Rows containing any of
          # Daru::MISSING_VALUES are dropped before fitting (listwise).
          #
          # ds    :: dataset holding the predictors and the criterion
          # y_var :: name of the criterion vector inside +ds+
          # opts  :: options forwarded to BaseEngine#initialize
          def initialize(ds, y_var, opts = {})
            super
            @ds = ds.reject_values(*Daru::MISSING_VALUES)
            @ds_valid = @ds
            # BaseEngine#df_e reads @valid_cases; it has to be set here, as
            # the base initializer never assigns it.
            @valid_cases = @ds_valid.nrows
            @dy = @ds[@y_var]
            @ds_indep = ds.dup(ds.vectors.to_a - [y_var])
            # Predictor names and their columns, in dataset order.
            @fields = @ds.vectors.to_a.reject { |f| f == @y_var }
            @dep_columns = @fields.map { |f| @ds[f].to_a }
            # Custom matrix: predictor columns followed by the criterion column.
            matrix = Matrix.columns(@dep_columns + [@dy.to_a])
            @lr_s = nil
            @lr = ::Alglib::LinearRegression.build_from_matrix(matrix)
            @coeffs = assign_names(@lr.coeffs)
          end

          # Marshal hook: only the (already filtered) dataset and the
          # criterion name are serialized; the model is rebuilt on load.
          def _dump(i)
            Marshal.dump({'ds'=>@ds, 'y_var'=>@y_var})
          end

          # Marshal hook: rebuilds the engine from the dumped dataset.
          # NOTE(review): Marshal.load must only be fed trusted data.
          def self._load(data)
            h = Marshal.load(data)
            new(h['ds'], h['y_var'])
          end

          # Hash of unstandardized coefficients, keyed by predictor name.
          def coeffs
            @coeffs
          end

          # Coefficients using a constant.
          # Based on http://www.xycoon.com/ols1.htm
          #
          # Returns the column matrix (X'X)^-1 X'y, where X is the predictor
          # matrix with a leading column of ones.
          def matrix_resolution
            columns = @dep_columns.map { |xi| xi.map(&:to_f) }
            columns.unshift([1.0] * @ds.nrows)
            y = Matrix.columns([@dy.to_a.map(&:to_f)])
            x = Matrix.columns(columns)
            xt = x.t
            (xt * x).inverse * xt * y
          end

          # R^2, computed as the square of #r.
          def r2
            r**2
          end

          # Multiple R: Pearson correlation between observed and predicted y.
          def r
            Bivariate::pearson(@dy, predicted)
          end

          # Sum of squares total of the criterion.
          def sst
            @dy.ss
          end

          # Regression constant, as reported by the Alglib model.
          def constant
            @lr.constant
          end

          # Hash of coefficients of the model fitted on the standardized
          # dataset, keyed by predictor name.
          def standarized_coeffs
            assign_names(lr_s.coeffs)
          end

          # Alglib model fitted on the standardized dataset (built lazily).
          def lr_s
            build_standarized if @lr_s.nil?
            @lr_s
          end

          # Fits and stores the Alglib model for the standardized dataset.
          def build_standarized
            @ds_s = @ds.standardize
            @dep_columns_s = @ds_s.vectors.to_a
                                  .reject { |f| f == @y_var }
                                  .map { |f| @ds_s[f].to_a }
            matrix = Matrix.columns(@dep_columns_s + [@ds_s[@y_var].to_a])
            @lr_s = Alglib::LinearRegression.build_from_matrix(matrix)
          end

          # Predicted y for the array of predictor values +v+.
          def process(v)
            @lr.process(v)
          end

          # Same as #process, using the standardized model.
          def process_s(v)
            lr_s.process(v)
          end

          # Residuals divided by their sample standard deviation.
          # ???? Not equal to SPSS output
          def standarized_residuals
            res = residuals
            red_sd = res.sds
            Daru::Vector.new(res.collect { |v| v.quo(red_sd) })
          end
        end
      end
    end
  end # for Statsample
end # for if
126
+
127
+
128
+
@@ -0,0 +1,251 @@
1
module Statsample
  module Regression
    module Multiple
      # Base class for Multiple Regression Engines.
      #
      # This class reads, but never assigns, @fields, @dep_columns,
      # @valid_cases, @ds_valid and @dy, and it calls +coeffs+, +constant+,
      # +r2+ and +standarized_coeffs+ without defining them; +r+ and +sst+
      # raise here. Concrete engines are expected to provide all of these.
      class BaseEngine
        include Statsample::Summarizable
        # Name of analysis
        attr_accessor :name
        # Minimum number of valid cases for pairs of correlation
        attr_reader :cases
        # Number of valid cases (listwise)
        attr_reader :valid_cases
        # Number of total cases (dataset.cases)
        attr_reader :total_cases

        # Number of decimal digits used by #report_building
        attr_accessor :digits

        def self.univariate?
          true
        end

        # ds    :: dataset holding the predictors and the criterion
        # y_var :: name of the criterion vector inside +ds+
        # opts  :: hash of options; each key that has a public writer on
        #          this object (e.g. :digits, :name) is assigned.
        #          Defaults to :digits => 3.
        def initialize(ds, y_var, opts = {})
          @ds = ds
          @predictors_n = @ds.vectors.size - 1
          @total_cases = @ds.nrows
          @cases = @ds.nrows
          @y_var = y_var
          @r2 = nil
          @name = _("Multiple Regression: %s over %s") % [ds.vectors.to_a.join(","), @y_var]

          opts_default = {:digits => 3}
          @opts = opts_default.merge opts

          @opts.each do |k, v|
            # Test for the writer we are about to call, not for the reader.
            writer = "#{k}="
            send(writer, v) if respond_to?(writer)
          end
        end

        # Calculate F Test
        def anova
          @anova ||= Statsample::Anova::OneWay.new(:ss_num=>ssr, :ss_den=>sse, :df_num=>df_r, :df_den=>df_e, :name_numerator=>_("Regression"), :name_denominator=>_("Error"), :name=>"ANOVA")
        end

        # Standard error of estimate
        def se_estimate
          Math.sqrt(sse.quo(df_e))
        end

        # Retrieves a vector with predicted values for y.
        # A case yields nil when any of its predictor values is nil.
        def predicted
          Daru::Vector.new(
            @total_cases.times.collect do |i|
              vect = @dep_columns.collect { |v| v[i] }
              vect.any?(&:nil?) ? nil : process(vect)
            end
          )
        end

        # Retrieves a vector with standardized predicted values for y
        def standarized_predicted
          predicted.standardize
        end

        # Retrieves a vector with residuals values for y.
        # A case yields nil when any predictor value, or y itself, is nil.
        def residuals
          Daru::Vector.new(
            (0...@total_cases).collect do |i|
              vect = @dep_columns.collect { |v| v[i] }
              # The y lookup stays behind the predictor check so it is
              # skipped for cases that are already known to be invalid.
              if vect.any?(&:nil?) || @ds[@y_var][i].nil?
                nil
              else
                @ds[@y_var][i] - process(vect)
              end
            end
          )
        end

        # R Multiple
        def r
          raise "You should implement this"
        end

        # Sum of squares Total
        def sst
          raise "You should implement this"
        end

        # R^2 Adjusted.
        # Estimate Population R^2 using Ezekiel formula.
        # Always lower than sample R^2
        # == Reference:
        # * Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
        def r2_adjusted
          r2 - ((1 - r2) * @predictors_n).quo(df_e)
        end

        # Sum of squares (regression)
        def ssr
          r2 * sst
        end

        # Sum of squares (Error)
        def sse
          sst - ssr
        end

        # T values for coeffs: each coefficient divided by its standard error.
        def coeffs_t
          se = coeffs_se
          coeffs.each_with_object({}) { |(k, v), out| out[k] = v / se[k] }
        end

        # Mean square Regression
        def msr
          ssr.quo(df_r)
        end

        # Mean Square Error
        def mse
          sse.quo(df_e)
        end

        # Degrees of freedom for regression
        def df_r
          @predictors_n
        end

        # Degrees of freedom for error
        def df_e
          @valid_cases - @predictors_n - 1
        end

        # Fisher for Anova
        def f
          anova.f
        end

        # p-value of Fisher
        def probability
          anova.probability
        end

        # Tolerance for a given variable: 1 - R^2 of the regression of
        # +var+ over the remaining predictors.
        # http://talkstats.com/showthread.php?t=5056
        def tolerance(var)
          ds = assign_names(@dep_columns)
          ds.each { |k, v| ds[k] = Daru::Vector.new(v) }
          lr = self.class.new(Daru::DataFrame.new(ds), var)
          1 - lr.r2
        end

        # Tolerances for each coefficient, keyed by predictor name.
        def coeffs_tolerances
          @fields.each_with_object({}) { |f, a| a[f] = tolerance(f) }
        end

        # Standard Error for coefficients, keyed by predictor name.
        def coeffs_se
          mean_square_error = mse
          coeffs.each_with_object({}) do |(k, _v), out|
            out[k] = Math.sqrt(mean_square_error / (@ds[k].sum_of_squares * tolerance(k)))
          end
        end

        # Standard error of R^2
        # ????
        def se_r2
          Math.sqrt((4 * r2 * (1 - r2)**2 * (df_e)**2).quo((@cases**2 - 1) * (@cases + 3)))
        end

        # Estimated Variance-Covariance Matrix
        # Used for calculation of se of constant.
        #
        # Returns (X'X)^-1 * mse with the square root applied to every
        # non-negative entry (negative entries become nil). X holds a
        # leading column of ones followed by the predictors of @ds_valid.
        def estimated_variance_covariance_matrix
          columns = @ds_valid.vectors.to_a
                             .reject { |k| k == @y_var }
                             .map { |k| @ds_valid[k].to_a }
          columns.unshift([1.0] * @valid_cases)
          x = ::Matrix.columns(columns)
          matrix = (x.t * x).inverse * mse
          matrix.collect { |i| Math.sqrt(i) if i >= 0 }
        end

        # T for constant
        def constant_t
          constant.to_f / constant_se
        end

        # Standard error for constant
        def constant_se
          estimated_variance_covariance_matrix[0, 0]
        end

        # Adds the regression summary (fit statistics, equation, ANOVA and
        # the table of coefficients) to the report builder +b+.
        def report_building(b)
          di = "%0.#{digits}f"
          b.section(:name=>@name) do |g|
            c = coeffs
            g.text _("Engine: %s") % self.class
            g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
            g.text _("R=")+(di % r)
            g.text _("R^2=")+(di % r2)
            g.text _("R^2 Adj=")+(di % r2_adjusted)
            g.text _("Std.Error R=")+ (di % se_estimate)

            g.text(_("Equation")+"="+ sprintf(di,constant) +" + "+ @fields.collect {|k| sprintf("#{di}%s",c[k],k)}.join(' + ') )

            g.parse_element(anova)
            sc = standarized_coeffs

            cse = coeffs_se
            g.table(:name=>_("Beta coefficients"), :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
              t.row([_("Constant"), sprintf(di, constant), "-", constant_se.nil? ? "": sprintf(di, constant_se), constant_t.nil? ? "" : sprintf(di, constant_t)])
              @fields.each do |f|
                t.row([f, sprintf(di, c[f]), sprintf(di, sc[f]), sprintf(di, cse[f]), sprintf(di, c[f].quo(cse[f]))])
              end
            end
          end
        end

        # Maps each predictor name in @fields to the element of +c+ at the
        # same position.
        def assign_names(c)
          @fields.each_with_index.each_with_object({}) { |(f, i), a| a[f] = c[i] }
        end

        # Sum of squares of regression
        # using the predicted value minus y mean.
        # Cases with a nil predictor value are skipped.
        def ssr_direct
          mean = @dy.mean
          (0...@ds.nrows).inject(0) do |a, i|
            v = @dep_columns.collect { |c| c[i] }
            v.any?(&:nil?) ? a : a + ((process(v) - mean)**2)
          end
        end

        # NOTE(review): this subtracts #ssr, not #ssr_direct, despite the
        # name — confirm whether that is intended.
        def sse_direct
          sst - ssr
        end

        # Predicted y for the array of predictor values +v+ (ordered as
        # @fields): constant plus the sum of coefficient * value.
        def process(v)
          c = coeffs
          total = constant
          @fields.each_index do |i|
            total += c[@fields[i]] * v[i]
          end
          total
        end
      end
    end
  end
end
@@ -0,0 +1,129 @@
1
if Statsample.has_gsl?
  module Statsample
    module Regression
      module Multiple
        # Class for Multiple Regression Analysis.
        # Requires rbgsl and uses a listwise approach.
        # Slower on prediction of values than Alglib, because predict is ruby based.
        # Better memory management on multiple (+1000) series of regression.
        # If you need pairwise, use RubyEngine.
        #
        # Example:
        #
        #   @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
        #   @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
        #   @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
        #   @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
        #   ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
        #   lr=Statsample::Regression::Multiple::GslEngine.new(ds,:y)
        #
        class GslEngine < BaseEngine
          # Builds the regression from +ds+, using +y_var+ as the criterion
          # and every other vector as a predictor. Rows containing any of
          # Daru::MISSING_VALUES are dropped before fitting (listwise).
          #
          # ds    :: dataset holding the predictors and the criterion
          # y_var :: name of the criterion vector inside +ds+
          # opts  :: options forwarded to BaseEngine#initialize
          def initialize(ds, y_var, opts = {})
            super
            @ds = ds.reject_values(*Daru::MISSING_VALUES)
            @ds_valid = @ds
            @valid_cases = @ds_valid.nrows
            @dy = @ds[@y_var]
            @ds_indep = ds.dup(ds.vectors.to_a - [y_var])
            # Predictor names and their columns, in dataset order.
            @fields = @ds.vectors.to_a.reject { |f| f == @y_var }
            @dep_columns = @fields.map { |f| @ds[f].to_a }
            # Custom matrix: one column per predictor plus a last column of
            # ones for the constant.
            constant_col = @ds.vectors.size - 1
            max_deps = GSL::Matrix.alloc(@ds.nrows, @ds.vectors.size)
            @ds.nrows.times { |row| max_deps.set(row, constant_col, 1) }
            @dep_columns.each_with_index do |values, col|
              # Rows are addressed by position in the column array, so the
              # dataset's index labels never leak into GSL row numbers.
              values.each_with_index { |value, row| max_deps.set(row, col, value) }
            end
            @lr_s = nil
            c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.to_gsl)
            @constant = c[constant_col]
            @coeffs_a = c.to_a.slice(0...constant_col)
            @coeffs = assign_names(@coeffs_a)
          end

          # Marshal hook: only the (already filtered) dataset and the
          # criterion name are serialized; the model is rebuilt on load.
          def _dump(i)
            Marshal.dump({'ds'=>@ds, 'y_var'=>@y_var})
          end

          # Marshal hook: rebuilds the engine from the dumped dataset.
          # NOTE(review): Marshal.load must only be fed trusted data.
          def self._load(data)
            h = Marshal.load(data)
            new(h['ds'], h['y_var'])
          end

          # Hash of unstandardized coefficients, keyed by predictor name.
          def coeffs
            @coeffs
          end

          # Coefficients using a constant.
          # Based on http://www.xycoon.com/ols1.htm
          #
          # Returns the column matrix (X'X)^-1 X'y, where X is the predictor
          # matrix with a leading column of ones.
          def matrix_resolution
            columns = @dep_columns.map { |xi| xi.map(&:to_f) }
            columns.unshift([1.0] * @ds.nrows)
            y = Matrix.columns([@dy.to_a.map(&:to_f)])
            x = Matrix.columns(columns)
            xt = x.t
            (xt * x).inverse * xt * y
          end

          # R^2, computed as the square of #r.
          def r2
            r**2
          end

          # Multiple R: Pearson correlation between observed and predicted y.
          def r
            Bivariate::pearson(@dy, predicted)
          end

          # Sum of squares total of the criterion.
          def sst
            @dy.ss
          end

          # Regression constant.
          def constant
            @constant
          end

          # Coefficients of the engine fitted on the standardized dataset.
          def standarized_coeffs
            lr_s.coeffs
          end

          # GslEngine fitted on the standardized dataset (built lazily).
          def lr_s
            build_standarized if @lr_s.nil?
            @lr_s
          end

          # Fits and stores the engine for the standardized dataset.
          def build_standarized
            @ds_s = @ds.standardize
            @lr_s = GslEngine.new(@ds_s, @y_var)
          end

          # Predicted value for +v+ using the standardized engine.
          def process_s(v)
            lr_s.process(v)
          end

          # Residuals divided by their sample standard deviation.
          # ???? Not equal to SPSS output
          def standarized_residuals
            res = residuals
            red_sd = res.sds
            Daru::Vector.new(res.collect { |v| v.quo(red_sd) })
          end

          # Standard error for coeffs, keyed by predictor name.
          #
          # The estimated variance-covariance matrix is built from a column
          # of ones followed by the predictors only, so the diagonal entry
          # for a predictor sits at its position in @fields plus one,
          # wherever the criterion appears in the dataset.
          def coeffs_se
            evcm = estimated_variance_covariance_matrix
            @fields.each_with_index.each_with_object({}) do |(f, i), out|
              out[f] = evcm[i + 1, i + 1]
            end
          end
        end
      end
    end
  end # for Statsample
end # for if