statsample 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. data/History.txt +4 -0
  2. data/Manifest.txt +8 -19
  3. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  4. data/demo/dominance_analysis_bootstrap.rb +20 -0
  5. data/demo/dominanceanalysis.rb +11 -0
  6. data/demo/multiple_regression.rb +40 -0
  7. data/demo/polychoric.rb +13 -0
  8. data/demo/tetrachoric.rb +10 -0
  9. data/lib/distribution.rb +1 -0
  10. data/lib/distribution/normalbivariate.rb +100 -0
  11. data/lib/statsample.rb +4 -105
  12. data/lib/statsample/bivariate.rb +5 -1
  13. data/lib/statsample/bivariate/polychoric.rb +581 -0
  14. data/lib/statsample/bivariate/tetrachoric.rb +37 -5
  15. data/lib/statsample/converters.rb +11 -0
  16. data/lib/statsample/dominanceanalysis.rb +104 -90
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
  18. data/lib/statsample/factor/pca.rb +1 -2
  19. data/lib/statsample/factor/principalaxis.rb +2 -2
  20. data/lib/statsample/graph/svghistogram.rb +170 -172
  21. data/lib/statsample/matrix.rb +79 -0
  22. data/lib/statsample/mle.rb +6 -4
  23. data/lib/statsample/mle/probit.rb +0 -1
  24. data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
  25. data/lib/statsample/regression/multiple/baseengine.rb +112 -113
  26. data/lib/statsample/regression/multiple/gslengine.rb +91 -94
  27. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  28. data/lib/statsample/srs.rb +1 -1
  29. data/lib/statsample/test.rb +0 -1
  30. data/lib/statsample/test/umannwhitney.rb +8 -5
  31. data/po/es/statsample.po +201 -39
  32. data/po/statsample.pot +184 -32
  33. data/test/test_bivariate.rb +21 -2
  34. data/test/test_distribution.rb +58 -40
  35. data/test/test_factor.rb +0 -1
  36. data/test/test_gsl.rb +13 -14
  37. data/test/test_regression.rb +1 -1
  38. data/test/test_statistics.rb +1 -4
  39. metadata +10 -21
  40. data/demo/benchmark.rb +0 -76
  41. data/demo/chi-square.rb +0 -44
  42. data/demo/crosstab.rb +0 -7
  43. data/demo/dice.rb +0 -13
  44. data/demo/distribution_t.rb +0 -95
  45. data/demo/graph.rb +0 -9
  46. data/demo/item_analysis.rb +0 -30
  47. data/demo/mean.rb +0 -81
  48. data/demo/nunnally_6.rb +0 -34
  49. data/demo/pca.rb +0 -29
  50. data/demo/proportion.rb +0 -57
  51. data/demo/regression.rb +0 -82
  52. data/demo/sample_test.csv +0 -113
  53. data/demo/spss_matrix.rb +0 -3
  54. data/demo/strata_proportion.rb +0 -152
  55. data/demo/stratum.rb +0 -141
  56. data/demo/t-student.rb +0 -17
  57. data/demo/umann.rb +0 -8
  58. data/lib/matrix_extension.rb +0 -92
@@ -17,8 +17,7 @@ module Factor
17
17
 
18
18
 
19
19
  def initialize(matrix ,opts=Hash.new)
20
- if matrix.is_a? ::Matrix
21
- require 'matrix_extension'
20
+ if matrix.respond_to? :to_gsl
22
21
  matrix=matrix.to_gsl
23
22
  end
24
23
  @name=""
@@ -40,8 +40,8 @@ module Factor
40
40
  @iterations=0
41
41
  t.times do |i|
42
42
  @iterations+=1
43
- prev_com.each_with_index{|v,i|
44
- work_matrix[i][i]=v
43
+ prev_com.each_with_index{|v,it|
44
+ work_matrix[it][it]=v
45
45
  }
46
46
  pca=Statsample::PCA.new(::Matrix.rows(work_matrix))
47
47
 
@@ -1,208 +1,206 @@
1
1
  module Statsample
2
- module Graph
3
- class SvgHistogram < SVG::Graph::BarBase
4
- attr_accessor :inner_margin, :mean, :sigma, :show_normal
5
- def initialize(config)
6
- config[:fields]=[:dummy]
7
- super(config)
8
-
9
- @histogram=nil
10
- end
11
- include REXML
12
-
13
- # In addition to the defaults set in Graph::initialize, sets
14
- # [inner_margin] 14
15
- # [key] false
16
- def set_defaults
17
- super
18
- self.top_align = self.top_font = 0
19
- init_with({
20
- :inner_margin=>16,
21
- :key=>false,
22
- :show_normal=>false
23
- })
24
-
25
- end
26
-
27
- def histogram=(h)
28
- @histogram=h
29
- @data=[{:data=>(0...@histogram.bins).to_a.collect {|i|
30
- @histogram[i]
31
- }}]
2
+ module Graph
3
+ class SvgHistogram < SVG::Graph::BarBase
4
+ attr_accessor :inner_margin, :mean, :sigma, :show_normal
5
+ def initialize(config)
6
+ config[:fields]=[:dummy]
7
+ super(config)
8
+
9
+ @histogram=nil
10
+ end
11
+ include REXML
12
+
13
+ # In addition to the defaults set in Graph::initialize, sets
14
+ # [inner_margin] 14
15
+ # [key] false
16
+ def set_defaults
17
+ super
18
+ self.top_align = self.top_font = 0
19
+ init_with({
20
+ :inner_margin=>16,
21
+ :key=>false,
22
+ :show_normal=>false
23
+ })
24
+
25
+ end
26
+
27
+ def histogram=(h)
28
+ @histogram=h
29
+ @data=[{:data=>(0...@histogram.bins).to_a.collect {|i|
30
+ @histogram[i]
31
+ }}]
32
+ end
33
+ def get_x_labels
34
+ [""]
35
+ end
36
+
37
+ def get_y_labels
38
+ maxvalue = max_value
39
+ minvalue = min_value
40
+ range = maxvalue - minvalue
41
+
42
+ top_pad = range == 0 ? 10 : range / 20.0
43
+ scale_range = (maxvalue + top_pad) - minvalue
44
+
45
+ scale_division = scale_divisions || (scale_range / 10.0)
46
+
47
+ if scale_integers
48
+ scale_division = scale_division < 1 ? 1 : scale_division.round
32
49
  end
33
- def get_x_labels
34
- [""]
35
- end
36
50
 
37
- def get_y_labels
38
- maxvalue = max_value
39
- minvalue = min_value
40
- range = maxvalue - minvalue
41
-
42
- top_pad = range == 0 ? 10 : range / 20.0
43
- scale_range = (maxvalue + top_pad) - minvalue
44
-
45
- scale_division = scale_divisions || (scale_range / 10.0)
46
-
47
- if scale_integers
48
- scale_division = scale_division < 1 ? 1 : scale_division.round
51
+ rv = []
52
+ maxvalue = maxvalue%scale_division == 0 ?
53
+ maxvalue : maxvalue + scale_division
54
+ minvalue.step( maxvalue, scale_division ) {|v| rv << v}
55
+ rv
56
+ end
57
+
58
+ def unit_width
59
+ (@graph_width-(@inner_margin*2)) / (@histogram.max-@histogram.min).to_f
60
+ end
61
+ def draw_x_label(v)
62
+ left = (v - @histogram.min)*unit_width
63
+ x=@inner_margin+left
64
+ text = @graph.add_element( "text" )
65
+ text.attributes["class"] = "xAxisLabels"
66
+ text.text = sprintf("%0.2f",v)
67
+ y = @graph_height + x_label_font_size + 3
68
+ text.attributes["x"] = x.to_s
69
+ text.attributes["y"] = y.to_s
70
+ end
71
+
72
+ def draw_x_labels
73
+ if show_x_labels
74
+ (0...@histogram.bins).each do |i|
75
+ value = @histogram[i]
76
+ range = @histogram.get_range(i)
77
+ draw_x_label(range[0])
78
+ if(i==(@histogram.bins)-1)
79
+ draw_x_label(range[1])
49
80
  end
50
-
51
- rv = []
52
- maxvalue = maxvalue%scale_division == 0 ?
53
- maxvalue : maxvalue + scale_division
54
- minvalue.step( maxvalue, scale_division ) {|v| rv << v}
55
- return rv
56
- end
57
-
58
- def unit_width
59
- (@graph_width-(@inner_margin*2)) / (@histogram.max-@histogram.min).to_f
81
+ end
60
82
  end
61
- def draw_x_label(v)
62
- left = (v - @histogram.min)*unit_width
63
- x=@inner_margin+left
64
- text = @graph.add_element( "text" )
65
- text.attributes["class"] = "xAxisLabels"
66
- text.text = sprintf("%0.2f",v)
67
- y = @graph_height + x_label_font_size + 3
68
- text.attributes["x"] = x.to_s
69
- text.attributes["y"] = y.to_s
70
-
83
+ end
84
+ def draw_data
85
+ minvalue = min_value
86
+ fieldwidth = field_width
87
+ unit_size = (@graph_height.to_f - font_size*2*top_font) /
88
+ (get_y_labels.max - get_y_labels.min)
89
+ bottom = @graph_height
90
+ field_count = 0
91
+ hist_min=@histogram.min
92
+ hist_max=@histogram.max
93
+ range_hist=hist_max-hist_min
94
+ total=0
95
+
96
+ (0...@histogram.bins).each do |i|
97
+ dataset_count = 0
98
+ value = @histogram[i]
99
+ total=total+value
100
+ range = @histogram.get_range(i)
101
+ left = (range[0] - hist_min)*unit_width
102
+ bar_width = (range[1] - hist_min)*unit_width - left
103
+ length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
104
+ # top is 0 if value is negative
105
+ top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
106
+
107
+ @graph.add_element( "rect", {
108
+ "x" => (@inner_margin+left).to_s,
109
+ "y" => top.to_s,
110
+ "width" => bar_width.to_s,
111
+ "height" => length.to_s,
112
+ "class" => "fill#{dataset_count+1}"
113
+ })
114
+
115
+ make_datapoint_text(left + @inner_margin+ (bar_width/2), top - 6, value.to_s)
116
+ field_count += 1
71
117
  end
72
- def draw_x_labels
73
- if show_x_labels
74
- (0...@histogram.bins).each { |i|
75
- value = @histogram[i]
76
- range = @histogram.get_range(i)
77
- draw_x_label(range[0])
78
- if(i==(@histogram.bins)-1)
79
- draw_x_label(range[1])
80
- end
81
- }
118
+ if(show_normal)
119
+ divs=30
120
+ path=""
121
+ 0.upto(divs) do |i|
122
+ x_abs=hist_min+(range_hist/divs)*i
123
+ y=GSL::Ran::gaussian_pdf((x_abs-mean) / sigma)*total
124
+ xg=@inner_margin+((x_abs-hist_min)*unit_width)
125
+ yg=bottom-(y-minvalue)*unit_size
126
+ if i==0
127
+ path="M#{xg} #{yg} "
128
+ else
129
+ path+="L#{xg} #{yg} "
82
130
  end
131
+ end
132
+ @graph.add_element("path",
133
+ { "d"=>path, "style"=>"stroke:black;fill:none" })
83
134
  end
84
- def draw_data
85
- minvalue = min_value
86
- fieldwidth = field_width
87
-
88
- unit_size = (@graph_height.to_f - font_size*2*top_font) /
89
- (get_y_labels.max - get_y_labels.min)
90
- bottom = @graph_height
91
- field_count = 0
92
- hist_min=@histogram.min
93
- hist_max=@histogram.max
94
- range_hist=hist_max-hist_min
95
- total=0
96
- (0...@histogram.bins).each { |i|
97
- dataset_count = 0
98
- value = @histogram[i]
99
- total=total+value
100
- range = @histogram.get_range(i)
101
- left = (range[0] - hist_min)*unit_width
102
- bar_width = (range[1] - hist_min)*unit_width - left
103
- length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
104
- # top is 0 if value is negative
105
- top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
106
- @graph.add_element( "rect", {
107
- "x" => (@inner_margin+left).to_s,
108
- "y" => top.to_s,
109
- "width" => bar_width.to_s,
110
- "height" => length.to_s,
111
- "class" => "fill#{dataset_count+1}"
112
- })
113
- make_datapoint_text(left + @inner_margin+ (bar_width/2), top - 6, value.to_s)
114
- field_count += 1
115
- }
116
- if(show_normal)
117
- divs=30
118
- path=""
119
- 0.upto(divs) {|i|
120
- x_abs=hist_min+(range_hist/divs)*i
121
- y=GSL::Ran::gaussian_pdf((x_abs-mean) / sigma)*total
122
- xg=@inner_margin+((x_abs-hist_min)*unit_width)
123
- yg=bottom-(y-minvalue)*unit_size
124
- if i==0
125
- path="M#{xg} #{yg} "
126
- else
127
- path+="L#{xg} #{yg} "
128
- end
129
- }
130
- @graph.add_element("path", {
131
- "d"=>path,
132
- "style"=>"stroke:black;fill:none"
133
- }
134
- )
135
- end
136
- end
137
-
138
-
139
- def get_css
140
- return <<EOL
135
+ end
136
+
137
+
138
+ def get_css
139
+ return <<EOL
141
140
  /* default fill styles for multiple datasets (probably only use a single dataset on this graph though) */
142
141
 
143
142
  .key1,.fill1{
144
- fill: #ff0000;
145
- stroke: black;
146
- stroke-width: 1px;
143
+ fill: #ff0000;
144
+ stroke: black;
145
+ stroke-width: 1px;
147
146
  }
148
147
  .key2,.fill2{
149
- fill: #0000ff;
150
- stroke: black;
151
- stroke-width: 1px;
148
+ fill: #0000ff;
149
+ stroke: black;
150
+ stroke-width: 1px;
152
151
  }
153
152
  .key3,.fill3{
154
- fill: #00ff00;
155
- stroke: none;
156
- stroke-width: 1px;
153
+ fill: #00ff00;
154
+ stroke: none;
155
+ stroke-width: 1px;
157
156
  }
158
157
  .key4,.fill4{
159
- fill: #ffcc00;
160
- stroke: none;
161
- stroke-width: 1px;
158
+ fill: #ffcc00;
159
+ stroke: none;
160
+ stroke-width: 1px;
162
161
  }
163
162
  .key5,.fill5{
164
- fill: #00ccff;
165
- stroke: none;
166
- stroke-width: 1px;
163
+ fill: #00ccff;
164
+ stroke: none;
165
+ stroke-width: 1px;
167
166
  }
168
167
  .key6,.fill6{
169
- fill: #ff00ff;
170
- stroke: none;
171
- stroke-width: 1px;
168
+ fill: #ff00ff;
169
+ stroke: none;
170
+ stroke-width: 1px;
172
171
  }
173
172
  .key7,.fill7{
174
- fill: #00ffff;
175
- stroke: none;
176
- stroke-width: 1px;
173
+ fill: #00ffff;
174
+ stroke: none;
175
+ stroke-width: 1px;
177
176
  }
178
177
  .key8,.fill8{
179
- fill: #ffff00;
180
- stroke: none;
181
- stroke-width: 1px;
178
+ fill: #ffff00;
179
+ stroke: none;
180
+ stroke-width: 1px;
182
181
  }
183
182
  .key9,.fill9{
184
- fill: #cc6666;
185
- stroke: none;
186
- stroke-width: 1px;
183
+ fill: #cc6666;
184
+ stroke: none;
185
+ stroke-width: 1px;
187
186
  }
188
187
  .key10,.fill10{
189
- fill: #663399;
190
- stroke: none;
191
- stroke-width: 1px;
188
+ fill: #663399;
189
+ stroke: none;
190
+ stroke-width: 1px;
192
191
  }
193
192
  .key11,.fill11{
194
- fill: #339900;
195
- stroke: none;
196
- stroke-width: 1px;
193
+ fill: #339900;
194
+ stroke: none;
195
+ stroke-width: 1px;
197
196
  }
198
197
  .key12,.fill12{
199
- fill: #9966FF;
200
- stroke: none;
201
- stroke-width: 1px;
198
+ fill: #9966FF;
199
+ stroke: none;
200
+ stroke-width: 1px;
202
201
  }
203
202
  EOL
204
- end
205
-
206
- end
207
- end
203
+ end
204
+ end
205
+ end
208
206
  end
@@ -0,0 +1,79 @@
1
+ require 'matrix'
2
# Only applied on interpreters whose RUBY_VERSION string compares <= "1.9.0".
# Replaces Vector#coerce (the previous implementation stays reachable as
# #old_coerce) so that a Numeric on the left-hand side is wrapped in
# Matrix::Scalar before the operation is dispatched.
if RUBY_VERSION <= "1.9.0"
  class ::Vector
    alias_method :old_coerce, :coerce

    # Returns the pair [Matrix::Scalar(other), self] for a Numeric +other+.
    # Raises TypeError for any other kind of operand.
    def coerce(other)
      unless other.is_a?(Numeric)
        raise TypeError, "#{self.class} can't be coerced into #{other.class}"
      end
      [Matrix::Scalar.new(other), self]
    end
  end
end
15
+
16
class ::Matrix
  # Builds a GSL::Matrix from this matrix, one Ruby array per row.
  def to_gsl
    row_arrays = (0...row_size).collect { |idx| row(idx).to_a }
    GSL::Matrix[*row_arrays]
  end

  # Row marginals: an Array with the sum of the cells of each row.
  def rows_sum
    (0...row_size).collect do |idx|
      row(idx).to_a.inject(0) { |acc, cell| acc + cell }
    end
  end

  # Column marginals: an Array with the sum of the cells of each column.
  def cols_sum
    (0...column_size).collect do |idx|
      column(idx).to_a.inject(0) { |acc, cell| acc + cell }
    end
  end

  # Grand total: the sum of every cell, computed from the row marginals.
  def total_sum
    rows_sum.inject(0) { |acc, subtotal| acc + subtotal }
  end
end
42
+
43
module GSL
  class Matrix
    # Copies every cell (size1 rows by size2 columns, read through self[i, j])
    # into a standard-library ::Matrix.
    def to_matrix
      n_rows = size1
      n_cols = size2
      cells = Array.new(n_rows) do |r|
        Array.new(n_cols) { |c| self[r, c] }
      end
      ::Matrix.rows(cells)
    end
  end
end
53
+
54
module Statsample
  # Mixin for matrix objects (anything answering #row_size and #row) that
  # adds a name, per-row labels and ReportBuilder output.
  module CorrelationMatrix
    # The accessors are declared on the mixin itself, so an extended matrix
    # can read back what it was given. Declaring them on the Statsample
    # namespace would leave the mixin without readers and would leak
    # #name / #labels into anything that does `include Statsample`.
    attr_accessor :labels, :name

    # Renders this matrix through a fresh ReportBuilder and returns the
    # result of ReportBuilder#to_text.
    def summary
      rp = ReportBuilder.new()
      rp.add(self)
      rp.to_text
    end

    # Builds a ReportBuilder::Table (header: an empty corner cell followed by
    # the labels; one row per matrix row) and hands it to
    # +generator+.parse_element.
    #
    # Defaults, applied lazily: name "Correlation Matrix", labels "0", "1", ...
    def to_reportbuilder(generator)
      @name ||= "Correlation Matrix"
      # Range#collect instead of a block-less Integer#times, which older 1.8
      # interpreters do not support.
      @labels ||= (0...row_size).collect { |idx| idx.to_s }
      t = ReportBuilder::Table.new(:name => @name, :header => [""] + @labels)
      row_size.times do |i|
        cells = row(i).to_a.collect do |value|
          # Drop only the leading zero ("0.500" -> ".500", "-0.250" -> "-.250").
          # A global replace of "0." would also corrupt values like "10.500".
          sprintf("%0.3f", value).sub(/\A(-?)0\./, '\1.')
        end
        t.add_row([@labels[i]] + cells)
      end
      generator.parse_element(t)
    end
  end
end