statsample 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +8 -19
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/dominance_analysis_bootstrap.rb +20 -0
- data/demo/dominanceanalysis.rb +11 -0
- data/demo/multiple_regression.rb +40 -0
- data/demo/polychoric.rb +13 -0
- data/demo/tetrachoric.rb +10 -0
- data/lib/distribution.rb +1 -0
- data/lib/distribution/normalbivariate.rb +100 -0
- data/lib/statsample.rb +4 -105
- data/lib/statsample/bivariate.rb +5 -1
- data/lib/statsample/bivariate/polychoric.rb +581 -0
- data/lib/statsample/bivariate/tetrachoric.rb +37 -5
- data/lib/statsample/converters.rb +11 -0
- data/lib/statsample/dominanceanalysis.rb +104 -90
- data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/svghistogram.rb +170 -172
- data/lib/statsample/matrix.rb +79 -0
- data/lib/statsample/mle.rb +6 -4
- data/lib/statsample/mle/probit.rb +0 -1
- data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
- data/lib/statsample/regression/multiple/baseengine.rb +112 -113
- data/lib/statsample/regression/multiple/gslengine.rb +91 -94
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test.rb +0 -1
- data/lib/statsample/test/umannwhitney.rb +8 -5
- data/po/es/statsample.po +201 -39
- data/po/statsample.pot +184 -32
- data/test/test_bivariate.rb +21 -2
- data/test/test_distribution.rb +58 -40
- data/test/test_factor.rb +0 -1
- data/test/test_gsl.rb +13 -14
- data/test/test_regression.rb +1 -1
- data/test/test_statistics.rb +1 -4
- metadata +10 -21
- data/demo/benchmark.rb +0 -76
- data/demo/chi-square.rb +0 -44
- data/demo/crosstab.rb +0 -7
- data/demo/dice.rb +0 -13
- data/demo/distribution_t.rb +0 -95
- data/demo/graph.rb +0 -9
- data/demo/item_analysis.rb +0 -30
- data/demo/mean.rb +0 -81
- data/demo/nunnally_6.rb +0 -34
- data/demo/pca.rb +0 -29
- data/demo/proportion.rb +0 -57
- data/demo/regression.rb +0 -82
- data/demo/sample_test.csv +0 -113
- data/demo/spss_matrix.rb +0 -3
- data/demo/strata_proportion.rb +0 -152
- data/demo/stratum.rb +0 -141
- data/demo/t-student.rb +0 -17
- data/demo/umann.rb +0 -8
- data/lib/matrix_extension.rb +0 -92
|
@@ -1,208 +1,206 @@
|
|
|
1
1
|
module Statsample
|
|
2
|
-
|
|
3
|
-
class SvgHistogram < SVG::Graph::BarBase
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
2
|
+
module Graph
|
|
3
|
+
class SvgHistogram < SVG::Graph::BarBase
|
|
4
|
+
attr_accessor :inner_margin, :mean, :sigma, :show_normal
|
|
5
|
+
def initialize(config)
|
|
6
|
+
config[:fields]=[:dummy]
|
|
7
|
+
super(config)
|
|
8
|
+
|
|
9
|
+
@histogram=nil
|
|
10
|
+
end
|
|
11
|
+
include REXML
|
|
12
|
+
|
|
13
|
+
# In addition to the defaults set in Graph::initialize, sets
|
|
14
|
+
# [inner_margin] 14
|
|
15
|
+
# [key] false
|
|
16
|
+
def set_defaults
|
|
17
|
+
super
|
|
18
|
+
self.top_align = self.top_font = 0
|
|
19
|
+
init_with({
|
|
20
|
+
:inner_margin=>16,
|
|
21
|
+
:key=>false,
|
|
22
|
+
:show_normal=>false
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def histogram=(h)
|
|
28
|
+
@histogram=h
|
|
29
|
+
@data=[{:data=>(0...@histogram.bins).to_a.collect {|i|
|
|
30
|
+
@histogram[i]
|
|
31
|
+
}}]
|
|
32
|
+
end
|
|
33
|
+
def get_x_labels
|
|
34
|
+
[""]
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def get_y_labels
|
|
38
|
+
maxvalue = max_value
|
|
39
|
+
minvalue = min_value
|
|
40
|
+
range = maxvalue - minvalue
|
|
41
|
+
|
|
42
|
+
top_pad = range == 0 ? 10 : range / 20.0
|
|
43
|
+
scale_range = (maxvalue + top_pad) - minvalue
|
|
44
|
+
|
|
45
|
+
scale_division = scale_divisions || (scale_range / 10.0)
|
|
46
|
+
|
|
47
|
+
if scale_integers
|
|
48
|
+
scale_division = scale_division < 1 ? 1 : scale_division.round
|
|
32
49
|
end
|
|
33
|
-
def get_x_labels
|
|
34
|
-
[""]
|
|
35
|
-
end
|
|
36
50
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
51
|
+
rv = []
|
|
52
|
+
maxvalue = maxvalue%scale_division == 0 ?
|
|
53
|
+
maxvalue : maxvalue + scale_division
|
|
54
|
+
minvalue.step( maxvalue, scale_division ) {|v| rv << v}
|
|
55
|
+
rv
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def unit_width
|
|
59
|
+
(@graph_width-(@inner_margin*2)) / (@histogram.max-@histogram.min).to_f
|
|
60
|
+
end
|
|
61
|
+
def draw_x_label(v)
|
|
62
|
+
left = (v - @histogram.min)*unit_width
|
|
63
|
+
x=@inner_margin+left
|
|
64
|
+
text = @graph.add_element( "text" )
|
|
65
|
+
text.attributes["class"] = "xAxisLabels"
|
|
66
|
+
text.text = sprintf("%0.2f",v)
|
|
67
|
+
y = @graph_height + x_label_font_size + 3
|
|
68
|
+
text.attributes["x"] = x.to_s
|
|
69
|
+
text.attributes["y"] = y.to_s
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def draw_x_labels
|
|
73
|
+
if show_x_labels
|
|
74
|
+
(0...@histogram.bins).each do |i|
|
|
75
|
+
value = @histogram[i]
|
|
76
|
+
range = @histogram.get_range(i)
|
|
77
|
+
draw_x_label(range[0])
|
|
78
|
+
if(i==(@histogram.bins)-1)
|
|
79
|
+
draw_x_label(range[1])
|
|
49
80
|
end
|
|
50
|
-
|
|
51
|
-
rv = []
|
|
52
|
-
maxvalue = maxvalue%scale_division == 0 ?
|
|
53
|
-
maxvalue : maxvalue + scale_division
|
|
54
|
-
minvalue.step( maxvalue, scale_division ) {|v| rv << v}
|
|
55
|
-
return rv
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def unit_width
|
|
59
|
-
(@graph_width-(@inner_margin*2)) / (@histogram.max-@histogram.min).to_f
|
|
81
|
+
end
|
|
60
82
|
end
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
83
|
+
end
|
|
84
|
+
def draw_data
|
|
85
|
+
minvalue = min_value
|
|
86
|
+
fieldwidth = field_width
|
|
87
|
+
unit_size = (@graph_height.to_f - font_size*2*top_font) /
|
|
88
|
+
(get_y_labels.max - get_y_labels.min)
|
|
89
|
+
bottom = @graph_height
|
|
90
|
+
field_count = 0
|
|
91
|
+
hist_min=@histogram.min
|
|
92
|
+
hist_max=@histogram.max
|
|
93
|
+
range_hist=hist_max-hist_min
|
|
94
|
+
total=0
|
|
95
|
+
|
|
96
|
+
(0...@histogram.bins).each do |i|
|
|
97
|
+
dataset_count = 0
|
|
98
|
+
value = @histogram[i]
|
|
99
|
+
total=total+value
|
|
100
|
+
range = @histogram.get_range(i)
|
|
101
|
+
left = (range[0] - hist_min)*unit_width
|
|
102
|
+
bar_width = (range[1] - hist_min)*unit_width - left
|
|
103
|
+
length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
|
|
104
|
+
# top is 0 if value is negative
|
|
105
|
+
top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
|
|
106
|
+
|
|
107
|
+
@graph.add_element( "rect", {
|
|
108
|
+
"x" => (@inner_margin+left).to_s,
|
|
109
|
+
"y" => top.to_s,
|
|
110
|
+
"width" => bar_width.to_s,
|
|
111
|
+
"height" => length.to_s,
|
|
112
|
+
"class" => "fill#{dataset_count+1}"
|
|
113
|
+
})
|
|
114
|
+
|
|
115
|
+
make_datapoint_text(left + @inner_margin+ (bar_width/2), top - 6, value.to_s)
|
|
116
|
+
field_count += 1
|
|
71
117
|
end
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
118
|
+
if(show_normal)
|
|
119
|
+
divs=30
|
|
120
|
+
path=""
|
|
121
|
+
0.upto(divs) do |i|
|
|
122
|
+
x_abs=hist_min+(range_hist/divs)*i
|
|
123
|
+
y=GSL::Ran::gaussian_pdf((x_abs-mean) / sigma)*total
|
|
124
|
+
xg=@inner_margin+((x_abs-hist_min)*unit_width)
|
|
125
|
+
yg=bottom-(y-minvalue)*unit_size
|
|
126
|
+
if i==0
|
|
127
|
+
path="M#{xg} #{yg} "
|
|
128
|
+
else
|
|
129
|
+
path+="L#{xg} #{yg} "
|
|
82
130
|
end
|
|
131
|
+
end
|
|
132
|
+
@graph.add_element("path",
|
|
133
|
+
{ "d"=>path, "style"=>"stroke:black;fill:none" })
|
|
83
134
|
end
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
(get_y_labels.max - get_y_labels.min)
|
|
90
|
-
bottom = @graph_height
|
|
91
|
-
field_count = 0
|
|
92
|
-
hist_min=@histogram.min
|
|
93
|
-
hist_max=@histogram.max
|
|
94
|
-
range_hist=hist_max-hist_min
|
|
95
|
-
total=0
|
|
96
|
-
(0...@histogram.bins).each { |i|
|
|
97
|
-
dataset_count = 0
|
|
98
|
-
value = @histogram[i]
|
|
99
|
-
total=total+value
|
|
100
|
-
range = @histogram.get_range(i)
|
|
101
|
-
left = (range[0] - hist_min)*unit_width
|
|
102
|
-
bar_width = (range[1] - hist_min)*unit_width - left
|
|
103
|
-
length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
|
|
104
|
-
# top is 0 if value is negative
|
|
105
|
-
top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
|
|
106
|
-
@graph.add_element( "rect", {
|
|
107
|
-
"x" => (@inner_margin+left).to_s,
|
|
108
|
-
"y" => top.to_s,
|
|
109
|
-
"width" => bar_width.to_s,
|
|
110
|
-
"height" => length.to_s,
|
|
111
|
-
"class" => "fill#{dataset_count+1}"
|
|
112
|
-
})
|
|
113
|
-
make_datapoint_text(left + @inner_margin+ (bar_width/2), top - 6, value.to_s)
|
|
114
|
-
field_count += 1
|
|
115
|
-
}
|
|
116
|
-
if(show_normal)
|
|
117
|
-
divs=30
|
|
118
|
-
path=""
|
|
119
|
-
0.upto(divs) {|i|
|
|
120
|
-
x_abs=hist_min+(range_hist/divs)*i
|
|
121
|
-
y=GSL::Ran::gaussian_pdf((x_abs-mean) / sigma)*total
|
|
122
|
-
xg=@inner_margin+((x_abs-hist_min)*unit_width)
|
|
123
|
-
yg=bottom-(y-minvalue)*unit_size
|
|
124
|
-
if i==0
|
|
125
|
-
path="M#{xg} #{yg} "
|
|
126
|
-
else
|
|
127
|
-
path+="L#{xg} #{yg} "
|
|
128
|
-
end
|
|
129
|
-
}
|
|
130
|
-
@graph.add_element("path", {
|
|
131
|
-
"d"=>path,
|
|
132
|
-
"style"=>"stroke:black;fill:none"
|
|
133
|
-
}
|
|
134
|
-
)
|
|
135
|
-
end
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def get_css
|
|
140
|
-
return <<EOL
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def get_css
|
|
139
|
+
return <<EOL
|
|
141
140
|
/* default fill styles for multiple datasets (probably only use a single dataset on this graph though) */
|
|
142
141
|
|
|
143
142
|
.key1,.fill1{
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
143
|
+
fill: #ff0000;
|
|
144
|
+
stroke: black;
|
|
145
|
+
stroke-width: 1px;
|
|
147
146
|
}
|
|
148
147
|
.key2,.fill2{
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
148
|
+
fill: #0000ff;
|
|
149
|
+
stroke: black;
|
|
150
|
+
stroke-width: 1px;
|
|
152
151
|
}
|
|
153
152
|
.key3,.fill3{
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
153
|
+
fill: #00ff00;
|
|
154
|
+
stroke: none;
|
|
155
|
+
stroke-width: 1px;
|
|
157
156
|
}
|
|
158
157
|
.key4,.fill4{
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
158
|
+
fill: #ffcc00;
|
|
159
|
+
stroke: none;
|
|
160
|
+
stroke-width: 1px;
|
|
162
161
|
}
|
|
163
162
|
.key5,.fill5{
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
163
|
+
fill: #00ccff;
|
|
164
|
+
stroke: none;
|
|
165
|
+
stroke-width: 1px;
|
|
167
166
|
}
|
|
168
167
|
.key6,.fill6{
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
168
|
+
fill: #ff00ff;
|
|
169
|
+
stroke: none;
|
|
170
|
+
stroke-width: 1px;
|
|
172
171
|
}
|
|
173
172
|
.key7,.fill7{
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
173
|
+
fill: #00ffff;
|
|
174
|
+
stroke: none;
|
|
175
|
+
stroke-width: 1px;
|
|
177
176
|
}
|
|
178
177
|
.key8,.fill8{
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
178
|
+
fill: #ffff00;
|
|
179
|
+
stroke: none;
|
|
180
|
+
stroke-width: 1px;
|
|
182
181
|
}
|
|
183
182
|
.key9,.fill9{
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
183
|
+
fill: #cc6666;
|
|
184
|
+
stroke: none;
|
|
185
|
+
stroke-width: 1px;
|
|
187
186
|
}
|
|
188
187
|
.key10,.fill10{
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
188
|
+
fill: #663399;
|
|
189
|
+
stroke: none;
|
|
190
|
+
stroke-width: 1px;
|
|
192
191
|
}
|
|
193
192
|
.key11,.fill11{
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
193
|
+
fill: #339900;
|
|
194
|
+
stroke: none;
|
|
195
|
+
stroke-width: 1px;
|
|
197
196
|
}
|
|
198
197
|
.key12,.fill12{
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
198
|
+
fill: #9966FF;
|
|
199
|
+
stroke: none;
|
|
200
|
+
stroke-width: 1px;
|
|
202
201
|
}
|
|
203
202
|
EOL
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
end
|
|
207
|
-
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
end
|
|
208
206
|
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
require 'matrix'
|
|
2
|
+
if RUBY_VERSION<="1.9.0"
|
|
3
|
+
class ::Vector
|
|
4
|
+
alias_method :old_coerce, :coerce
|
|
5
|
+
def coerce(other)
|
|
6
|
+
case other
|
|
7
|
+
when Numeric
|
|
8
|
+
return Matrix::Scalar.new(other), self
|
|
9
|
+
else
|
|
10
|
+
raise TypeError, "#{self.class} can't be coerced into #{other.class}"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
class ::Matrix
|
|
17
|
+
def to_gsl
|
|
18
|
+
out=[]
|
|
19
|
+
self.row_size.times{|i|
|
|
20
|
+
out[i]=self.row(i).to_a
|
|
21
|
+
}
|
|
22
|
+
GSL::Matrix[*out]
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Calculate marginal of rows
|
|
26
|
+
def rows_sum
|
|
27
|
+
(0...row_size).collect {|i|
|
|
28
|
+
row(i).to_a.inject(0) {|a,v| a+v}
|
|
29
|
+
}
|
|
30
|
+
end
|
|
31
|
+
# Calculate marginal of columns
|
|
32
|
+
def cols_sum
|
|
33
|
+
(0...column_size).collect {|i|
|
|
34
|
+
column(i).to_a.inject(0) {|a,v| a+v}
|
|
35
|
+
}
|
|
36
|
+
end
|
|
37
|
+
# Calculate sum of cells
|
|
38
|
+
def total_sum
|
|
39
|
+
rows_sum.inject(0){|a,v| a+v}
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
module GSL
|
|
44
|
+
class Matrix
|
|
45
|
+
def to_matrix
|
|
46
|
+
rows=self.size1
|
|
47
|
+
cols=self.size2
|
|
48
|
+
out=(0...rows).collect{|i| (0...cols).collect {|j| self[i,j]} }
|
|
49
|
+
::Matrix.rows(out)
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
module Statsample
|
|
55
|
+
attr :labels
|
|
56
|
+
attr :name
|
|
57
|
+
module CorrelationMatrix
|
|
58
|
+
def summary
|
|
59
|
+
rp=ReportBuilder.new()
|
|
60
|
+
rp.add(self)
|
|
61
|
+
rp.to_text
|
|
62
|
+
end
|
|
63
|
+
def labels=(v)
|
|
64
|
+
@labels=v
|
|
65
|
+
end
|
|
66
|
+
def name=(v)
|
|
67
|
+
@name=v
|
|
68
|
+
end
|
|
69
|
+
def to_reportbuilder(generator)
|
|
70
|
+
@name||="Correlation Matrix"
|
|
71
|
+
@labels||=row_size.times.collect {|i| i.to_s}
|
|
72
|
+
t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+@labels)
|
|
73
|
+
row_size.times {|i|
|
|
74
|
+
t.add_row([@labels[i]]+@rows[i].collect {|i| sprintf("%0.3f",i).gsub("0.",".")})
|
|
75
|
+
}
|
|
76
|
+
generator.parse_element(t)
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|