statsample 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +8 -19
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/dominance_analysis_bootstrap.rb +20 -0
- data/demo/dominanceanalysis.rb +11 -0
- data/demo/multiple_regression.rb +40 -0
- data/demo/polychoric.rb +13 -0
- data/demo/tetrachoric.rb +10 -0
- data/lib/distribution.rb +1 -0
- data/lib/distribution/normalbivariate.rb +100 -0
- data/lib/statsample.rb +4 -105
- data/lib/statsample/bivariate.rb +5 -1
- data/lib/statsample/bivariate/polychoric.rb +581 -0
- data/lib/statsample/bivariate/tetrachoric.rb +37 -5
- data/lib/statsample/converters.rb +11 -0
- data/lib/statsample/dominanceanalysis.rb +104 -90
- data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/svghistogram.rb +170 -172
- data/lib/statsample/matrix.rb +79 -0
- data/lib/statsample/mle.rb +6 -4
- data/lib/statsample/mle/probit.rb +0 -1
- data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
- data/lib/statsample/regression/multiple/baseengine.rb +112 -113
- data/lib/statsample/regression/multiple/gslengine.rb +91 -94
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test.rb +0 -1
- data/lib/statsample/test/umannwhitney.rb +8 -5
- data/po/es/statsample.po +201 -39
- data/po/statsample.pot +184 -32
- data/test/test_bivariate.rb +21 -2
- data/test/test_distribution.rb +58 -40
- data/test/test_factor.rb +0 -1
- data/test/test_gsl.rb +13 -14
- data/test/test_regression.rb +1 -1
- data/test/test_statistics.rb +1 -4
- metadata +10 -21
- data/demo/benchmark.rb +0 -76
- data/demo/chi-square.rb +0 -44
- data/demo/crosstab.rb +0 -7
- data/demo/dice.rb +0 -13
- data/demo/distribution_t.rb +0 -95
- data/demo/graph.rb +0 -9
- data/demo/item_analysis.rb +0 -30
- data/demo/mean.rb +0 -81
- data/demo/nunnally_6.rb +0 -34
- data/demo/pca.rb +0 -29
- data/demo/proportion.rb +0 -57
- data/demo/regression.rb +0 -82
- data/demo/sample_test.csv +0 -113
- data/demo/spss_matrix.rb +0 -3
- data/demo/strata_proportion.rb +0 -152
- data/demo/stratum.rb +0 -141
- data/demo/t-student.rb +0 -17
- data/demo/umann.rb +0 -8
- data/lib/matrix_extension.rb +0 -92
@@ -1,208 +1,206 @@
|
|
1
1
|
module Statsample
|
2
|
-
|
3
|
-
class SvgHistogram < SVG::Graph::BarBase
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
2
|
+
module Graph
|
3
|
+
class SvgHistogram < SVG::Graph::BarBase
|
4
|
+
attr_accessor :inner_margin, :mean, :sigma, :show_normal
|
5
|
+
def initialize(config)
|
6
|
+
config[:fields]=[:dummy]
|
7
|
+
super(config)
|
8
|
+
|
9
|
+
@histogram=nil
|
10
|
+
end
|
11
|
+
include REXML
|
12
|
+
|
13
|
+
# In addition to the defaults set in Graph::initialize, sets
|
14
|
+
# [inner_margin] 14
|
15
|
+
# [key] false
|
16
|
+
def set_defaults
|
17
|
+
super
|
18
|
+
self.top_align = self.top_font = 0
|
19
|
+
init_with({
|
20
|
+
:inner_margin=>16,
|
21
|
+
:key=>false,
|
22
|
+
:show_normal=>false
|
23
|
+
})
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
def histogram=(h)
|
28
|
+
@histogram=h
|
29
|
+
@data=[{:data=>(0...@histogram.bins).to_a.collect {|i|
|
30
|
+
@histogram[i]
|
31
|
+
}}]
|
32
|
+
end
|
33
|
+
def get_x_labels
|
34
|
+
[""]
|
35
|
+
end
|
36
|
+
|
37
|
+
def get_y_labels
|
38
|
+
maxvalue = max_value
|
39
|
+
minvalue = min_value
|
40
|
+
range = maxvalue - minvalue
|
41
|
+
|
42
|
+
top_pad = range == 0 ? 10 : range / 20.0
|
43
|
+
scale_range = (maxvalue + top_pad) - minvalue
|
44
|
+
|
45
|
+
scale_division = scale_divisions || (scale_range / 10.0)
|
46
|
+
|
47
|
+
if scale_integers
|
48
|
+
scale_division = scale_division < 1 ? 1 : scale_division.round
|
32
49
|
end
|
33
|
-
def get_x_labels
|
34
|
-
[""]
|
35
|
-
end
|
36
50
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
51
|
+
rv = []
|
52
|
+
maxvalue = maxvalue%scale_division == 0 ?
|
53
|
+
maxvalue : maxvalue + scale_division
|
54
|
+
minvalue.step( maxvalue, scale_division ) {|v| rv << v}
|
55
|
+
rv
|
56
|
+
end
|
57
|
+
|
58
|
+
def unit_width
|
59
|
+
(@graph_width-(@inner_margin*2)) / (@histogram.max-@histogram.min).to_f
|
60
|
+
end
|
61
|
+
def draw_x_label(v)
|
62
|
+
left = (v - @histogram.min)*unit_width
|
63
|
+
x=@inner_margin+left
|
64
|
+
text = @graph.add_element( "text" )
|
65
|
+
text.attributes["class"] = "xAxisLabels"
|
66
|
+
text.text = sprintf("%0.2f",v)
|
67
|
+
y = @graph_height + x_label_font_size + 3
|
68
|
+
text.attributes["x"] = x.to_s
|
69
|
+
text.attributes["y"] = y.to_s
|
70
|
+
end
|
71
|
+
|
72
|
+
def draw_x_labels
|
73
|
+
if show_x_labels
|
74
|
+
(0...@histogram.bins).each do |i|
|
75
|
+
value = @histogram[i]
|
76
|
+
range = @histogram.get_range(i)
|
77
|
+
draw_x_label(range[0])
|
78
|
+
if(i==(@histogram.bins)-1)
|
79
|
+
draw_x_label(range[1])
|
49
80
|
end
|
50
|
-
|
51
|
-
rv = []
|
52
|
-
maxvalue = maxvalue%scale_division == 0 ?
|
53
|
-
maxvalue : maxvalue + scale_division
|
54
|
-
minvalue.step( maxvalue, scale_division ) {|v| rv << v}
|
55
|
-
return rv
|
56
|
-
end
|
57
|
-
|
58
|
-
def unit_width
|
59
|
-
(@graph_width-(@inner_margin*2)) / (@histogram.max-@histogram.min).to_f
|
81
|
+
end
|
60
82
|
end
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
83
|
+
end
|
84
|
+
def draw_data
|
85
|
+
minvalue = min_value
|
86
|
+
fieldwidth = field_width
|
87
|
+
unit_size = (@graph_height.to_f - font_size*2*top_font) /
|
88
|
+
(get_y_labels.max - get_y_labels.min)
|
89
|
+
bottom = @graph_height
|
90
|
+
field_count = 0
|
91
|
+
hist_min=@histogram.min
|
92
|
+
hist_max=@histogram.max
|
93
|
+
range_hist=hist_max-hist_min
|
94
|
+
total=0
|
95
|
+
|
96
|
+
(0...@histogram.bins).each do |i|
|
97
|
+
dataset_count = 0
|
98
|
+
value = @histogram[i]
|
99
|
+
total=total+value
|
100
|
+
range = @histogram.get_range(i)
|
101
|
+
left = (range[0] - hist_min)*unit_width
|
102
|
+
bar_width = (range[1] - hist_min)*unit_width - left
|
103
|
+
length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
|
104
|
+
# top is 0 if value is negative
|
105
|
+
top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
|
106
|
+
|
107
|
+
@graph.add_element( "rect", {
|
108
|
+
"x" => (@inner_margin+left).to_s,
|
109
|
+
"y" => top.to_s,
|
110
|
+
"width" => bar_width.to_s,
|
111
|
+
"height" => length.to_s,
|
112
|
+
"class" => "fill#{dataset_count+1}"
|
113
|
+
})
|
114
|
+
|
115
|
+
make_datapoint_text(left + @inner_margin+ (bar_width/2), top - 6, value.to_s)
|
116
|
+
field_count += 1
|
71
117
|
end
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
118
|
+
if(show_normal)
|
119
|
+
divs=30
|
120
|
+
path=""
|
121
|
+
0.upto(divs) do |i|
|
122
|
+
x_abs=hist_min+(range_hist/divs)*i
|
123
|
+
y=GSL::Ran::gaussian_pdf((x_abs-mean) / sigma)*total
|
124
|
+
xg=@inner_margin+((x_abs-hist_min)*unit_width)
|
125
|
+
yg=bottom-(y-minvalue)*unit_size
|
126
|
+
if i==0
|
127
|
+
path="M#{xg} #{yg} "
|
128
|
+
else
|
129
|
+
path+="L#{xg} #{yg} "
|
82
130
|
end
|
131
|
+
end
|
132
|
+
@graph.add_element("path",
|
133
|
+
{ "d"=>path, "style"=>"stroke:black;fill:none" })
|
83
134
|
end
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
(get_y_labels.max - get_y_labels.min)
|
90
|
-
bottom = @graph_height
|
91
|
-
field_count = 0
|
92
|
-
hist_min=@histogram.min
|
93
|
-
hist_max=@histogram.max
|
94
|
-
range_hist=hist_max-hist_min
|
95
|
-
total=0
|
96
|
-
(0...@histogram.bins).each { |i|
|
97
|
-
dataset_count = 0
|
98
|
-
value = @histogram[i]
|
99
|
-
total=total+value
|
100
|
-
range = @histogram.get_range(i)
|
101
|
-
left = (range[0] - hist_min)*unit_width
|
102
|
-
bar_width = (range[1] - hist_min)*unit_width - left
|
103
|
-
length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
|
104
|
-
# top is 0 if value is negative
|
105
|
-
top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
|
106
|
-
@graph.add_element( "rect", {
|
107
|
-
"x" => (@inner_margin+left).to_s,
|
108
|
-
"y" => top.to_s,
|
109
|
-
"width" => bar_width.to_s,
|
110
|
-
"height" => length.to_s,
|
111
|
-
"class" => "fill#{dataset_count+1}"
|
112
|
-
})
|
113
|
-
make_datapoint_text(left + @inner_margin+ (bar_width/2), top - 6, value.to_s)
|
114
|
-
field_count += 1
|
115
|
-
}
|
116
|
-
if(show_normal)
|
117
|
-
divs=30
|
118
|
-
path=""
|
119
|
-
0.upto(divs) {|i|
|
120
|
-
x_abs=hist_min+(range_hist/divs)*i
|
121
|
-
y=GSL::Ran::gaussian_pdf((x_abs-mean) / sigma)*total
|
122
|
-
xg=@inner_margin+((x_abs-hist_min)*unit_width)
|
123
|
-
yg=bottom-(y-minvalue)*unit_size
|
124
|
-
if i==0
|
125
|
-
path="M#{xg} #{yg} "
|
126
|
-
else
|
127
|
-
path+="L#{xg} #{yg} "
|
128
|
-
end
|
129
|
-
}
|
130
|
-
@graph.add_element("path", {
|
131
|
-
"d"=>path,
|
132
|
-
"style"=>"stroke:black;fill:none"
|
133
|
-
}
|
134
|
-
)
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
|
139
|
-
def get_css
|
140
|
-
return <<EOL
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
def get_css
|
139
|
+
return <<EOL
|
141
140
|
/* default fill styles for multiple datasets (probably only use a single dataset on this graph though) */
|
142
141
|
|
143
142
|
.key1,.fill1{
|
144
|
-
|
145
|
-
|
146
|
-
|
143
|
+
fill: #ff0000;
|
144
|
+
stroke: black;
|
145
|
+
stroke-width: 1px;
|
147
146
|
}
|
148
147
|
.key2,.fill2{
|
149
|
-
|
150
|
-
|
151
|
-
|
148
|
+
fill: #0000ff;
|
149
|
+
stroke: black;
|
150
|
+
stroke-width: 1px;
|
152
151
|
}
|
153
152
|
.key3,.fill3{
|
154
|
-
|
155
|
-
|
156
|
-
|
153
|
+
fill: #00ff00;
|
154
|
+
stroke: none;
|
155
|
+
stroke-width: 1px;
|
157
156
|
}
|
158
157
|
.key4,.fill4{
|
159
|
-
|
160
|
-
|
161
|
-
|
158
|
+
fill: #ffcc00;
|
159
|
+
stroke: none;
|
160
|
+
stroke-width: 1px;
|
162
161
|
}
|
163
162
|
.key5,.fill5{
|
164
|
-
|
165
|
-
|
166
|
-
|
163
|
+
fill: #00ccff;
|
164
|
+
stroke: none;
|
165
|
+
stroke-width: 1px;
|
167
166
|
}
|
168
167
|
.key6,.fill6{
|
169
|
-
|
170
|
-
|
171
|
-
|
168
|
+
fill: #ff00ff;
|
169
|
+
stroke: none;
|
170
|
+
stroke-width: 1px;
|
172
171
|
}
|
173
172
|
.key7,.fill7{
|
174
|
-
|
175
|
-
|
176
|
-
|
173
|
+
fill: #00ffff;
|
174
|
+
stroke: none;
|
175
|
+
stroke-width: 1px;
|
177
176
|
}
|
178
177
|
.key8,.fill8{
|
179
|
-
|
180
|
-
|
181
|
-
|
178
|
+
fill: #ffff00;
|
179
|
+
stroke: none;
|
180
|
+
stroke-width: 1px;
|
182
181
|
}
|
183
182
|
.key9,.fill9{
|
184
|
-
|
185
|
-
|
186
|
-
|
183
|
+
fill: #cc6666;
|
184
|
+
stroke: none;
|
185
|
+
stroke-width: 1px;
|
187
186
|
}
|
188
187
|
.key10,.fill10{
|
189
|
-
|
190
|
-
|
191
|
-
|
188
|
+
fill: #663399;
|
189
|
+
stroke: none;
|
190
|
+
stroke-width: 1px;
|
192
191
|
}
|
193
192
|
.key11,.fill11{
|
194
|
-
|
195
|
-
|
196
|
-
|
193
|
+
fill: #339900;
|
194
|
+
stroke: none;
|
195
|
+
stroke-width: 1px;
|
197
196
|
}
|
198
197
|
.key12,.fill12{
|
199
|
-
|
200
|
-
|
201
|
-
|
198
|
+
fill: #9966FF;
|
199
|
+
stroke: none;
|
200
|
+
stroke-width: 1px;
|
202
201
|
}
|
203
202
|
EOL
|
204
|
-
|
205
|
-
|
206
|
-
end
|
207
|
-
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
208
206
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
if RUBY_VERSION<="1.9.0"
|
3
|
+
class ::Vector
|
4
|
+
alias_method :old_coerce, :coerce
|
5
|
+
def coerce(other)
|
6
|
+
case other
|
7
|
+
when Numeric
|
8
|
+
return Matrix::Scalar.new(other), self
|
9
|
+
else
|
10
|
+
raise TypeError, "#{self.class} can't be coerced into #{other.class}"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class ::Matrix
|
17
|
+
def to_gsl
|
18
|
+
out=[]
|
19
|
+
self.row_size.times{|i|
|
20
|
+
out[i]=self.row(i).to_a
|
21
|
+
}
|
22
|
+
GSL::Matrix[*out]
|
23
|
+
end
|
24
|
+
|
25
|
+
# Calculate marginal of rows
|
26
|
+
def rows_sum
|
27
|
+
(0...row_size).collect {|i|
|
28
|
+
row(i).to_a.inject(0) {|a,v| a+v}
|
29
|
+
}
|
30
|
+
end
|
31
|
+
# Calculate marginal of columns
|
32
|
+
def cols_sum
|
33
|
+
(0...column_size).collect {|i|
|
34
|
+
column(i).to_a.inject(0) {|a,v| a+v}
|
35
|
+
}
|
36
|
+
end
|
37
|
+
# Calculate sum of cells
|
38
|
+
def total_sum
|
39
|
+
rows_sum.inject(0){|a,v| a+v}
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
module GSL
|
44
|
+
class Matrix
|
45
|
+
def to_matrix
|
46
|
+
rows=self.size1
|
47
|
+
cols=self.size2
|
48
|
+
out=(0...rows).collect{|i| (0...cols).collect {|j| self[i,j]} }
|
49
|
+
::Matrix.rows(out)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
module Statsample
|
55
|
+
attr :labels
|
56
|
+
attr :name
|
57
|
+
module CorrelationMatrix
|
58
|
+
def summary
|
59
|
+
rp=ReportBuilder.new()
|
60
|
+
rp.add(self)
|
61
|
+
rp.to_text
|
62
|
+
end
|
63
|
+
def labels=(v)
|
64
|
+
@labels=v
|
65
|
+
end
|
66
|
+
def name=(v)
|
67
|
+
@name=v
|
68
|
+
end
|
69
|
+
def to_reportbuilder(generator)
|
70
|
+
@name||="Correlation Matrix"
|
71
|
+
@labels||=row_size.times.collect {|i| i.to_s}
|
72
|
+
t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+@labels)
|
73
|
+
row_size.times {|i|
|
74
|
+
t.add_row([@labels[i]]+@rows[i].collect {|i| sprintf("%0.3f",i).gsub("0.",".")})
|
75
|
+
}
|
76
|
+
generator.parse_element(t)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|