statsample 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +3 -1
- data/lib/statsample.rb +175 -179
- data/lib/statsample/codification.rb +1 -1
- data/lib/statsample/converter/csv18.rb +56 -0
- data/lib/statsample/converter/csv19.rb +60 -0
- data/lib/statsample/converters.rb +26 -75
- data/lib/statsample/dataset.rb +38 -29
- data/lib/statsample/dominanceanalysis.rb +6 -6
- data/lib/statsample/graph/gdchart.rb +2 -1
- data/lib/statsample/graph/svggraph.rb +10 -9
- data/lib/statsample/multiset.rb +3 -3
- data/lib/statsample/regression/multiple.rb +43 -271
- data/lib/statsample/regression/multiple/baseengine.rb +235 -0
- data/lib/statsample/regression/multiple/gslengine.rb +2 -2
- data/lib/statsample/vector.rb +754 -736
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +22 -3
- data/test/test_distribution.rb +4 -3
- data/test/test_ggobi.rb +2 -2
- data/test/test_regression.rb +11 -2
- data/test/test_svg_graph.rb +0 -1
- data/test/test_vector.rb +50 -5
- data/test/test_xls.rb +2 -4
- metadata +5 -3
- data/test/_test_chart.rb +0 -58
data/test/test_csv.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
|
-
|
3
|
+
require 'tmpdir'
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
6
|
class StatsampleCSVTestCase < Test::Unit::TestCase
|
7
|
-
|
7
|
+
def setup
|
8
8
|
@ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
|
9
|
-
|
10
|
-
end
|
9
|
+
end
|
11
10
|
def test_read
|
12
11
|
assert_equal(6,@ds.cases)
|
13
12
|
assert_equal(%w{id name age city a1},@ds.fields)
|
data/test/test_dataset.rb
CHANGED
@@ -1,18 +1,24 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
|
4
|
+
require 'tmpdir'
|
5
5
|
class StatsampleDatasetTestCase < Test::Unit::TestCase
|
6
|
-
def
|
6
|
+
def setup
|
7
7
|
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
|
8
8
|
'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
|
9
9
|
'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
|
10
|
-
super
|
11
10
|
end
|
12
11
|
def test_basic
|
13
12
|
assert_equal(5,@ds.cases)
|
14
13
|
assert_equal(%w{id name age city a1}, @ds.fields)
|
15
14
|
end
|
15
|
+
def test_saveload
|
16
|
+
outfile=Dir::tmpdir+"/dataset.ds"
|
17
|
+
@ds.save(outfile)
|
18
|
+
a=Statsample.load(outfile)
|
19
|
+
assert_equal(@ds,a)
|
20
|
+
end
|
21
|
+
|
16
22
|
def test_matrix
|
17
23
|
matrix=Matrix[[1,2],[3,4],[5,6]]
|
18
24
|
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
@@ -250,6 +256,19 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
250
256
|
@ds.from_to("name","a2")
|
251
257
|
end
|
252
258
|
end
|
259
|
+
def test_each_array_with_nils
|
260
|
+
v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
|
261
|
+
v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
|
262
|
+
v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
|
263
|
+
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
|
264
|
+
ds2=ds1.dup_empty
|
265
|
+
ds1.each_array_with_nils {|row|
|
266
|
+
ds2.add_case_array(row)
|
267
|
+
}
|
268
|
+
ds2.update_valid_data
|
269
|
+
assert_equal([1,nil,3,4,nil],ds2['v1'].data)
|
270
|
+
assert_equal([5,6,nil,8,20],ds2['v2'].data)
|
271
|
+
end
|
253
272
|
def test_dup_only_valid
|
254
273
|
v1=[1,nil,3,4].to_vector(:scale)
|
255
274
|
v2=[5,6,nil,8].to_vector(:scale)
|
data/test/test_distribution.rb
CHANGED
@@ -33,9 +33,10 @@ class DistributionTestCase < Test::Unit::TestCase
|
|
33
33
|
def test_normal
|
34
34
|
if !NOT_GSL
|
35
35
|
[-2,0.1,0.5,1,2].each{|x|
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
area=Distribution::Normal.cdf(x)
|
37
|
+
assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
|
38
|
+
assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
|
39
|
+
assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x),0.0001)
|
39
40
|
}
|
40
41
|
end
|
41
42
|
end
|
data/test/test_ggobi.rb
CHANGED
@@ -13,8 +13,8 @@ class StatsampleGGobiTestCase < Test::Unit::TestCase
|
|
13
13
|
@ds={'v1'=>v1,'v2'=>@v2,'v3'=>v3}.to_dataset
|
14
14
|
end
|
15
15
|
def test_values_definition
|
16
|
-
a=[1.0,2,"a"]
|
17
|
-
assert_equal("
|
16
|
+
a=[1.0,2,"a",nil]
|
17
|
+
assert_equal("1.0 2 a NA", Statsample::GGobi.values_definition(a,"NA"))
|
18
18
|
end
|
19
19
|
def test_variable_definition
|
20
20
|
carrier=OpenStruct.new
|
data/test/test_regression.rb
CHANGED
@@ -120,13 +120,22 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
|
|
120
120
|
model_test(lr)
|
121
121
|
predicted=[nil,1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
|
122
122
|
c_predicted = lr.predicted
|
123
|
+
|
123
124
|
predicted.each_index{|i|
|
124
|
-
|
125
|
+
if c_predicted[i].nil?
|
126
|
+
assert(predicted[i].nil?)
|
127
|
+
else
|
128
|
+
assert_in_delta(predicted[i], c_predicted[i], 0.001)
|
129
|
+
end
|
125
130
|
}
|
126
131
|
residuals=[nil,1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
|
127
132
|
c_residuals=lr.residuals
|
128
133
|
residuals.each_index{|i|
|
129
|
-
|
134
|
+
if c_residuals[i].nil?
|
135
|
+
assert(residuals[i].nil?)
|
136
|
+
else
|
137
|
+
assert_in_delta(residuals[i],c_residuals[i],0.001)
|
138
|
+
end
|
130
139
|
}
|
131
140
|
end
|
132
141
|
def test_ds_by_exp
|
data/test/test_svg_graph.rb
CHANGED
@@ -38,7 +38,6 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
|
|
38
38
|
vector=ar.to_vector
|
39
39
|
file=@image_path+"/svggraph_default.svg"
|
40
40
|
vector.svggraph_frequencies(file)
|
41
|
-
|
42
41
|
file=@image_path+"/svggraph_Bar.svg"
|
43
42
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
|
44
43
|
assert(File.exists?(file))
|
data/test/test_vector.rb
CHANGED
@@ -1,18 +1,43 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
|
4
|
+
require 'tmpdir'
|
5
5
|
class StatsampleVectorTestCase < Test::Unit::TestCase
|
6
6
|
|
7
|
-
|
8
|
-
super
|
7
|
+
def setup
|
9
8
|
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
|
10
9
|
@c.missing_values=[-99]
|
11
|
-
|
10
|
+
|
11
|
+
end
|
12
|
+
def test_save_load
|
13
|
+
outfile=Dir::tmpdir+"/vector.vec"
|
14
|
+
@c.save(outfile)
|
15
|
+
a=Statsample.load(outfile)
|
16
|
+
assert_equal(@c,a)
|
17
|
+
|
18
|
+
end
|
19
|
+
def test_lazy_methods
|
20
|
+
data=[1,2,3,4,5,nil]
|
21
|
+
correct=Statsample::Vector.new(data,:scale)
|
22
|
+
lazy1=data.to_vector(:scale)
|
23
|
+
lazy2=data.to_scale
|
24
|
+
assert_equal(correct,lazy1)
|
25
|
+
assert_equal(correct,lazy2)
|
26
|
+
assert_equal(:scale,lazy2.type)
|
27
|
+
assert_equal([1,2,3,4,5],lazy2.valid_data)
|
28
|
+
end
|
12
29
|
def test_enumerable
|
13
30
|
val=@c.collect {|v| v}
|
14
31
|
assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
|
15
32
|
end
|
33
|
+
def test_recode
|
34
|
+
a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
|
35
|
+
exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
|
36
|
+
assert_equal(exp,a)
|
37
|
+
exp.recode!{|v| v==0 ? 1:0}
|
38
|
+
exp2=(([1]*15)+([0]*3)).to_vector
|
39
|
+
assert_equal(exp2,exp)
|
40
|
+
end
|
16
41
|
def test_product
|
17
42
|
a=[1,2,3,4,5].to_vector(:scale)
|
18
43
|
assert_equal(120,a.product)
|
@@ -205,7 +230,27 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
|
|
205
230
|
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
206
231
|
|
207
232
|
end
|
208
|
-
|
233
|
+
def test_valid_data
|
234
|
+
a=Statsample::Vector.new([1,2,3,4,"STRING"])
|
235
|
+
a.missing_values=[-99]
|
236
|
+
a.add(1,false)
|
237
|
+
a.add(2,false)
|
238
|
+
a.add(-99,false)
|
239
|
+
a.set_valid_data
|
240
|
+
exp_valid_data=[1,2,3,4,"STRING",1,2]
|
241
|
+
assert_equal(exp_valid_data,a.valid_data)
|
242
|
+
a.add(20,false)
|
243
|
+
a.add(30,false)
|
244
|
+
assert_equal(exp_valid_data,a.valid_data)
|
245
|
+
a.set_valid_data
|
246
|
+
exp_valid_data_2=[1,2,3,4,"STRING",1,2,20,30]
|
247
|
+
assert_equal(exp_valid_data_2,a.valid_data)
|
248
|
+
end
|
249
|
+
def test_set_value
|
250
|
+
@c[2]=10
|
251
|
+
expected=[5,5,10,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99].to_vector
|
252
|
+
assert_equal(expected.data,@c.data)
|
253
|
+
end
|
209
254
|
def test_gsl
|
210
255
|
if HAS_GSL
|
211
256
|
a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
|
data/test/test_xls.rb
CHANGED
@@ -8,11 +8,9 @@ rescue LoadError
|
|
8
8
|
puts "You should install spreadsheet (gem install spreadsheet)"
|
9
9
|
end
|
10
10
|
class StatsampleExcelTestCase < Test::Unit::TestCase
|
11
|
-
|
11
|
+
def setup
|
12
12
|
@ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
|
13
|
-
|
14
|
-
end
|
15
|
-
|
13
|
+
end
|
16
14
|
def test_read
|
17
15
|
assert_equal(6,@ds.cases)
|
18
16
|
assert_equal(%w{id name age city a1},@ds.fields)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-26 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -91,6 +91,8 @@ files:
|
|
91
91
|
- lib/statsample/bivariate.rb
|
92
92
|
- lib/statsample/codification.rb
|
93
93
|
- lib/statsample/combination.rb
|
94
|
+
- lib/statsample/converter/csv18.rb
|
95
|
+
- lib/statsample/converter/csv19.rb
|
94
96
|
- lib/statsample/converters.rb
|
95
97
|
- lib/statsample/crosstab.rb
|
96
98
|
- lib/statsample/dataset.rb
|
@@ -113,6 +115,7 @@ files:
|
|
113
115
|
- lib/statsample/regression/binomial/probit.rb
|
114
116
|
- lib/statsample/regression/multiple.rb
|
115
117
|
- lib/statsample/regression/multiple/alglibengine.rb
|
118
|
+
- lib/statsample/regression/multiple/baseengine.rb
|
116
119
|
- lib/statsample/regression/multiple/gslengine.rb
|
117
120
|
- lib/statsample/regression/multiple/rubyengine.rb
|
118
121
|
- lib/statsample/regression/simple.rb
|
@@ -124,7 +127,6 @@ files:
|
|
124
127
|
- po/es/statsample.po
|
125
128
|
- po/statsample.pot
|
126
129
|
- setup.rb
|
127
|
-
- test/_test_chart.rb
|
128
130
|
- test/test_anova.rb
|
129
131
|
- test/test_codification.rb
|
130
132
|
- test/test_combination.rb
|
data/test/_test_chart.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
-
require 'tempfile'
|
3
|
-
require 'test/unit'
|
4
|
-
require 'statsample/chart/gdchart'
|
5
|
-
# Not included on default test, because GDChart send a lot of warnings!
|
6
|
-
class StatsampleChartTestCase < Test::Unit::TestCase
|
7
|
-
|
8
|
-
def initialize(*args)
|
9
|
-
@image_path=File.dirname(__FILE__)+"/images"
|
10
|
-
super
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_base_chart
|
14
|
-
file=@image_path+"/gdchart_base_bar_1.jpg"
|
15
|
-
width=500
|
16
|
-
height=300
|
17
|
-
chart_type=GDChart::BAR
|
18
|
-
labels=["a","b","c","d","e"]
|
19
|
-
options={'set_color'=>[0xFF3399]}
|
20
|
-
n_data=1
|
21
|
-
data=[10,40,30,20,40]
|
22
|
-
|
23
|
-
Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
|
24
|
-
assert(File.exists?(file))
|
25
|
-
%w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.each{|stack|
|
26
|
-
file=@image_path+"/gdchart_base_bar_2_#{stack}.jpg"
|
27
|
-
n_data=2
|
28
|
-
options={'set_color'=>[0xFF3399,0x33FF99,0xFF99FF,0xFF3399], 'stack_type'=>GDChart.const_get(stack.intern),'title'=>"Bar #{stack}"}
|
29
|
-
|
30
|
-
chart_type=GDChart::BAR
|
31
|
-
|
32
|
-
data=[10,15,10,20,30,30,20,5,15,20]
|
33
|
-
Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
|
34
|
-
assert(File.exists?(file))
|
35
|
-
}
|
36
|
-
end
|
37
|
-
def test_vector
|
38
|
-
file=@image_path+"/gdchart_bar.jpg"
|
39
|
-
ar=[]
|
40
|
-
(1..1000).each {|a|
|
41
|
-
ar.push(rand(10))
|
42
|
-
}
|
43
|
-
vector=ar.to_vector
|
44
|
-
file=@image_path+"/gdchart_bar.jpg"
|
45
|
-
vector.gdchart_frequencies(file,800,600,GDChart::BAR,'title'=>'Bar')
|
46
|
-
assert(File.exists?(file))
|
47
|
-
file=@image_path+"/gdchart_bar3d.jpg"
|
48
|
-
vector.gdchart_frequencies(file,300,100,GDChart::BAR3D,'title'=>'Bar3D')
|
49
|
-
assert(File.exists?(file))
|
50
|
-
file=@image_path+"/gdchart_floatingbar.jpg"
|
51
|
-
vector.gdchart_frequencies(file,200,200,GDChart::LINE,'title'=>'FloatingBar')
|
52
|
-
assert(File.exists?(file))
|
53
|
-
vector.type=:scale
|
54
|
-
file=@image_path+"/gdchart_histogram.jpg"
|
55
|
-
vector.gdchart_histogram(5,file,300,400,GDChart::BAR,'title'=>'Histogram')
|
56
|
-
assert(File.exists?(file))
|
57
|
-
end
|
58
|
-
end
|