statsample 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +3 -1
- data/lib/statsample.rb +175 -179
- data/lib/statsample/codification.rb +1 -1
- data/lib/statsample/converter/csv18.rb +56 -0
- data/lib/statsample/converter/csv19.rb +60 -0
- data/lib/statsample/converters.rb +26 -75
- data/lib/statsample/dataset.rb +38 -29
- data/lib/statsample/dominanceanalysis.rb +6 -6
- data/lib/statsample/graph/gdchart.rb +2 -1
- data/lib/statsample/graph/svggraph.rb +10 -9
- data/lib/statsample/multiset.rb +3 -3
- data/lib/statsample/regression/multiple.rb +43 -271
- data/lib/statsample/regression/multiple/baseengine.rb +235 -0
- data/lib/statsample/regression/multiple/gslengine.rb +2 -2
- data/lib/statsample/vector.rb +754 -736
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +22 -3
- data/test/test_distribution.rb +4 -3
- data/test/test_ggobi.rb +2 -2
- data/test/test_regression.rb +11 -2
- data/test/test_svg_graph.rb +0 -1
- data/test/test_vector.rb +50 -5
- data/test/test_xls.rb +2 -4
- metadata +5 -3
- data/test/_test_chart.rb +0 -58
data/test/test_csv.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
|
-
|
3
|
+
require 'tmpdir'
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
6
|
class StatsampleCSVTestCase < Test::Unit::TestCase
|
7
|
-
|
7
|
+
def setup
|
8
8
|
@ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
|
9
|
-
|
10
|
-
end
|
9
|
+
end
|
11
10
|
def test_read
|
12
11
|
assert_equal(6,@ds.cases)
|
13
12
|
assert_equal(%w{id name age city a1},@ds.fields)
|
data/test/test_dataset.rb
CHANGED
@@ -1,18 +1,24 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
|
4
|
+
require 'tmpdir'
|
5
5
|
class StatsampleDatasetTestCase < Test::Unit::TestCase
|
6
|
-
def
|
6
|
+
def setup
|
7
7
|
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
|
8
8
|
'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
|
9
9
|
'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
|
10
|
-
super
|
11
10
|
end
|
12
11
|
def test_basic
|
13
12
|
assert_equal(5,@ds.cases)
|
14
13
|
assert_equal(%w{id name age city a1}, @ds.fields)
|
15
14
|
end
|
15
|
+
def test_saveload
|
16
|
+
outfile=Dir::tmpdir+"/dataset.ds"
|
17
|
+
@ds.save(outfile)
|
18
|
+
a=Statsample.load(outfile)
|
19
|
+
assert_equal(@ds,a)
|
20
|
+
end
|
21
|
+
|
16
22
|
def test_matrix
|
17
23
|
matrix=Matrix[[1,2],[3,4],[5,6]]
|
18
24
|
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
@@ -250,6 +256,19 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
250
256
|
@ds.from_to("name","a2")
|
251
257
|
end
|
252
258
|
end
|
259
|
+
def test_each_array_with_nils
|
260
|
+
v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
|
261
|
+
v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
|
262
|
+
v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
|
263
|
+
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
|
264
|
+
ds2=ds1.dup_empty
|
265
|
+
ds1.each_array_with_nils {|row|
|
266
|
+
ds2.add_case_array(row)
|
267
|
+
}
|
268
|
+
ds2.update_valid_data
|
269
|
+
assert_equal([1,nil,3,4,nil],ds2['v1'].data)
|
270
|
+
assert_equal([5,6,nil,8,20],ds2['v2'].data)
|
271
|
+
end
|
253
272
|
def test_dup_only_valid
|
254
273
|
v1=[1,nil,3,4].to_vector(:scale)
|
255
274
|
v2=[5,6,nil,8].to_vector(:scale)
|
data/test/test_distribution.rb
CHANGED
@@ -33,9 +33,10 @@ class DistributionTestCase < Test::Unit::TestCase
|
|
33
33
|
def test_normal
|
34
34
|
if !NOT_GSL
|
35
35
|
[-2,0.1,0.5,1,2].each{|x|
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
area=Distribution::Normal.cdf(x)
|
37
|
+
assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
|
38
|
+
assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
|
39
|
+
assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x),0.0001)
|
39
40
|
}
|
40
41
|
end
|
41
42
|
end
|
data/test/test_ggobi.rb
CHANGED
@@ -13,8 +13,8 @@ class StatsampleGGobiTestCase < Test::Unit::TestCase
|
|
13
13
|
@ds={'v1'=>v1,'v2'=>@v2,'v3'=>v3}.to_dataset
|
14
14
|
end
|
15
15
|
def test_values_definition
|
16
|
-
a=[1.0,2,"a"]
|
17
|
-
assert_equal("
|
16
|
+
a=[1.0,2,"a",nil]
|
17
|
+
assert_equal("1.0 2 a NA", Statsample::GGobi.values_definition(a,"NA"))
|
18
18
|
end
|
19
19
|
def test_variable_definition
|
20
20
|
carrier=OpenStruct.new
|
data/test/test_regression.rb
CHANGED
@@ -120,13 +120,22 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
|
|
120
120
|
model_test(lr)
|
121
121
|
predicted=[nil,1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
|
122
122
|
c_predicted = lr.predicted
|
123
|
+
|
123
124
|
predicted.each_index{|i|
|
124
|
-
|
125
|
+
if c_predicted[i].nil?
|
126
|
+
assert(predicted[i].nil?)
|
127
|
+
else
|
128
|
+
assert_in_delta(predicted[i], c_predicted[i], 0.001)
|
129
|
+
end
|
125
130
|
}
|
126
131
|
residuals=[nil,1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
|
127
132
|
c_residuals=lr.residuals
|
128
133
|
residuals.each_index{|i|
|
129
|
-
|
134
|
+
if c_residuals[i].nil?
|
135
|
+
assert(residuals[i].nil?)
|
136
|
+
else
|
137
|
+
assert_in_delta(residuals[i],c_residuals[i],0.001)
|
138
|
+
end
|
130
139
|
}
|
131
140
|
end
|
132
141
|
def test_ds_by_exp
|
data/test/test_svg_graph.rb
CHANGED
@@ -38,7 +38,6 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
|
|
38
38
|
vector=ar.to_vector
|
39
39
|
file=@image_path+"/svggraph_default.svg"
|
40
40
|
vector.svggraph_frequencies(file)
|
41
|
-
|
42
41
|
file=@image_path+"/svggraph_Bar.svg"
|
43
42
|
vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
|
44
43
|
assert(File.exists?(file))
|
data/test/test_vector.rb
CHANGED
@@ -1,18 +1,43 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
|
4
|
+
require 'tmpdir'
|
5
5
|
class StatsampleVectorTestCase < Test::Unit::TestCase
|
6
6
|
|
7
|
-
|
8
|
-
super
|
7
|
+
def setup
|
9
8
|
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
|
10
9
|
@c.missing_values=[-99]
|
11
|
-
|
10
|
+
|
11
|
+
end
|
12
|
+
def test_save_load
|
13
|
+
outfile=Dir::tmpdir+"/vector.vec"
|
14
|
+
@c.save(outfile)
|
15
|
+
a=Statsample.load(outfile)
|
16
|
+
assert_equal(@c,a)
|
17
|
+
|
18
|
+
end
|
19
|
+
def test_lazy_methods
|
20
|
+
data=[1,2,3,4,5,nil]
|
21
|
+
correct=Statsample::Vector.new(data,:scale)
|
22
|
+
lazy1=data.to_vector(:scale)
|
23
|
+
lazy2=data.to_scale
|
24
|
+
assert_equal(correct,lazy1)
|
25
|
+
assert_equal(correct,lazy2)
|
26
|
+
assert_equal(:scale,lazy2.type)
|
27
|
+
assert_equal([1,2,3,4,5],lazy2.valid_data)
|
28
|
+
end
|
12
29
|
def test_enumerable
|
13
30
|
val=@c.collect {|v| v}
|
14
31
|
assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
|
15
32
|
end
|
33
|
+
def test_recode
|
34
|
+
a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
|
35
|
+
exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
|
36
|
+
assert_equal(exp,a)
|
37
|
+
exp.recode!{|v| v==0 ? 1:0}
|
38
|
+
exp2=(([1]*15)+([0]*3)).to_vector
|
39
|
+
assert_equal(exp2,exp)
|
40
|
+
end
|
16
41
|
def test_product
|
17
42
|
a=[1,2,3,4,5].to_vector(:scale)
|
18
43
|
assert_equal(120,a.product)
|
@@ -205,7 +230,27 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
|
|
205
230
|
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
206
231
|
|
207
232
|
end
|
208
|
-
|
233
|
+
def test_valid_data
|
234
|
+
a=Statsample::Vector.new([1,2,3,4,"STRING"])
|
235
|
+
a.missing_values=[-99]
|
236
|
+
a.add(1,false)
|
237
|
+
a.add(2,false)
|
238
|
+
a.add(-99,false)
|
239
|
+
a.set_valid_data
|
240
|
+
exp_valid_data=[1,2,3,4,"STRING",1,2]
|
241
|
+
assert_equal(exp_valid_data,a.valid_data)
|
242
|
+
a.add(20,false)
|
243
|
+
a.add(30,false)
|
244
|
+
assert_equal(exp_valid_data,a.valid_data)
|
245
|
+
a.set_valid_data
|
246
|
+
exp_valid_data_2=[1,2,3,4,"STRING",1,2,20,30]
|
247
|
+
assert_equal(exp_valid_data_2,a.valid_data)
|
248
|
+
end
|
249
|
+
def test_set_value
|
250
|
+
@c[2]=10
|
251
|
+
expected=[5,5,10,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99].to_vector
|
252
|
+
assert_equal(expected.data,@c.data)
|
253
|
+
end
|
209
254
|
def test_gsl
|
210
255
|
if HAS_GSL
|
211
256
|
a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
|
data/test/test_xls.rb
CHANGED
@@ -8,11 +8,9 @@ rescue LoadError
|
|
8
8
|
puts "You should install spreadsheet (gem install spreadsheet)"
|
9
9
|
end
|
10
10
|
class StatsampleExcelTestCase < Test::Unit::TestCase
|
11
|
-
|
11
|
+
def setup
|
12
12
|
@ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
|
13
|
-
|
14
|
-
end
|
15
|
-
|
13
|
+
end
|
16
14
|
def test_read
|
17
15
|
assert_equal(6,@ds.cases)
|
18
16
|
assert_equal(%w{id name age city a1},@ds.fields)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-26 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -91,6 +91,8 @@ files:
|
|
91
91
|
- lib/statsample/bivariate.rb
|
92
92
|
- lib/statsample/codification.rb
|
93
93
|
- lib/statsample/combination.rb
|
94
|
+
- lib/statsample/converter/csv18.rb
|
95
|
+
- lib/statsample/converter/csv19.rb
|
94
96
|
- lib/statsample/converters.rb
|
95
97
|
- lib/statsample/crosstab.rb
|
96
98
|
- lib/statsample/dataset.rb
|
@@ -113,6 +115,7 @@ files:
|
|
113
115
|
- lib/statsample/regression/binomial/probit.rb
|
114
116
|
- lib/statsample/regression/multiple.rb
|
115
117
|
- lib/statsample/regression/multiple/alglibengine.rb
|
118
|
+
- lib/statsample/regression/multiple/baseengine.rb
|
116
119
|
- lib/statsample/regression/multiple/gslengine.rb
|
117
120
|
- lib/statsample/regression/multiple/rubyengine.rb
|
118
121
|
- lib/statsample/regression/simple.rb
|
@@ -124,7 +127,6 @@ files:
|
|
124
127
|
- po/es/statsample.po
|
125
128
|
- po/statsample.pot
|
126
129
|
- setup.rb
|
127
|
-
- test/_test_chart.rb
|
128
130
|
- test/test_anova.rb
|
129
131
|
- test/test_codification.rb
|
130
132
|
- test/test_combination.rb
|
data/test/_test_chart.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
-
require 'tempfile'
|
3
|
-
require 'test/unit'
|
4
|
-
require 'statsample/chart/gdchart'
|
5
|
-
# Not included on default test, because GDChart send a lot of warnings!
|
6
|
-
class StatsampleChartTestCase < Test::Unit::TestCase
|
7
|
-
|
8
|
-
def initialize(*args)
|
9
|
-
@image_path=File.dirname(__FILE__)+"/images"
|
10
|
-
super
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_base_chart
|
14
|
-
file=@image_path+"/gdchart_base_bar_1.jpg"
|
15
|
-
width=500
|
16
|
-
height=300
|
17
|
-
chart_type=GDChart::BAR
|
18
|
-
labels=["a","b","c","d","e"]
|
19
|
-
options={'set_color'=>[0xFF3399]}
|
20
|
-
n_data=1
|
21
|
-
data=[10,40,30,20,40]
|
22
|
-
|
23
|
-
Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
|
24
|
-
assert(File.exists?(file))
|
25
|
-
%w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.each{|stack|
|
26
|
-
file=@image_path+"/gdchart_base_bar_2_#{stack}.jpg"
|
27
|
-
n_data=2
|
28
|
-
options={'set_color'=>[0xFF3399,0x33FF99,0xFF99FF,0xFF3399], 'stack_type'=>GDChart.const_get(stack.intern),'title'=>"Bar #{stack}"}
|
29
|
-
|
30
|
-
chart_type=GDChart::BAR
|
31
|
-
|
32
|
-
data=[10,15,10,20,30,30,20,5,15,20]
|
33
|
-
Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
|
34
|
-
assert(File.exists?(file))
|
35
|
-
}
|
36
|
-
end
|
37
|
-
def test_vector
|
38
|
-
file=@image_path+"/gdchart_bar.jpg"
|
39
|
-
ar=[]
|
40
|
-
(1..1000).each {|a|
|
41
|
-
ar.push(rand(10))
|
42
|
-
}
|
43
|
-
vector=ar.to_vector
|
44
|
-
file=@image_path+"/gdchart_bar.jpg"
|
45
|
-
vector.gdchart_frequencies(file,800,600,GDChart::BAR,'title'=>'Bar')
|
46
|
-
assert(File.exists?(file))
|
47
|
-
file=@image_path+"/gdchart_bar3d.jpg"
|
48
|
-
vector.gdchart_frequencies(file,300,100,GDChart::BAR3D,'title'=>'Bar3D')
|
49
|
-
assert(File.exists?(file))
|
50
|
-
file=@image_path+"/gdchart_floatingbar.jpg"
|
51
|
-
vector.gdchart_frequencies(file,200,200,GDChart::LINE,'title'=>'FloatingBar')
|
52
|
-
assert(File.exists?(file))
|
53
|
-
vector.type=:scale
|
54
|
-
file=@image_path+"/gdchart_histogram.jpg"
|
55
|
-
vector.gdchart_histogram(5,file,300,400,GDChart::BAR,'title'=>'Histogram')
|
56
|
-
assert(File.exists?(file))
|
57
|
-
end
|
58
|
-
end
|