statsample 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/test/test_csv.rb CHANGED
@@ -1,13 +1,12 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
- require 'tmpdir'
3
+ require 'tmpdir'
4
4
  require 'test/unit'
5
5
 
6
6
  class StatsampleCSVTestCase < Test::Unit::TestCase
7
- def initialize(*args)
7
+ def setup
8
8
  @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
9
- super
10
- end
9
+ end
11
10
  def test_read
12
11
  assert_equal(6,@ds.cases)
13
12
  assert_equal(%w{id name age city a1},@ds.fields)
data/test/test_dataset.rb CHANGED
@@ -1,18 +1,24 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'test/unit'
4
-
4
+ require 'tmpdir'
5
5
  class StatsampleDatasetTestCase < Test::Unit::TestCase
6
- def initialize(*args)
6
+ def setup
7
7
  @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
8
8
  'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
9
9
  'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
10
- super
11
10
  end
12
11
  def test_basic
13
12
  assert_equal(5,@ds.cases)
14
13
  assert_equal(%w{id name age city a1}, @ds.fields)
15
14
  end
15
+ def test_saveload
16
+ outfile=Dir::tmpdir+"/dataset.ds"
17
+ @ds.save(outfile)
18
+ a=Statsample.load(outfile)
19
+ assert_equal(@ds,a)
20
+ end
21
+
16
22
  def test_matrix
17
23
  matrix=Matrix[[1,2],[3,4],[5,6]]
18
24
  ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
@@ -250,6 +256,19 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
250
256
  @ds.from_to("name","a2")
251
257
  end
252
258
  end
259
+ def test_each_array_with_nils
260
+ v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
261
+ v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
262
+ v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
263
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
264
+ ds2=ds1.dup_empty
265
+ ds1.each_array_with_nils {|row|
266
+ ds2.add_case_array(row)
267
+ }
268
+ ds2.update_valid_data
269
+ assert_equal([1,nil,3,4,nil],ds2['v1'].data)
270
+ assert_equal([5,6,nil,8,20],ds2['v2'].data)
271
+ end
253
272
  def test_dup_only_valid
254
273
  v1=[1,nil,3,4].to_vector(:scale)
255
274
  v2=[5,6,nil,8].to_vector(:scale)
@@ -33,9 +33,10 @@ class DistributionTestCase < Test::Unit::TestCase
33
33
  def test_normal
34
34
  if !NOT_GSL
35
35
  [-2,0.1,0.5,1,2].each{|x|
36
- area=Distribution::Normal.cdf(x)
37
- assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
38
- assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
36
+ area=Distribution::Normal.cdf(x)
37
+ assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
38
+ assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
39
+ assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x),0.0001)
39
40
  }
40
41
  end
41
42
  end
data/test/test_ggobi.rb CHANGED
@@ -13,8 +13,8 @@ class StatsampleGGobiTestCase < Test::Unit::TestCase
13
13
  @ds={'v1'=>v1,'v2'=>@v2,'v3'=>v3}.to_dataset
14
14
  end
15
15
  def test_values_definition
16
- a=[1.0,2,"a"]
17
- assert_equal("<real>1.0</real> <int>2</int> <string>a</string>",Statsample::GGobi.values_definition(a))
16
+ a=[1.0,2,"a",nil]
17
+ assert_equal("1.0 2 a NA", Statsample::GGobi.values_definition(a,"NA"))
18
18
  end
19
19
  def test_variable_definition
20
20
  carrier=OpenStruct.new
@@ -120,13 +120,22 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
120
120
  model_test(lr)
121
121
  predicted=[nil,1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
122
122
  c_predicted = lr.predicted
123
+
123
124
  predicted.each_index{|i|
124
- assert_in_delta(predicted[i],c_predicted[i],0.001)
125
+ if c_predicted[i].nil?
126
+ assert(predicted[i].nil?)
127
+ else
128
+ assert_in_delta(predicted[i], c_predicted[i], 0.001)
129
+ end
125
130
  }
126
131
  residuals=[nil,1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
127
132
  c_residuals=lr.residuals
128
133
  residuals.each_index{|i|
129
- assert_in_delta(residuals[i],c_residuals[i],0.001)
134
+ if c_residuals[i].nil?
135
+ assert(residuals[i].nil?)
136
+ else
137
+ assert_in_delta(residuals[i],c_residuals[i],0.001)
138
+ end
130
139
  }
131
140
  end
132
141
  def test_ds_by_exp
@@ -38,7 +38,6 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
38
38
  vector=ar.to_vector
39
39
  file=@image_path+"/svggraph_default.svg"
40
40
  vector.svggraph_frequencies(file)
41
-
42
41
  file=@image_path+"/svggraph_Bar.svg"
43
42
  vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
44
43
  assert(File.exists?(file))
data/test/test_vector.rb CHANGED
@@ -1,18 +1,43 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'test/unit'
4
-
4
+ require 'tmpdir'
5
5
  class StatsampleVectorTestCase < Test::Unit::TestCase
6
6
 
7
- def initialize(*args)
8
- super
7
+ def setup
9
8
  @c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
10
9
  @c.missing_values=[-99]
11
- end
10
+
11
+ end
12
+ def test_save_load
13
+ outfile=Dir::tmpdir+"/vector.vec"
14
+ @c.save(outfile)
15
+ a=Statsample.load(outfile)
16
+ assert_equal(@c,a)
17
+
18
+ end
19
+ def test_lazy_methods
20
+ data=[1,2,3,4,5,nil]
21
+ correct=Statsample::Vector.new(data,:scale)
22
+ lazy1=data.to_vector(:scale)
23
+ lazy2=data.to_scale
24
+ assert_equal(correct,lazy1)
25
+ assert_equal(correct,lazy2)
26
+ assert_equal(:scale,lazy2.type)
27
+ assert_equal([1,2,3,4,5],lazy2.valid_data)
28
+ end
12
29
  def test_enumerable
13
30
  val=@c.collect {|v| v}
14
31
  assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
15
32
  end
33
+ def test_recode
34
+ a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
35
+ exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
36
+ assert_equal(exp,a)
37
+ exp.recode!{|v| v==0 ? 1:0}
38
+ exp2=(([1]*15)+([0]*3)).to_vector
39
+ assert_equal(exp2,exp)
40
+ end
16
41
  def test_product
17
42
  a=[1,2,3,4,5].to_vector(:scale)
18
43
  assert_equal(120,a.product)
@@ -205,7 +230,27 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
205
230
  assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
206
231
 
207
232
  end
208
-
233
+ def test_valid_data
234
+ a=Statsample::Vector.new([1,2,3,4,"STRING"])
235
+ a.missing_values=[-99]
236
+ a.add(1,false)
237
+ a.add(2,false)
238
+ a.add(-99,false)
239
+ a.set_valid_data
240
+ exp_valid_data=[1,2,3,4,"STRING",1,2]
241
+ assert_equal(exp_valid_data,a.valid_data)
242
+ a.add(20,false)
243
+ a.add(30,false)
244
+ assert_equal(exp_valid_data,a.valid_data)
245
+ a.set_valid_data
246
+ exp_valid_data_2=[1,2,3,4,"STRING",1,2,20,30]
247
+ assert_equal(exp_valid_data_2,a.valid_data)
248
+ end
249
+ def test_set_value
250
+ @c[2]=10
251
+ expected=[5,5,10,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99].to_vector
252
+ assert_equal(expected.data,@c.data)
253
+ end
209
254
  def test_gsl
210
255
  if HAS_GSL
211
256
  a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
data/test/test_xls.rb CHANGED
@@ -8,11 +8,9 @@ rescue LoadError
8
8
  puts "You should install spreadsheet (gem install spreadsheet)"
9
9
  end
10
10
  class StatsampleExcelTestCase < Test::Unit::TestCase
11
- def initialize(*args)
11
+ def setup
12
12
  @ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
13
- super
14
- end
15
-
13
+ end
16
14
  def test_read
17
15
  assert_equal(6,@ds.cases)
18
16
  assert_equal(%w{id name age city a1},@ds.fields)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-12 00:00:00 -04:00
12
+ date: 2009-09-26 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -91,6 +91,8 @@ files:
91
91
  - lib/statsample/bivariate.rb
92
92
  - lib/statsample/codification.rb
93
93
  - lib/statsample/combination.rb
94
+ - lib/statsample/converter/csv18.rb
95
+ - lib/statsample/converter/csv19.rb
94
96
  - lib/statsample/converters.rb
95
97
  - lib/statsample/crosstab.rb
96
98
  - lib/statsample/dataset.rb
@@ -113,6 +115,7 @@ files:
113
115
  - lib/statsample/regression/binomial/probit.rb
114
116
  - lib/statsample/regression/multiple.rb
115
117
  - lib/statsample/regression/multiple/alglibengine.rb
118
+ - lib/statsample/regression/multiple/baseengine.rb
116
119
  - lib/statsample/regression/multiple/gslengine.rb
117
120
  - lib/statsample/regression/multiple/rubyengine.rb
118
121
  - lib/statsample/regression/simple.rb
@@ -124,7 +127,6 @@ files:
124
127
  - po/es/statsample.po
125
128
  - po/statsample.pot
126
129
  - setup.rb
127
- - test/_test_chart.rb
128
130
  - test/test_anova.rb
129
131
  - test/test_codification.rb
130
132
  - test/test_combination.rb
data/test/_test_chart.rb DELETED
@@ -1,58 +0,0 @@
1
- require File.dirname(__FILE__)+'/../lib/statsample'
2
- require 'tempfile'
3
- require 'test/unit'
4
- require 'statsample/chart/gdchart'
5
- # Not included on default test, because GDChart send a lot of warnings!
6
- class StatsampleChartTestCase < Test::Unit::TestCase
7
-
8
- def initialize(*args)
9
- @image_path=File.dirname(__FILE__)+"/images"
10
- super
11
- end
12
-
13
- def test_base_chart
14
- file=@image_path+"/gdchart_base_bar_1.jpg"
15
- width=500
16
- height=300
17
- chart_type=GDChart::BAR
18
- labels=["a","b","c","d","e"]
19
- options={'set_color'=>[0xFF3399]}
20
- n_data=1
21
- data=[10,40,30,20,40]
22
-
23
- Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
24
- assert(File.exists?(file))
25
- %w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.each{|stack|
26
- file=@image_path+"/gdchart_base_bar_2_#{stack}.jpg"
27
- n_data=2
28
- options={'set_color'=>[0xFF3399,0x33FF99,0xFF99FF,0xFF3399], 'stack_type'=>GDChart.const_get(stack.intern),'title'=>"Bar #{stack}"}
29
-
30
- chart_type=GDChart::BAR
31
-
32
- data=[10,15,10,20,30,30,20,5,15,20]
33
- Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
34
- assert(File.exists?(file))
35
- }
36
- end
37
- def test_vector
38
- file=@image_path+"/gdchart_bar.jpg"
39
- ar=[]
40
- (1..1000).each {|a|
41
- ar.push(rand(10))
42
- }
43
- vector=ar.to_vector
44
- file=@image_path+"/gdchart_bar.jpg"
45
- vector.gdchart_frequencies(file,800,600,GDChart::BAR,'title'=>'Bar')
46
- assert(File.exists?(file))
47
- file=@image_path+"/gdchart_bar3d.jpg"
48
- vector.gdchart_frequencies(file,300,100,GDChart::BAR3D,'title'=>'Bar3D')
49
- assert(File.exists?(file))
50
- file=@image_path+"/gdchart_floatingbar.jpg"
51
- vector.gdchart_frequencies(file,200,200,GDChart::LINE,'title'=>'FloatingBar')
52
- assert(File.exists?(file))
53
- vector.type=:scale
54
- file=@image_path+"/gdchart_histogram.jpg"
55
- vector.gdchart_histogram(5,file,300,400,GDChart::BAR,'title'=>'Histogram')
56
- assert(File.exists?(file))
57
- end
58
- end