statsample 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/test/test_csv.rb CHANGED
@@ -1,13 +1,12 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
- require 'tmpdir'
3
+ require 'tmpdir'
4
4
  require 'test/unit'
5
5
 
6
6
  class StatsampleCSVTestCase < Test::Unit::TestCase
7
- def initialize(*args)
7
+ def setup
8
8
  @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
9
- super
10
- end
9
+ end
11
10
  def test_read
12
11
  assert_equal(6,@ds.cases)
13
12
  assert_equal(%w{id name age city a1},@ds.fields)
data/test/test_dataset.rb CHANGED
@@ -1,18 +1,24 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'test/unit'
4
-
4
+ require 'tmpdir'
5
5
  class StatsampleDatasetTestCase < Test::Unit::TestCase
6
- def initialize(*args)
6
+ def setup
7
7
  @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
8
8
  'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
9
9
  'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
10
- super
11
10
  end
12
11
  def test_basic
13
12
  assert_equal(5,@ds.cases)
14
13
  assert_equal(%w{id name age city a1}, @ds.fields)
15
14
  end
15
+ def test_saveload
16
+ outfile=Dir::tmpdir+"/dataset.ds"
17
+ @ds.save(outfile)
18
+ a=Statsample.load(outfile)
19
+ assert_equal(@ds,a)
20
+ end
21
+
16
22
  def test_matrix
17
23
  matrix=Matrix[[1,2],[3,4],[5,6]]
18
24
  ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
@@ -250,6 +256,19 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
250
256
  @ds.from_to("name","a2")
251
257
  end
252
258
  end
259
+ def test_each_array_with_nils
260
+ v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
261
+ v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
262
+ v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
263
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
264
+ ds2=ds1.dup_empty
265
+ ds1.each_array_with_nils {|row|
266
+ ds2.add_case_array(row)
267
+ }
268
+ ds2.update_valid_data
269
+ assert_equal([1,nil,3,4,nil],ds2['v1'].data)
270
+ assert_equal([5,6,nil,8,20],ds2['v2'].data)
271
+ end
253
272
  def test_dup_only_valid
254
273
  v1=[1,nil,3,4].to_vector(:scale)
255
274
  v2=[5,6,nil,8].to_vector(:scale)
@@ -33,9 +33,10 @@ class DistributionTestCase < Test::Unit::TestCase
33
33
  def test_normal
34
34
  if !NOT_GSL
35
35
  [-2,0.1,0.5,1,2].each{|x|
36
- area=Distribution::Normal.cdf(x)
37
- assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
38
- assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
36
+ area=Distribution::Normal.cdf(x)
37
+ assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
38
+ assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
39
+ assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x),0.0001)
39
40
  }
40
41
  end
41
42
  end
data/test/test_ggobi.rb CHANGED
@@ -13,8 +13,8 @@ class StatsampleGGobiTestCase < Test::Unit::TestCase
13
13
  @ds={'v1'=>v1,'v2'=>@v2,'v3'=>v3}.to_dataset
14
14
  end
15
15
  def test_values_definition
16
- a=[1.0,2,"a"]
17
- assert_equal("<real>1.0</real> <int>2</int> <string>a</string>",Statsample::GGobi.values_definition(a))
16
+ a=[1.0,2,"a",nil]
17
+ assert_equal("1.0 2 a NA", Statsample::GGobi.values_definition(a,"NA"))
18
18
  end
19
19
  def test_variable_definition
20
20
  carrier=OpenStruct.new
@@ -120,13 +120,22 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
120
120
  model_test(lr)
121
121
  predicted=[nil,1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
122
122
  c_predicted = lr.predicted
123
+
123
124
  predicted.each_index{|i|
124
- assert_in_delta(predicted[i],c_predicted[i],0.001)
125
+ if c_predicted[i].nil?
126
+ assert(predicted[i].nil?)
127
+ else
128
+ assert_in_delta(predicted[i], c_predicted[i], 0.001)
129
+ end
125
130
  }
126
131
  residuals=[nil,1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
127
132
  c_residuals=lr.residuals
128
133
  residuals.each_index{|i|
129
- assert_in_delta(residuals[i],c_residuals[i],0.001)
134
+ if c_residuals[i].nil?
135
+ assert(residuals[i].nil?)
136
+ else
137
+ assert_in_delta(residuals[i],c_residuals[i],0.001)
138
+ end
130
139
  }
131
140
  end
132
141
  def test_ds_by_exp
@@ -38,7 +38,6 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
38
38
  vector=ar.to_vector
39
39
  file=@image_path+"/svggraph_default.svg"
40
40
  vector.svggraph_frequencies(file)
41
-
42
41
  file=@image_path+"/svggraph_Bar.svg"
43
42
  vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
44
43
  assert(File.exists?(file))
data/test/test_vector.rb CHANGED
@@ -1,18 +1,43 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'test/unit'
4
-
4
+ require 'tmpdir'
5
5
  class StatsampleVectorTestCase < Test::Unit::TestCase
6
6
 
7
- def initialize(*args)
8
- super
7
+ def setup
9
8
  @c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
10
9
  @c.missing_values=[-99]
11
- end
10
+
11
+ end
12
+ def test_save_load
13
+ outfile=Dir::tmpdir+"/vector.vec"
14
+ @c.save(outfile)
15
+ a=Statsample.load(outfile)
16
+ assert_equal(@c,a)
17
+
18
+ end
19
+ def test_lazy_methods
20
+ data=[1,2,3,4,5,nil]
21
+ correct=Statsample::Vector.new(data,:scale)
22
+ lazy1=data.to_vector(:scale)
23
+ lazy2=data.to_scale
24
+ assert_equal(correct,lazy1)
25
+ assert_equal(correct,lazy2)
26
+ assert_equal(:scale,lazy2.type)
27
+ assert_equal([1,2,3,4,5],lazy2.valid_data)
28
+ end
12
29
  def test_enumerable
13
30
  val=@c.collect {|v| v}
14
31
  assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
15
32
  end
33
+ def test_recode
34
+ a=@c.recode{|v| @c.is_valid?(v) ? 0 : 1 }
35
+ exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
36
+ assert_equal(exp,a)
37
+ exp.recode!{|v| v==0 ? 1:0}
38
+ exp2=(([1]*15)+([0]*3)).to_vector
39
+ assert_equal(exp2,exp)
40
+ end
16
41
  def test_product
17
42
  a=[1,2,3,4,5].to_vector(:scale)
18
43
  assert_equal(120,a.product)
@@ -205,7 +230,27 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
205
230
  assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
206
231
 
207
232
  end
208
-
233
+ def test_valid_data
234
+ a=Statsample::Vector.new([1,2,3,4,"STRING"])
235
+ a.missing_values=[-99]
236
+ a.add(1,false)
237
+ a.add(2,false)
238
+ a.add(-99,false)
239
+ a.set_valid_data
240
+ exp_valid_data=[1,2,3,4,"STRING",1,2]
241
+ assert_equal(exp_valid_data,a.valid_data)
242
+ a.add(20,false)
243
+ a.add(30,false)
244
+ assert_equal(exp_valid_data,a.valid_data)
245
+ a.set_valid_data
246
+ exp_valid_data_2=[1,2,3,4,"STRING",1,2,20,30]
247
+ assert_equal(exp_valid_data_2,a.valid_data)
248
+ end
249
+ def test_set_value
250
+ @c[2]=10
251
+ expected=[5,5,10,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99].to_vector
252
+ assert_equal(expected.data,@c.data)
253
+ end
209
254
  def test_gsl
210
255
  if HAS_GSL
211
256
  a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
data/test/test_xls.rb CHANGED
@@ -8,11 +8,9 @@ rescue LoadError
8
8
  puts "You should install spreadsheet (gem install spreadsheet)"
9
9
  end
10
10
  class StatsampleExcelTestCase < Test::Unit::TestCase
11
- def initialize(*args)
11
+ def setup
12
12
  @ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
13
- super
14
- end
15
-
13
+ end
16
14
  def test_read
17
15
  assert_equal(6,@ds.cases)
18
16
  assert_equal(%w{id name age city a1},@ds.fields)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-12 00:00:00 -04:00
12
+ date: 2009-09-26 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -91,6 +91,8 @@ files:
91
91
  - lib/statsample/bivariate.rb
92
92
  - lib/statsample/codification.rb
93
93
  - lib/statsample/combination.rb
94
+ - lib/statsample/converter/csv18.rb
95
+ - lib/statsample/converter/csv19.rb
94
96
  - lib/statsample/converters.rb
95
97
  - lib/statsample/crosstab.rb
96
98
  - lib/statsample/dataset.rb
@@ -113,6 +115,7 @@ files:
113
115
  - lib/statsample/regression/binomial/probit.rb
114
116
  - lib/statsample/regression/multiple.rb
115
117
  - lib/statsample/regression/multiple/alglibengine.rb
118
+ - lib/statsample/regression/multiple/baseengine.rb
116
119
  - lib/statsample/regression/multiple/gslengine.rb
117
120
  - lib/statsample/regression/multiple/rubyengine.rb
118
121
  - lib/statsample/regression/simple.rb
@@ -124,7 +127,6 @@ files:
124
127
  - po/es/statsample.po
125
128
  - po/statsample.pot
126
129
  - setup.rb
127
- - test/_test_chart.rb
128
130
  - test/test_anova.rb
129
131
  - test/test_codification.rb
130
132
  - test/test_combination.rb
data/test/_test_chart.rb DELETED
@@ -1,58 +0,0 @@
1
- require File.dirname(__FILE__)+'/../lib/statsample'
2
- require 'tempfile'
3
- require 'test/unit'
4
- require 'statsample/chart/gdchart'
5
- # Not included on default test, because GDChart send a lot of warnings!
6
- class StatsampleChartTestCase < Test::Unit::TestCase
7
-
8
- def initialize(*args)
9
- @image_path=File.dirname(__FILE__)+"/images"
10
- super
11
- end
12
-
13
- def test_base_chart
14
- file=@image_path+"/gdchart_base_bar_1.jpg"
15
- width=500
16
- height=300
17
- chart_type=GDChart::BAR
18
- labels=["a","b","c","d","e"]
19
- options={'set_color'=>[0xFF3399]}
20
- n_data=1
21
- data=[10,40,30,20,40]
22
-
23
- Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
24
- assert(File.exists?(file))
25
- %w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.each{|stack|
26
- file=@image_path+"/gdchart_base_bar_2_#{stack}.jpg"
27
- n_data=2
28
- options={'set_color'=>[0xFF3399,0x33FF99,0xFF99FF,0xFF3399], 'stack_type'=>GDChart.const_get(stack.intern),'title'=>"Bar #{stack}"}
29
-
30
- chart_type=GDChart::BAR
31
-
32
- data=[10,15,10,20,30,30,20,5,15,20]
33
- Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
34
- assert(File.exists?(file))
35
- }
36
- end
37
- def test_vector
38
- file=@image_path+"/gdchart_bar.jpg"
39
- ar=[]
40
- (1..1000).each {|a|
41
- ar.push(rand(10))
42
- }
43
- vector=ar.to_vector
44
- file=@image_path+"/gdchart_bar.jpg"
45
- vector.gdchart_frequencies(file,800,600,GDChart::BAR,'title'=>'Bar')
46
- assert(File.exists?(file))
47
- file=@image_path+"/gdchart_bar3d.jpg"
48
- vector.gdchart_frequencies(file,300,100,GDChart::BAR3D,'title'=>'Bar3D')
49
- assert(File.exists?(file))
50
- file=@image_path+"/gdchart_floatingbar.jpg"
51
- vector.gdchart_frequencies(file,200,200,GDChart::LINE,'title'=>'FloatingBar')
52
- assert(File.exists?(file))
53
- vector.type=:scale
54
- file=@image_path+"/gdchart_histogram.jpg"
55
- vector.gdchart_histogram(5,file,300,400,GDChart::BAR,'title'=>'Histogram')
56
- assert(File.exists?(file))
57
- end
58
- end