nimbus 2.2.1 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +7 -0
  3. data/CONTRIBUTING.md +46 -0
  4. data/MIT-LICENSE.txt +1 -1
  5. data/README.md +131 -21
  6. data/bin/nimbus +2 -2
  7. data/lib/nimbus.rb +2 -6
  8. data/lib/nimbus/classification_tree.rb +9 -12
  9. data/lib/nimbus/configuration.rb +22 -22
  10. data/lib/nimbus/forest.rb +8 -8
  11. data/lib/nimbus/loss_functions.rb +11 -0
  12. data/lib/nimbus/regression_tree.rb +8 -10
  13. data/lib/nimbus/tree.rb +54 -12
  14. data/lib/nimbus/version.rb +1 -1
  15. data/spec/classification_tree_spec.rb +47 -47
  16. data/spec/configuration_spec.rb +55 -55
  17. data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
  18. data/spec/fixtures/classification/random_forest.yml +1174 -0
  19. data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
  20. data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
  21. data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
  22. data/spec/fixtures/regression/random_forest.yml +2737 -0
  23. data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
  24. data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
  25. data/spec/forest_spec.rb +39 -39
  26. data/spec/individual_spec.rb +3 -3
  27. data/spec/loss_functions_spec.rb +31 -13
  28. data/spec/nimbus_spec.rb +2 -2
  29. data/spec/regression_tree_spec.rb +44 -44
  30. data/spec/training_set_spec.rb +3 -3
  31. data/spec/tree_spec.rb +4 -4
  32. metadata +37 -34
  33. data/spec/fixtures/classification_random_forest.yml +0 -922
  34. data/spec/fixtures/regression_random_forest.yml +0 -1741
@@ -5,47 +5,47 @@ describe Nimbus::Forest do
5
5
  describe "Regression" do
6
6
  before(:each) do
7
7
  @config = Nimbus::Configuration.new
8
- @config.load fixture_file('regression_config.yml')
9
- @config.load_training_data
8
+ @config.load fixture_file('regression/config.yml')
9
+ @config.load_training_data if @config.do_training
10
10
  @forest = ::Nimbus::Forest.new @config
11
11
  end
12
12
 
13
13
  it 'grows a regression forest of N trees' do
14
- @forest.trees.should == []
15
- @config.forest_size.should == 3
16
- @forest.should_not be_classification
17
- @forest.should be_regression
14
+ expect(@forest.trees).to eq []
15
+ expect(@config.forest_size).to eq 3
16
+ expect(@forest).to_not be_classification
17
+ expect(@forest).to be_regression
18
18
  @forest.grow
19
- @forest.trees.size.should == @config.forest_size
20
- @forest.trees.each{|t| t.should be_kind_of Hash}
19
+ expect(@forest.trees.size).to eq @config.forest_size
20
+ @forest.trees.each{|t| expect(t).to be_kind_of Hash}
21
21
  end
22
22
 
23
23
  it 'creates averaged predictions for individuals in the training set' do
24
- @forest.predictions.should == {}
24
+ expect(@forest.predictions).to eq({})
25
25
  @forest.grow
26
- (@forest.predictions.keys - (1..800).to_a ).should == [] # 800 individuals in the training file
27
- @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
26
+ expect((@forest.predictions.keys - (1..800).to_a )).to eq [] # 800 individuals in the training file
27
+ @forest.predictions.values.each{|v| expect(v).to be_kind_of Numeric}
28
28
  end
29
29
 
30
30
  it 'computes averaged SNP importances for every SNP' do
31
- @forest.snp_importances.should == {}
31
+ expect(@forest.snp_importances).to eq({})
32
32
  @forest.grow
33
- @forest.snp_importances.keys.sort.should == (1..200).to_a # 200 snps in the training file
34
- @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
33
+ expect(@forest.snp_importances.keys.sort).to eq (1..200).to_a # 200 snps in the training file
34
+ @forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
35
35
  end
36
36
 
37
37
  it 'does not compute SNP importances if config set to false' do
38
- @forest.snp_importances.should == {}
38
+ expect(@forest.snp_importances).to eq({})
39
39
  @forest.options.do_importances = false
40
40
  @forest.grow
41
- @forest.snp_importances.should == {}
41
+ expect(@forest.snp_importances).to eq({})
42
42
  end
43
43
 
44
44
  it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
45
45
  @forest = @config.load_forest
46
- @forest.predictions.should == {}
46
+ expect(@forest.predictions).to eq({})
47
47
 
48
- tree_structure = Psych.load(File.open fixture_file('regression_random_forest.yml'))
48
+ tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml'))
49
49
  expected_predictions = {}
50
50
  @config.read_testing_data{|individual|
51
51
  individual_prediction = 0.0
@@ -56,60 +56,60 @@ describe Nimbus::Forest do
56
56
  }
57
57
 
58
58
  @forest.traverse
59
- @forest.predictions.should == expected_predictions
59
+ expect(@forest.predictions).to eq expected_predictions
60
60
  end
61
61
 
62
62
  it 'can output forest structure in YAML format' do
63
63
  @forest = @config.load_forest
64
- Psych.load(File.open fixture_file('regression_random_forest.yml')) == Psych.load(@forest.to_yaml)
64
+ Psych.load(File.open fixture_file('regression/random_forest.yml')) == Psych.load(@forest.to_yaml)
65
65
  end
66
66
  end
67
67
 
68
68
  describe "Classification" do
69
69
  before(:each) do
70
70
  @config = Nimbus::Configuration.new
71
- @config.load fixture_file('classification_config.yml')
71
+ @config.load fixture_file('classification/config.yml')
72
72
  @config.load_training_data
73
73
  @forest = ::Nimbus::Forest.new @config
74
74
  end
75
75
 
76
76
  it 'grows a classification forest of N trees' do
77
- @forest.trees.should == []
78
- @config.forest_size.should == 3
79
- @forest.should be_classification
80
- @forest.should_not be_regression
77
+ expect(@forest.trees).to eq []
78
+ expect(@config.forest_size).to eq 3
79
+ expect(@forest).to be_classification
80
+ expect(@forest).to_not be_regression
81
81
  @forest.grow
82
- @forest.trees.size.should == @config.forest_size
83
- @forest.trees.each{|t| t.should be_kind_of Hash}
82
+ expect(@forest.trees.size).to eq @config.forest_size
83
+ @forest.trees.each{|t| expect(t).to be_kind_of Hash}
84
84
  end
85
85
 
86
86
  it 'creates predictions for individuals in the training set' do
87
- @forest.predictions.should == {}
87
+ expect(@forest.predictions).to eq({})
88
88
  @forest.grow
89
- (@forest.predictions.keys - (1..1000).to_a ).should == [] # 1000 individuals in the training file
90
- @forest.predictions.values.each{|v| v.should be_kind_of String}
89
+ expect((@forest.predictions.keys - (1..1000).to_a )).to eq [] # 1000 individuals in the training file
90
+ @forest.predictions.values.each{|v| expect(v).to be_kind_of String}
91
91
  end
92
92
 
93
93
  it 'computes averaged SNP importances for every SNP' do
94
- @forest.snp_importances.should == {}
94
+ expect(@forest.snp_importances).to eq({})
95
95
  @forest.options.do_importances = true
96
96
  @forest.grow
97
- @forest.snp_importances.keys.sort.should == (1..100).to_a # 100 snps in the training file
98
- @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
97
+ expect(@forest.snp_importances.keys.sort).to eq (1..100).to_a # 100 snps in the training file
98
+ @forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
99
99
  end
100
100
 
101
101
  it 'does not compute SNP importances if config set to false' do
102
- @forest.snp_importances.should == {}
102
+ expect(@forest.snp_importances).to eq({})
103
103
  @forest.options.do_importances = false
104
104
  @forest.grow
105
- @forest.snp_importances.should == {}
105
+ expect(@forest.snp_importances).to eq({})
106
106
  end
107
107
 
108
108
  it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
109
109
  @forest = @config.load_forest
110
- @forest.predictions.should == {}
110
+ expect(@forest.predictions).to eq({})
111
111
 
112
- tree_structure = Psych.load(File.open fixture_file('classification_random_forest.yml'))
112
+ tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml'))
113
113
  expected_predictions = {}
114
114
  @config.read_testing_data{|individual|
115
115
  individual_prediction = []
@@ -121,12 +121,12 @@ describe Nimbus::Forest do
121
121
  }
122
122
 
123
123
  @forest.traverse
124
- @forest.predictions.should == expected_predictions
124
+ expect(@forest.predictions).to eq expected_predictions
125
125
  end
126
126
 
127
127
  it 'can output forest structure in YAML format' do
128
128
  @forest = @config.load_forest
129
- Psych.load(File.open fixture_file('classification_random_forest.yml')) == Psych.load(@forest.to_yaml)
129
+ Psych.load(File.open fixture_file('classification/random_forest.yml')) == Psych.load(@forest.to_yaml)
130
130
  end
131
131
  end
132
132
  end
@@ -5,9 +5,9 @@ describe Nimbus::Individual do
5
5
 
6
6
  it "stores id, fenotype and SNPs information for an individual" do
7
7
  @individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
8
- @individual.id.should == 11
9
- @individual.fenotype.should == 33.275
10
- @individual.snp_list.should == [1,0,2,1]
8
+ expect(@individual.id).to eq 11
9
+ expect(@individual.fenotype).to eq 33.275
10
+ expect(@individual.snp_list).to eq [1,0,2,1]
11
11
  end
12
12
 
13
13
  end
@@ -7,39 +7,57 @@ describe Nimbus::LossFunctions do
7
7
  ids = [1,3,5,7]
8
8
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
9
9
 
10
- Nimbus::LossFunctions.average(ids, values).should == 18.25 # (10 + 21 + 31 + 11 = 73)/4
10
+ expect(Nimbus::LossFunctions.average(ids, values)).to eq 18.25 # (10 + 21 + 31 + 11 = 73)/4
11
11
  end
12
12
 
13
13
  it "method for mean squared error" do
14
14
  ids = [3,7,85]
15
15
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
16
16
 
17
- Nimbus::LossFunctions.mean_squared_error(ids, values).should == 74.0 # (avg(21 + 11 + 22) = 18: sum (x-11)^2
17
+ expect(Nimbus::LossFunctions.mean_squared_error(ids, values)).to eq 74.0 # (avg(21 + 11 + 22) = 18: sum (x-18)^2
18
18
  end
19
19
 
20
20
  it "method for quadratic_loss" do
21
21
  ids = [1,4]
22
22
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
23
23
 
24
- Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == 1
24
+ expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq 1
25
25
  end
26
26
 
27
27
  it "quadratic loss is mean squared error averaged" do
28
28
  ids = [1,2,3,4,5,7,85]
29
29
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
30
- Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
30
+ expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
31
+ end
32
+
33
+ it "method for pseudo Huber error" do
34
+ ids = [3,7,85]
35
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
36
+ expect(Nimbus::LossFunctions.pseudo_huber_error(ids, values).round(5)).to eq 11.92337 # (avg(21 + 11 + 22) = 18: log(cosh(x-18))
37
+ end
38
+
39
+ it "method for pseudo Huber loss function" do
40
+ ids = [1,4]
41
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
42
+ expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq 0.43378
43
+ end
44
+
45
+ it "pseudo Huber loss is pseudo Huber error averaged" do
46
+ ids = [1,2,3,4,5,7,85]
47
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
48
+ expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.pseudo_huber_error(ids, values)/7 ).round(5)
31
49
  end
32
50
 
33
51
  it "method for squared difference" do
34
- Nimbus::LossFunctions.squared_difference(50, 40).should == 100.0
35
- Nimbus::LossFunctions.squared_difference(22, 10).should == 144.0
52
+ expect(Nimbus::LossFunctions.squared_difference(50, 40)).to eq 100.0
53
+ expect(Nimbus::LossFunctions.squared_difference(22, 10)).to eq 144.0
36
54
  end
37
55
 
38
56
  it "method for majority class" do
39
57
  ids = [1,2,3,4,5,7,85]
40
58
  values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #3C, 2A, 2B
41
59
  classes = ['A', 'B', 'C']
42
- Nimbus::LossFunctions.majority_class(ids, values, classes).should == 'C'
60
+ expect(Nimbus::LossFunctions.majority_class(ids, values, classes)).to eq 'C'
43
61
  end
44
62
 
45
63
  it "majority class method selects randomly if more than one majority class" do
@@ -50,27 +68,27 @@ describe Nimbus::LossFunctions do
50
68
  20.times do
51
69
  results << Nimbus::LossFunctions.majority_class(ids, values, classes)
52
70
  end
53
- results.should include('A')
54
- results.should include('C')
71
+ expect(results).to include('A')
72
+ expect(results).to include('C')
55
73
  end
56
74
 
57
75
  it "method for majority class in list" do
58
76
  list = %w(A A A B B B C A B C A B A)
59
77
  classes = ['A', 'B', 'C']
60
- Nimbus::LossFunctions.majority_class_in_list(list, classes).should == 'A'
78
+ expect(Nimbus::LossFunctions.majority_class_in_list(list, classes)).to eq 'A'
61
79
  end
62
80
 
63
81
  it "method for class sizes" do
64
82
  ids = [1,2,3,4,5,7,85]
65
83
  values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #2A, 2B, 3C
66
84
  classes = ['A', 'B', 'C']
67
- Nimbus::LossFunctions.class_sizes(ids, values, classes).should == [2, 2, 3]
85
+ expect(Nimbus::LossFunctions.class_sizes(ids, values, classes)).to eq [2, 2, 3]
68
86
  end
69
87
 
70
88
  it "method for class sizes in list" do
71
89
  list = %w(A A A B B B C A B C A B A) # 6A, 5B, 2C
72
90
  classes = ['A', 'B', 'C']
73
- Nimbus::LossFunctions.class_sizes_in_list(list, classes).should == [6, 5, 2]
91
+ expect(Nimbus::LossFunctions.class_sizes_in_list(list, classes)).to eq [6, 5, 2]
74
92
  end
75
93
 
76
94
  it "Gini index" do
@@ -78,7 +96,7 @@ describe Nimbus::LossFunctions do
78
96
  values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'C'} #3C, 2A, 1B
79
97
  classes = ['A', 'B', 'C']
80
98
  # Gini = 1 - ( (3/6)^2 + (2/6)^2 + (1/6)^2 ) = 0.61111
81
- Nimbus::LossFunctions.gini_index(ids, values, classes).should == 0.61111
99
+ expect(Nimbus::LossFunctions.gini_index(ids, values, classes)).to eq 0.61111
82
100
  end
83
101
 
84
102
  end
@@ -6,13 +6,13 @@ describe 'Nimbus module' do
6
6
 
7
7
  it "manages a Nimbus::Application object" do
8
8
  app = Nimbus.application
9
- app.should be_kind_of Nimbus::Application
9
+ expect(app).to be_kind_of Nimbus::Application
10
10
  end
11
11
 
12
12
  it "accepts setting an external Nimbus::Application" do
13
13
  app = Nimbus::Application.new
14
14
  Nimbus.application = app
15
- Nimbus.application.should == app
15
+ expect(Nimbus.application).to eq app
16
16
  end
17
17
 
18
18
  end
@@ -4,31 +4,31 @@ describe Nimbus::RegressionTree do
4
4
 
5
5
  before(:each) do
6
6
  @config = Nimbus::Configuration.new
7
- @config.load fixture_file('regression_config.yml')
7
+ @config.load fixture_file('regression/config.yml')
8
8
 
9
9
  @tree = Nimbus::RegressionTree.new @config.tree
10
10
  end
11
11
 
12
12
  it "is initialized with tree config info" do
13
- @tree.snp_total_count.should == 200
14
- @tree.snp_sample_size.should == 60
15
- @tree.node_min_size.should == 5
13
+ expect(@tree.snp_total_count).to eq 200
14
+ expect(@tree.snp_sample_size).to eq 60
15
+ expect(@tree.node_min_size).to eq 5
16
16
  end
17
17
 
18
18
  it "creates a tree structure when seeded with training data" do
19
19
  @config.load_training_data
20
- @tree.structure.should be_nil
20
+ expect(@tree.structure).to be_nil
21
21
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
22
- @tree.structure.should_not be_nil
23
- @tree.structure.should be_kind_of Hash
22
+ expect(@tree.structure).to_not be_nil
23
+ expect(@tree.structure).to be_kind_of Hash
24
24
 
25
- @tree.structure.keys.first.should == @tree.used_snps.last
26
- @tree.used_snps.should_not be_empty
25
+ expect(@tree.structure.keys.first).to eq @tree.used_snps.last
26
+ expect(@tree.used_snps).to_not be_empty
27
27
  end
28
28
 
29
- it "split node in three when building a node and finds a suitable split" do
29
+ it "split node when building a node and finds a suitable split" do
30
30
  @config.load_training_data
31
- @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
31
+ allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return((141..200).to_a) #189 is best split
32
32
 
33
33
  @tree.individuals = @config.training_set.individuals
34
34
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -36,29 +36,29 @@ describe Nimbus::RegressionTree do
36
36
  @tree.predictions = {}
37
37
 
38
38
  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
39
- branch.keys.size.should == 1
40
- branch.keys.first.should == 189
41
- branch[189].size.should == 3
42
- branch[189][0].should be_kind_of Hash
43
- branch[189][1].should be_kind_of Hash
44
- branch[189][2].should be_kind_of Hash
39
+ expect(branch.keys.size).to eq 1
40
+ expect(branch.keys.first).to eq 189
41
+ expect(branch[189].size).to eq 3
42
+ expect(branch[189][0]).to be_kind_of Hash
43
+ expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[189][1])
44
+ expect(branch[189][2]).to be_kind_of Hash
45
45
  end
46
46
 
47
47
  it "keeps track of all SNPs used for the tree" do
48
48
  @config.load_training_data
49
49
  snps = (131..190).to_a
50
- @tree.stub!(:snps_random_sample).and_return(snps)
51
- @tree.used_snps.should be_nil
50
+ allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return(snps)
51
+ expect(@tree.used_snps).to be_nil
52
52
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
53
- @tree.used_snps.size.should > 4
53
+ expect(@tree.used_snps.size).to be > 4
54
54
  @tree.used_snps.each{|snp|
55
- snps.include?(snp).should be_true
55
+ expect(snps.include?(snp)).to be true
56
56
  }
57
57
  end
58
58
 
59
59
  it "labels node when building a node and there is not a suitable split" do
60
60
  @config.load_training_data
61
- @tree.stub!(:snps_random_sample).and_return([33])
61
+ allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return([91])
62
62
 
63
63
  @tree.individuals = @config.training_set.individuals
64
64
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -66,9 +66,9 @@ describe Nimbus::RegressionTree do
66
66
  @tree.predictions = {}
67
67
 
68
68
  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
69
- branch[33][0].should be_kind_of Numeric
70
- branch[33][1].should be_kind_of Numeric
71
- branch[33][2].should be_kind_of Numeric
69
+ expect(branch[91][0]).to be_kind_of Numeric
70
+ expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[91][1])
71
+ expect(branch[91][2]).to be_kind_of Numeric
72
72
  end
73
73
 
74
74
  it "labels node when building a node with less individuals than the minimum node size" do
@@ -80,50 +80,50 @@ describe Nimbus::RegressionTree do
80
80
  @tree.predictions = {}
81
81
 
82
82
  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
83
- label.should be_kind_of Numeric
83
+ expect(label).to be_kind_of Numeric
84
84
 
85
85
  label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
86
- label.should be_kind_of Numeric
86
+ expect(label).to be_kind_of Numeric
87
87
 
88
88
  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
89
- label.should be_kind_of Numeric
89
+ expect(label).to be_kind_of Numeric
90
90
 
91
91
  label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
92
- label.should be_kind_of Numeric
92
+ expect(label).to be_kind_of Numeric
93
93
  end
94
94
 
95
95
  it 'computes generalization error for the tree' do
96
96
  @config.load_training_data
97
97
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
98
- @tree.generalization_error.should be_nil
98
+ expect(@tree.generalization_error).to be_nil
99
99
  @tree.generalization_error_from_oob((2..200).to_a)
100
- @tree.generalization_error.should be_kind_of Numeric
101
- @tree.generalization_error.should > 0.0
102
- @tree.generalization_error.should < 1.0
100
+ expect(@tree.generalization_error).to be_kind_of Numeric
101
+ expect(@tree.generalization_error).to be > 0.0
102
+ expect(@tree.generalization_error).to be < 1.0
103
103
  end
104
104
 
105
105
  it 'estimates importance for all SNPs' do
106
106
  @config.load_training_data
107
107
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
108
- @tree.importances.should be_nil
108
+ expect(@tree.importances).to be_nil
109
109
  @tree.estimate_importances((300..533).to_a)
110
- @tree.importances.should be_kind_of Hash
111
- @tree.importances.keys.should_not be_empty
112
- (@tree.importances.keys - (1..200).to_a).should be_empty #all keys are snp indexes (200 snps in training file)
110
+ expect(@tree.importances).to be_kind_of Hash
111
+ expect(@tree.importances.keys).to_not be_empty
112
+ expect((@tree.importances.keys - (1..200).to_a)).to be_empty #all keys are snp indexes (200 snps in training file)
113
113
  end
114
114
 
115
115
  it 'get prediction for an individual pushing it down a tree structure' do
116
- tree_structure = Psych.load(File.open fixture_file('regression_random_forest.yml')).first
116
+ tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml')).first
117
117
  individual_data = [0]*200
118
118
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
119
- prediction.should == 0.25043
119
+ expect(prediction).to eq -0.90813
120
120
 
121
- individual_data[189-1] = 1
122
- individual_data[4-1] = 1
123
- individual_data[62-1] = 2
124
- individual_data[146-1] = 2
121
+ individual_data[44-1] = 2
122
+ individual_data[98-1] = 1
123
+ individual_data[22-1] = 1
124
+ individual_data[31-1] = 2
125
125
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
126
- prediction.should == -0.9854
126
+ expect(prediction).to eq -0.95805
127
127
  end
128
128
 
129
129
  end