nimbus 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +7 -0
  3. data/CONTRIBUTING.md +46 -0
  4. data/MIT-LICENSE.txt +1 -1
  5. data/README.md +131 -21
  6. data/bin/nimbus +2 -2
  7. data/lib/nimbus.rb +2 -6
  8. data/lib/nimbus/classification_tree.rb +9 -12
  9. data/lib/nimbus/configuration.rb +22 -22
  10. data/lib/nimbus/forest.rb +8 -8
  11. data/lib/nimbus/loss_functions.rb +11 -0
  12. data/lib/nimbus/regression_tree.rb +8 -10
  13. data/lib/nimbus/tree.rb +54 -12
  14. data/lib/nimbus/version.rb +1 -1
  15. data/spec/classification_tree_spec.rb +47 -47
  16. data/spec/configuration_spec.rb +55 -55
  17. data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
  18. data/spec/fixtures/classification/random_forest.yml +1174 -0
  19. data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
  20. data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
  21. data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
  22. data/spec/fixtures/regression/random_forest.yml +2737 -0
  23. data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
  24. data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
  25. data/spec/forest_spec.rb +39 -39
  26. data/spec/individual_spec.rb +3 -3
  27. data/spec/loss_functions_spec.rb +31 -13
  28. data/spec/nimbus_spec.rb +2 -2
  29. data/spec/regression_tree_spec.rb +44 -44
  30. data/spec/training_set_spec.rb +3 -3
  31. data/spec/tree_spec.rb +4 -4
  32. metadata +37 -34
  33. data/spec/fixtures/classification_random_forest.yml +0 -922
  34. data/spec/fixtures/regression_random_forest.yml +0 -1741
data/spec/forest_spec.rb
@@ -5,47 +5,47 @@ describe Nimbus::Forest do
  describe "Regression" do
  before(:each) do
  @config = Nimbus::Configuration.new
- @config.load fixture_file('regression_config.yml')
- @config.load_training_data
+ @config.load fixture_file('regression/config.yml')
+ @config.load_training_data if @config.do_training
  @forest = ::Nimbus::Forest.new @config
  end

  it 'grows a regression forest of N trees' do
- @forest.trees.should == []
- @config.forest_size.should == 3
- @forest.should_not be_classification
- @forest.should be_regression
+ expect(@forest.trees).to eq []
+ expect(@config.forest_size).to eq 3
+ expect(@forest).to_not be_classification
+ expect(@forest).to be_regression
  @forest.grow
- @forest.trees.size.should == @config.forest_size
- @forest.trees.each{|t| t.should be_kind_of Hash}
+ expect(@forest.trees.size).to eq @config.forest_size
+ @forest.trees.each{|t| expect(t).to be_kind_of Hash}
  end

  it 'creates averaged predictions for individuals in the training set' do
- @forest.predictions.should == {}
+ expect(@forest.predictions).to eq({})
  @forest.grow
- (@forest.predictions.keys - (1..800).to_a ).should == [] # 800 individuals in the training file
- @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
+ expect((@forest.predictions.keys - (1..800).to_a )).to eq [] # 800 individuals in the training file
+ @forest.predictions.values.each{|v| expect(v).to be_kind_of Numeric}
  end

  it 'computes averaged SNP importances for every SNP' do
- @forest.snp_importances.should == {}
+ expect(@forest.snp_importances).to eq({})
  @forest.grow
- @forest.snp_importances.keys.sort.should == (1..200).to_a # 200 snps in the training file
- @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
+ expect(@forest.snp_importances.keys.sort).to eq (1..200).to_a # 200 snps in the training file
+ @forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
  end

  it 'does not compute SNP importances if config set to false' do
- @forest.snp_importances.should == {}
+ expect(@forest.snp_importances).to eq({})
  @forest.options.do_importances = false
  @forest.grow
- @forest.snp_importances.should == {}
+ expect(@forest.snp_importances).to eq({})
  end

  it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
  @forest = @config.load_forest
- @forest.predictions.should == {}
+ expect(@forest.predictions).to eq({})

- tree_structure = Psych.load(File.open fixture_file('regression_random_forest.yml'))
+ tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml'))
  expected_predictions = {}
  @config.read_testing_data{|individual|
  individual_prediction = 0.0
@@ -56,60 +56,60 @@ describe Nimbus::Forest do
  }

  @forest.traverse
- @forest.predictions.should == expected_predictions
+ expect(@forest.predictions).to eq expected_predictions
  end

  it 'can output forest structure in YAML format' do
  @forest = @config.load_forest
- Psych.load(File.open fixture_file('regression_random_forest.yml')) == Psych.load(@forest.to_yaml)
+ Psych.load(File.open fixture_file('regression/random_forest.yml')) == Psych.load(@forest.to_yaml)
  end
  end

  describe "Classification" do
  before(:each) do
  @config = Nimbus::Configuration.new
- @config.load fixture_file('classification_config.yml')
+ @config.load fixture_file('classification/config.yml')
  @config.load_training_data
  @forest = ::Nimbus::Forest.new @config
  end

  it 'grows a classification forest of N trees' do
- @forest.trees.should == []
- @config.forest_size.should == 3
- @forest.should be_classification
- @forest.should_not be_regression
+ expect(@forest.trees).to eq []
+ expect(@config.forest_size).to eq 3
+ expect(@forest).to be_classification
+ expect(@forest).to_not be_regression
  @forest.grow
- @forest.trees.size.should == @config.forest_size
- @forest.trees.each{|t| t.should be_kind_of Hash}
+ expect(@forest.trees.size).to eq @config.forest_size
+ @forest.trees.each{|t| expect(t).to be_kind_of Hash}
  end

  it 'creates predictions for individuals in the training set' do
- @forest.predictions.should == {}
+ expect(@forest.predictions).to eq({})
  @forest.grow
- (@forest.predictions.keys - (1..1000).to_a ).should == [] # 1000 individuals in the training file
- @forest.predictions.values.each{|v| v.should be_kind_of String}
+ expect((@forest.predictions.keys - (1..1000).to_a )).to eq [] # 1000 individuals in the training file
+ @forest.predictions.values.each{|v| expect(v).to be_kind_of String}
  end

  it 'computes averaged SNP importances for every SNP' do
- @forest.snp_importances.should == {}
+ expect(@forest.snp_importances).to eq({})
  @forest.options.do_importances = true
  @forest.grow
- @forest.snp_importances.keys.sort.should == (1..100).to_a # 100 snps in the training file
- @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
+ expect(@forest.snp_importances.keys.sort).to eq (1..100).to_a # 100 snps in the training file
+ @forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
  end

  it 'does not compute SNP importances if config set to false' do
- @forest.snp_importances.should == {}
+ expect(@forest.snp_importances).to eq({})
  @forest.options.do_importances = false
  @forest.grow
- @forest.snp_importances.should == {}
+ expect(@forest.snp_importances).to eq({})
  end

  it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
  @forest = @config.load_forest
- @forest.predictions.should == {}
+ expect(@forest.predictions).to eq({})

- tree_structure = Psych.load(File.open fixture_file('classification_random_forest.yml'))
+ tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml'))
  expected_predictions = {}
  @config.read_testing_data{|individual|
  individual_prediction = []
@@ -121,12 +121,12 @@ describe Nimbus::Forest do
  }

  @forest.traverse
- @forest.predictions.should == expected_predictions
+ expect(@forest.predictions).to eq expected_predictions
  end

  it 'can output forest structure in YAML format' do
  @forest = @config.load_forest
- Psych.load(File.open fixture_file('classification_random_forest.yml')) == Psych.load(@forest.to_yaml)
+ Psych.load(File.open fixture_file('classification/random_forest.yml')) == Psych.load(@forest.to_yaml)
  end
  end
  end
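The regression examples above describe the forest prediction for an individual as the average of the per-tree outputs (the elided expected_predictions block accumulates into individual_prediction = 0.0). A minimal standalone sketch of that idea, assuming the regression/random_forest.yml fixture loads as an array of tree structures and that Nimbus::Tree.traverse returns a numeric leaf value, as the regression tree spec later in this diff exercises:

    require 'psych'
    require 'nimbus'

    # Load every stored tree structure from the fixture forest.
    trees = Psych.load(File.open('data/spec/fixtures/regression/random_forest.yml'))

    # One individual's genotypes: 200 SNPs in the regression fixtures, all set to 0 here.
    snp_list = [0] * 200

    # Push the individual down each tree and average the per-tree predictions.
    per_tree = trees.map { |tree| Nimbus::Tree.traverse(tree, snp_list) }
    forest_prediction = per_tree.sum / per_tree.size.to_f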
data/spec/individual_spec.rb
@@ -5,9 +5,9 @@ describe Nimbus::Individual do

  it "stores id, fenotype and SNPs information for an individual" do
  @individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
- @individual.id.should == 11
- @individual.fenotype.should == 33.275
- @individual.snp_list.should == [1,0,2,1]
+ expect(@individual.id).to eq 11
+ expect(@individual.fenotype).to eq 33.275
+ expect(@individual.snp_list).to eq [1,0,2,1]
  end

  end
data/spec/loss_functions_spec.rb
@@ -7,39 +7,57 @@ describe Nimbus::LossFunctions do
  ids = [1,3,5,7]
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}

- Nimbus::LossFunctions.average(ids, values).should == 18.25 # (10 + 21 + 31 + 11 = 73)/4
+ expect(Nimbus::LossFunctions.average(ids, values)).to eq 18.25 # (10 + 21 + 31 + 11 = 73)/4
  end

  it "method for mean squared error" do
  ids = [3,7,85]
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}

- Nimbus::LossFunctions.mean_squared_error(ids, values).should == 74.0 # (avg(21 + 11 + 22) = 18: sum (x-11)^2
+ expect(Nimbus::LossFunctions.mean_squared_error(ids, values)).to eq 74.0 # (avg(21 + 11 + 22) = 18: sum (x-18)^2
  end

  it "method for quadratic_loss" do
  ids = [1,4]
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}

- Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == 1
+ expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq 1
  end

  it "quadratic loss is mean squared error averaged" do
  ids = [1,2,3,4,5,7,85]
  values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
- Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
+ expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
+ end
+
+ it "method for pseudo Huber error" do
+ ids = [3,7,85]
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
+ expect(Nimbus::LossFunctions.pseudo_huber_error(ids, values).round(5)).to eq 11.92337 # (avg(21 + 11 + 22) = 18: log(cosh(x-18))
+ end
+
+ it "method for pseudo Huber loss function" do
+ ids = [1,4]
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
+ expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq 0.43378
+ end
+
+ it "pseudo Huber loss is pseudo Huber error averaged" do
+ ids = [1,2,3,4,5,7,85]
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
+ expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.pseudo_huber_error(ids, values)/7 ).round(5)
  end

  it "method for squared difference" do
- Nimbus::LossFunctions.squared_difference(50, 40).should == 100.0
- Nimbus::LossFunctions.squared_difference(22, 10).should == 144.0
+ expect(Nimbus::LossFunctions.squared_difference(50, 40)).to eq 100.0
+ expect(Nimbus::LossFunctions.squared_difference(22, 10)).to eq 144.0
  end

  it "method for majority class" do
  ids = [1,2,3,4,5,7,85]
  values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #3C, 2A, 2B
  classes = ['A', 'B', 'C']
- Nimbus::LossFunctions.majority_class(ids, values, classes).should == 'C'
+ expect(Nimbus::LossFunctions.majority_class(ids, values, classes)).to eq 'C'
  end

  it "majority class method selects randomly if more than one majority class" do
@@ -50,27 +68,27 @@ describe Nimbus::LossFunctions do
  20.times do
  results << Nimbus::LossFunctions.majority_class(ids, values, classes)
  end
- results.should include('A')
- results.should include('C')
+ expect(results).to include('A')
+ expect(results).to include('C')
  end

  it "method for majority class in list" do
  list = %w(A A A B B B C A B C A B A)
  classes = ['A', 'B', 'C']
- Nimbus::LossFunctions.majority_class_in_list(list, classes).should == 'A'
+ expect(Nimbus::LossFunctions.majority_class_in_list(list, classes)).to eq 'A'
  end

  it "method for class sizes" do
  ids = [1,2,3,4,5,7,85]
  values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #2A, 2B, 3C
  classes = ['A', 'B', 'C']
- Nimbus::LossFunctions.class_sizes(ids, values, classes).should == [2, 2, 3]
+ expect(Nimbus::LossFunctions.class_sizes(ids, values, classes)).to eq [2, 2, 3]
  end

  it "method for class sizes in list" do
  list = %w(A A A B B B C A B C A B A) # 6A, 5B, 2C
  classes = ['A', 'B', 'C']
- Nimbus::LossFunctions.class_sizes_in_list(list, classes).should == [6, 5, 2]
+ expect(Nimbus::LossFunctions.class_sizes_in_list(list, classes)).to eq [6, 5, 2]
  end

  it "Gini index" do
@@ -78,7 +96,7 @@ describe Nimbus::LossFunctions do
  values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'C'} #3C, 2A, 1B
  classes = ['A', 'B', 'C']
  # Gini = 1 - ( (3/6)^2 + (2/6)^2 + (1/6)^2 ) = 0.61111
- Nimbus::LossFunctions.gini_index(ids, values, classes).should == 0.61111
+ expect(Nimbus::LossFunctions.gini_index(ids, values, classes)).to eq 0.61111
  end

  end
data/spec/nimbus_spec.rb
@@ -6,13 +6,13 @@ describe 'Nimbus module' do

  it "manages a Nimbus::Application object" do
  app = Nimbus.application
- app.should be_kind_of Nimbus::Application
+ expect(app).to be_kind_of Nimbus::Application
  end

  it "accepts setting an external Nimbus::Application" do
  app = Nimbus::Application.new
  Nimbus.application = app
- Nimbus.application.should == app
+ expect(Nimbus.application).to eq app
  end

  end
data/spec/regression_tree_spec.rb
@@ -4,31 +4,31 @@ describe Nimbus::RegressionTree do

  before(:each) do
  @config = Nimbus::Configuration.new
- @config.load fixture_file('regression_config.yml')
+ @config.load fixture_file('regression/config.yml')

  @tree = Nimbus::RegressionTree.new @config.tree
  end

  it "is initialized with tree config info" do
- @tree.snp_total_count.should == 200
- @tree.snp_sample_size.should == 60
- @tree.node_min_size.should == 5
+ expect(@tree.snp_total_count).to eq 200
+ expect(@tree.snp_sample_size).to eq 60
+ expect(@tree.node_min_size).to eq 5
  end

  it "creates a tree structure when seeded with training data" do
  @config.load_training_data
- @tree.structure.should be_nil
+ expect(@tree.structure).to be_nil
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
- @tree.structure.should_not be_nil
- @tree.structure.should be_kind_of Hash
+ expect(@tree.structure).to_not be_nil
+ expect(@tree.structure).to be_kind_of Hash

- @tree.structure.keys.first.should == @tree.used_snps.last
- @tree.used_snps.should_not be_empty
+ expect(@tree.structure.keys.first).to eq @tree.used_snps.last
+ expect(@tree.used_snps).to_not be_empty
  end

- it "split node in three when building a node and finds a suitable split" do
+ it "split node when building a node and finds a suitable split" do
  @config.load_training_data
- @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
+ allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return((141..200).to_a) #189 is best split

  @tree.individuals = @config.training_set.individuals
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -36,29 +36,29 @@ describe Nimbus::RegressionTree do
  @tree.predictions = {}

  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
- branch.keys.size.should == 1
- branch.keys.first.should == 189
- branch[189].size.should == 3
- branch[189][0].should be_kind_of Hash
- branch[189][1].should be_kind_of Hash
- branch[189][2].should be_kind_of Hash
+ expect(branch.keys.size).to eq 1
+ expect(branch.keys.first).to eq 189
+ expect(branch[189].size).to eq 3
+ expect(branch[189][0]).to be_kind_of Hash
+ expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[189][1])
+ expect(branch[189][2]).to be_kind_of Hash
  end

  it "keeps track of all SNPs used for the tree" do
  @config.load_training_data
  snps = (131..190).to_a
- @tree.stub!(:snps_random_sample).and_return(snps)
- @tree.used_snps.should be_nil
+ allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return(snps)
+ expect(@tree.used_snps).to be_nil
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
- @tree.used_snps.size.should > 4
+ expect(@tree.used_snps.size).to be > 4
  @tree.used_snps.each{|snp|
- snps.include?(snp).should be_true
+ expect(snps.include?(snp)).to be true
  }
  end

  it "labels node when building a node and there is not a suitable split" do
  @config.load_training_data
- @tree.stub!(:snps_random_sample).and_return([33])
+ allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return([91])

  @tree.individuals = @config.training_set.individuals
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -66,9 +66,9 @@ describe Nimbus::RegressionTree do
  @tree.predictions = {}

  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
- branch[33][0].should be_kind_of Numeric
- branch[33][1].should be_kind_of Numeric
- branch[33][2].should be_kind_of Numeric
+ expect(branch[91][0]).to be_kind_of Numeric
+ expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[91][1])
+ expect(branch[91][2]).to be_kind_of Numeric
  end

  it "labels node when building a node with less individuals than the minimum node size" do
@@ -80,50 +80,50 @@ describe Nimbus::RegressionTree do
  @tree.predictions = {}

  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
- label.should be_kind_of Numeric
+ expect(label).to be_kind_of Numeric

  label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
- label.should be_kind_of Numeric
+ expect(label).to be_kind_of Numeric

  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
- label.should be_kind_of Numeric
+ expect(label).to be_kind_of Numeric

  label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
- label.should be_kind_of Numeric
+ expect(label).to be_kind_of Numeric
  end

  it 'computes generalization error for the tree' do
  @config.load_training_data
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
- @tree.generalization_error.should be_nil
+ expect(@tree.generalization_error).to be_nil
  @tree.generalization_error_from_oob((2..200).to_a)
- @tree.generalization_error.should be_kind_of Numeric
- @tree.generalization_error.should > 0.0
- @tree.generalization_error.should < 1.0
+ expect(@tree.generalization_error).to be_kind_of Numeric
+ expect(@tree.generalization_error).to be > 0.0
+ expect(@tree.generalization_error).to be < 1.0
  end

  it 'estimates importance for all SNPs' do
  @config.load_training_data
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
- @tree.importances.should be_nil
+ expect(@tree.importances).to be_nil
  @tree.estimate_importances((300..533).to_a)
- @tree.importances.should be_kind_of Hash
- @tree.importances.keys.should_not be_empty
- (@tree.importances.keys - (1..200).to_a).should be_empty #all keys are snp indexes (200 snps in training file)
+ expect(@tree.importances).to be_kind_of Hash
+ expect(@tree.importances.keys).to_not be_empty
+ expect((@tree.importances.keys - (1..200).to_a)).to be_empty #all keys are snp indexes (200 snps in training file)
  end

  it 'get prediction for an individual pushing it down a tree structure' do
- tree_structure = Psych.load(File.open fixture_file('regression_random_forest.yml')).first
+ tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml')).first
  individual_data = [0]*200
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
- prediction.should == 0.25043
+ expect(prediction).to eq -0.90813

- individual_data[189-1] = 1
- individual_data[4-1] = 1
- individual_data[62-1] = 2
- individual_data[146-1] = 2
+ individual_data[44-1] = 2
+ individual_data[98-1] = 1
+ individual_data[22-1] = 1
+ individual_data[31-1] = 2
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
- prediction.should == -0.9854
+ expect(prediction).to eq -0.95805
  end

  end
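The node layout asserted in these examples stores each split as a one-key hash, {snp_index => [branch, split_type, branch]}, where split_type is one of the new Nimbus::Tree::NODE_SPLIT_0_12 / NODE_SPLIT_01_2 constants (a binary grouping of the 0/1/2 genotypes, replacing the old three-way split) and each branch is either a nested node or a leaf label. A rough sketch of how such a structure could be walked for one individual; which array position holds the low-genotype group is an assumption here, not something this diff shows:

    # Illustrative traversal only; Nimbus::Tree.traverse is the gem's real entry point.
    # ASSUMPTION: index 0 holds the branch for the low-genotype group
    # (genotype 0 under NODE_SPLIT_0_12, genotypes 0 and 1 under NODE_SPLIT_01_2).
    def traverse_sketch(node, snp_list)
      return node unless node.is_a?(Hash)   # leaf: Numeric (regression) or String (classification)
      snp = node.keys.first
      low_branch, split_type, high_branch = node[snp]
      genotype = snp_list[snp - 1]          # SNP indexes are 1-based in the specs
      low_group = split_type == Nimbus::Tree::NODE_SPLIT_0_12 ? [0] : [0, 1]
      traverse_sketch(low_group.include?(genotype) ? low_branch : high_branch, snp_list)
    end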