nimbus 2.2.1 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +7 -0
- data/CONTRIBUTING.md +46 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +131 -21
- data/bin/nimbus +2 -2
- data/lib/nimbus.rb +2 -6
- data/lib/nimbus/classification_tree.rb +9 -12
- data/lib/nimbus/configuration.rb +22 -22
- data/lib/nimbus/forest.rb +8 -8
- data/lib/nimbus/loss_functions.rb +11 -0
- data/lib/nimbus/regression_tree.rb +8 -10
- data/lib/nimbus/tree.rb +54 -12
- data/lib/nimbus/version.rb +1 -1
- data/spec/classification_tree_spec.rb +47 -47
- data/spec/configuration_spec.rb +55 -55
- data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
- data/spec/fixtures/classification/random_forest.yml +1174 -0
- data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
- data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
- data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
- data/spec/fixtures/regression/random_forest.yml +2737 -0
- data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
- data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
- data/spec/forest_spec.rb +39 -39
- data/spec/individual_spec.rb +3 -3
- data/spec/loss_functions_spec.rb +31 -13
- data/spec/nimbus_spec.rb +2 -2
- data/spec/regression_tree_spec.rb +44 -44
- data/spec/training_set_spec.rb +3 -3
- data/spec/tree_spec.rb +4 -4
- metadata +37 -34
- data/spec/fixtures/classification_random_forest.yml +0 -922
- data/spec/fixtures/regression_random_forest.yml +0 -1741
File without changes
|
File without changes
|
data/spec/forest_spec.rb
CHANGED
@@ -5,47 +5,47 @@ describe Nimbus::Forest do
|
|
5
5
|
describe "Regression" do
|
6
6
|
before(:each) do
|
7
7
|
@config = Nimbus::Configuration.new
|
8
|
-
@config.load fixture_file('
|
9
|
-
@config.load_training_data
|
8
|
+
@config.load fixture_file('regression/config.yml')
|
9
|
+
@config.load_training_data if @config.do_training
|
10
10
|
@forest = ::Nimbus::Forest.new @config
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'grows a regression forest of N trees' do
|
14
|
-
@forest.trees.
|
15
|
-
@config.forest_size.
|
16
|
-
@forest.
|
17
|
-
@forest.
|
14
|
+
expect(@forest.trees).to eq []
|
15
|
+
expect(@config.forest_size).to eq 3
|
16
|
+
expect(@forest).to_not be_classification
|
17
|
+
expect(@forest).to be_regression
|
18
18
|
@forest.grow
|
19
|
-
@forest.trees.size.
|
20
|
-
@forest.trees.each{|t| t.
|
19
|
+
expect(@forest.trees.size).to eq @config.forest_size
|
20
|
+
@forest.trees.each{|t| expect(t).to be_kind_of Hash}
|
21
21
|
end
|
22
22
|
|
23
23
|
it 'creates averaged predictions for individuals in the training set' do
|
24
|
-
@forest.predictions.
|
24
|
+
expect(@forest.predictions).to eq({})
|
25
25
|
@forest.grow
|
26
|
-
(@forest.predictions.keys - (1..800).to_a ).
|
27
|
-
@forest.predictions.values.each{|v| v.
|
26
|
+
expect((@forest.predictions.keys - (1..800).to_a )).to eq [] # 800 individuals in the training file
|
27
|
+
@forest.predictions.values.each{|v| expect(v).to be_kind_of Numeric}
|
28
28
|
end
|
29
29
|
|
30
30
|
it 'computes averaged SNP importances for every SNP' do
|
31
|
-
@forest.snp_importances.
|
31
|
+
expect(@forest.snp_importances).to eq({})
|
32
32
|
@forest.grow
|
33
|
-
@forest.snp_importances.keys.sort.
|
34
|
-
@forest.snp_importances.values.each{|v| v.
|
33
|
+
expect(@forest.snp_importances.keys.sort).to eq (1..200).to_a # 200 snps in the training file
|
34
|
+
@forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
|
35
35
|
end
|
36
36
|
|
37
37
|
it 'does not compute SNP importances if config set to false' do
|
38
|
-
@forest.snp_importances.
|
38
|
+
expect(@forest.snp_importances).to eq({})
|
39
39
|
@forest.options.do_importances = false
|
40
40
|
@forest.grow
|
41
|
-
@forest.snp_importances.
|
41
|
+
expect(@forest.snp_importances).to eq({})
|
42
42
|
end
|
43
43
|
|
44
44
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
45
45
|
@forest = @config.load_forest
|
46
|
-
@forest.predictions.
|
46
|
+
expect(@forest.predictions).to eq({})
|
47
47
|
|
48
|
-
tree_structure = Psych.load(File.open fixture_file('
|
48
|
+
tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml'))
|
49
49
|
expected_predictions = {}
|
50
50
|
@config.read_testing_data{|individual|
|
51
51
|
individual_prediction = 0.0
|
@@ -56,60 +56,60 @@ describe Nimbus::Forest do
|
|
56
56
|
}
|
57
57
|
|
58
58
|
@forest.traverse
|
59
|
-
@forest.predictions.
|
59
|
+
expect(@forest.predictions).to eq expected_predictions
|
60
60
|
end
|
61
61
|
|
62
62
|
it 'can output forest structure in YAML format' do
|
63
63
|
@forest = @config.load_forest
|
64
|
-
Psych.load(File.open fixture_file('
|
64
|
+
expect(Psych.load(File.open fixture_file('regression/random_forest.yml'))).to eq Psych.load(@forest.to_yaml) # assert the round-trip; a bare == result is evaluated and discarded, so the example could never fail
|
65
65
|
end
|
66
66
|
end
|
67
67
|
|
68
68
|
describe "Classification" do
|
69
69
|
before(:each) do
|
70
70
|
@config = Nimbus::Configuration.new
|
71
|
-
@config.load fixture_file('
|
71
|
+
@config.load fixture_file('classification/config.yml')
|
72
72
|
@config.load_training_data
|
73
73
|
@forest = ::Nimbus::Forest.new @config
|
74
74
|
end
|
75
75
|
|
76
76
|
it 'grows a classification forest of N trees' do
|
77
|
-
@forest.trees.
|
78
|
-
@config.forest_size.
|
79
|
-
@forest.
|
80
|
-
@forest.
|
77
|
+
expect(@forest.trees).to eq []
|
78
|
+
expect(@config.forest_size).to eq 3
|
79
|
+
expect(@forest).to be_classification
|
80
|
+
expect(@forest).to_not be_regression
|
81
81
|
@forest.grow
|
82
|
-
@forest.trees.size.
|
83
|
-
@forest.trees.each{|t| t.
|
82
|
+
expect(@forest.trees.size).to eq @config.forest_size
|
83
|
+
@forest.trees.each{|t| expect(t).to be_kind_of Hash}
|
84
84
|
end
|
85
85
|
|
86
86
|
it 'creates predictions for individuals in the training set' do
|
87
|
-
@forest.predictions.
|
87
|
+
expect(@forest.predictions).to eq({})
|
88
88
|
@forest.grow
|
89
|
-
(@forest.predictions.keys - (1..1000).to_a ).
|
90
|
-
@forest.predictions.values.each{|v| v.
|
89
|
+
expect((@forest.predictions.keys - (1..1000).to_a )).to eq [] # 1000 individuals in the training file
|
90
|
+
@forest.predictions.values.each{|v| expect(v).to be_kind_of String}
|
91
91
|
end
|
92
92
|
|
93
93
|
it 'computes averaged SNP importances for every SNP' do
|
94
|
-
@forest.snp_importances.
|
94
|
+
expect(@forest.snp_importances).to eq({})
|
95
95
|
@forest.options.do_importances = true
|
96
96
|
@forest.grow
|
97
|
-
@forest.snp_importances.keys.sort.
|
98
|
-
@forest.snp_importances.values.each{|v| v.
|
97
|
+
expect(@forest.snp_importances.keys.sort).to eq (1..100).to_a # 100 snps in the training file
|
98
|
+
@forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
|
99
99
|
end
|
100
100
|
|
101
101
|
it 'does not compute SNP importances if config set to false' do
|
102
|
-
@forest.snp_importances.
|
102
|
+
expect(@forest.snp_importances).to eq({})
|
103
103
|
@forest.options.do_importances = false
|
104
104
|
@forest.grow
|
105
|
-
@forest.snp_importances.
|
105
|
+
expect(@forest.snp_importances).to eq({})
|
106
106
|
end
|
107
107
|
|
108
108
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
109
109
|
@forest = @config.load_forest
|
110
|
-
@forest.predictions.
|
110
|
+
expect(@forest.predictions).to eq({})
|
111
111
|
|
112
|
-
tree_structure = Psych.load(File.open fixture_file('
|
112
|
+
tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml'))
|
113
113
|
expected_predictions = {}
|
114
114
|
@config.read_testing_data{|individual|
|
115
115
|
individual_prediction = []
|
@@ -121,12 +121,12 @@ describe Nimbus::Forest do
|
|
121
121
|
}
|
122
122
|
|
123
123
|
@forest.traverse
|
124
|
-
@forest.predictions.
|
124
|
+
expect(@forest.predictions).to eq expected_predictions
|
125
125
|
end
|
126
126
|
|
127
127
|
it 'can output forest structure in YAML format' do
|
128
128
|
@forest = @config.load_forest
|
129
|
-
Psych.load(File.open fixture_file('
|
129
|
+
expect(Psych.load(File.open fixture_file('classification/random_forest.yml'))).to eq Psych.load(@forest.to_yaml) # assert the round-trip; a bare == result is evaluated and discarded, so the example could never fail
|
130
130
|
end
|
131
131
|
end
|
132
132
|
end
|
data/spec/individual_spec.rb
CHANGED
@@ -5,9 +5,9 @@ describe Nimbus::Individual do
|
|
5
5
|
|
6
6
|
it "stores id, fenotype and SNPs information for an individual" do
|
7
7
|
@individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
|
8
|
-
@individual.id.
|
9
|
-
@individual.fenotype.
|
10
|
-
@individual.snp_list.
|
8
|
+
expect(@individual.id).to eq 11
|
9
|
+
expect(@individual.fenotype).to eq 33.275
|
10
|
+
expect(@individual.snp_list).to eq [1,0,2,1]
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/spec/loss_functions_spec.rb
CHANGED
@@ -7,39 +7,57 @@ describe Nimbus::LossFunctions do
|
|
7
7
|
ids = [1,3,5,7]
|
8
8
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
9
9
|
|
10
|
-
Nimbus::LossFunctions.average(ids, values).
|
10
|
+
expect(Nimbus::LossFunctions.average(ids, values)).to eq 18.25 # (10 + 21 + 31 + 11 = 73)/4
|
11
11
|
end
|
12
12
|
|
13
13
|
it "method for mean squared error" do
|
14
14
|
ids = [3,7,85]
|
15
15
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
16
16
|
|
17
|
-
Nimbus::LossFunctions.mean_squared_error(ids, values).
|
17
|
+
expect(Nimbus::LossFunctions.mean_squared_error(ids, values)).to eq 74.0 # avg(21, 11, 22) = 18; sum of (x - 18)^2 = 9 + 49 + 16 = 74
|
18
18
|
end
|
19
19
|
|
20
20
|
it "method for quadratic_loss" do
|
21
21
|
ids = [1,4]
|
22
22
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
23
23
|
|
24
|
-
Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).
|
24
|
+
expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq 1
|
25
25
|
end
|
26
26
|
|
27
27
|
it "quadratic loss is mean squared error averaged" do
|
28
28
|
ids = [1,2,3,4,5,7,85]
|
29
29
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
30
|
-
Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).
|
30
|
+
expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "method for pseudo Huber error" do
|
34
|
+
ids = [3,7,85]
|
35
|
+
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
36
|
+
expect(Nimbus::LossFunctions.pseudo_huber_error(ids, values).round(5)).to eq 11.92337 # avg(21, 11, 22) = 18; sum of log(cosh(x - 18)) for x in 21, 11, 22
|
37
|
+
end
|
38
|
+
|
39
|
+
it "method for pseudo Huber loss function" do
|
40
|
+
ids = [1,4]
|
41
|
+
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
42
|
+
expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq 0.43378
|
43
|
+
end
|
44
|
+
|
45
|
+
it "pseudo Huber loss is pseudo Huber error averaged" do
|
46
|
+
ids = [1,2,3,4,5,7,85]
|
47
|
+
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
48
|
+
expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.pseudo_huber_error(ids, values)/7 ).round(5)
|
31
49
|
end
|
32
50
|
|
33
51
|
it "method for squared difference" do
|
34
|
-
Nimbus::LossFunctions.squared_difference(50, 40).
|
35
|
-
Nimbus::LossFunctions.squared_difference(22, 10).
|
52
|
+
expect(Nimbus::LossFunctions.squared_difference(50, 40)).to eq 100.0
|
53
|
+
expect(Nimbus::LossFunctions.squared_difference(22, 10)).to eq 144.0
|
36
54
|
end
|
37
55
|
|
38
56
|
it "method for majority class" do
|
39
57
|
ids = [1,2,3,4,5,7,85]
|
40
58
|
values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #3C, 2A, 2B
|
41
59
|
classes = ['A', 'B', 'C']
|
42
|
-
Nimbus::LossFunctions.majority_class(ids, values, classes).
|
60
|
+
expect(Nimbus::LossFunctions.majority_class(ids, values, classes)).to eq 'C'
|
43
61
|
end
|
44
62
|
|
45
63
|
it "majority class method selects randomly if more than one majority class" do
|
@@ -50,27 +68,27 @@ describe Nimbus::LossFunctions do
|
|
50
68
|
20.times do
|
51
69
|
results << Nimbus::LossFunctions.majority_class(ids, values, classes)
|
52
70
|
end
|
53
|
-
results.
|
54
|
-
results.
|
71
|
+
expect(results).to include('A')
|
72
|
+
expect(results).to include('C')
|
55
73
|
end
|
56
74
|
|
57
75
|
it "method for majority class in list" do
|
58
76
|
list = %w(A A A B B B C A B C A B A)
|
59
77
|
classes = ['A', 'B', 'C']
|
60
|
-
Nimbus::LossFunctions.majority_class_in_list(list, classes).
|
78
|
+
expect(Nimbus::LossFunctions.majority_class_in_list(list, classes)).to eq 'A'
|
61
79
|
end
|
62
80
|
|
63
81
|
it "method for class sizes" do
|
64
82
|
ids = [1,2,3,4,5,7,85]
|
65
83
|
values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #2A, 2B, 3C
|
66
84
|
classes = ['A', 'B', 'C']
|
67
|
-
Nimbus::LossFunctions.class_sizes(ids, values, classes).
|
85
|
+
expect(Nimbus::LossFunctions.class_sizes(ids, values, classes)).to eq [2, 2, 3]
|
68
86
|
end
|
69
87
|
|
70
88
|
it "method for class sizes in list" do
|
71
89
|
list = %w(A A A B B B C A B C A B A) # 6A, 5B, 2C
|
72
90
|
classes = ['A', 'B', 'C']
|
73
|
-
Nimbus::LossFunctions.class_sizes_in_list(list, classes).
|
91
|
+
expect(Nimbus::LossFunctions.class_sizes_in_list(list, classes)).to eq [6, 5, 2]
|
74
92
|
end
|
75
93
|
|
76
94
|
it "Gini index" do
|
@@ -78,7 +96,7 @@ describe Nimbus::LossFunctions do
|
|
78
96
|
values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'C'} #3C, 2A, 1B
|
79
97
|
classes = ['A', 'B', 'C']
|
80
98
|
# Gini = 1 - ( (3/6)^2 + (2/6)^2 + (1/6)^2 ) = 0.61111
|
81
|
-
Nimbus::LossFunctions.gini_index(ids, values, classes).
|
99
|
+
expect(Nimbus::LossFunctions.gini_index(ids, values, classes)).to eq 0.61111
|
82
100
|
end
|
83
101
|
|
84
102
|
end
|
data/spec/nimbus_spec.rb
CHANGED
@@ -6,13 +6,13 @@ describe 'Nimbus module' do
|
|
6
6
|
|
7
7
|
it "manages a Nimbus::Application object" do
|
8
8
|
app = Nimbus.application
|
9
|
-
app.
|
9
|
+
expect(app).to be_kind_of Nimbus::Application
|
10
10
|
end
|
11
11
|
|
12
12
|
it "accepts setting an external Nimbus::Application" do
|
13
13
|
app = Nimbus::Application.new
|
14
14
|
Nimbus.application = app
|
15
|
-
Nimbus.application.
|
15
|
+
expect(Nimbus.application).to eq app
|
16
16
|
end
|
17
17
|
|
18
18
|
end
|
@@ -4,31 +4,31 @@ describe Nimbus::RegressionTree do
|
|
4
4
|
|
5
5
|
before(:each) do
|
6
6
|
@config = Nimbus::Configuration.new
|
7
|
-
@config.load fixture_file('
|
7
|
+
@config.load fixture_file('regression/config.yml')
|
8
8
|
|
9
9
|
@tree = Nimbus::RegressionTree.new @config.tree
|
10
10
|
end
|
11
11
|
|
12
12
|
it "is initialized with tree config info" do
|
13
|
-
@tree.snp_total_count.
|
14
|
-
@tree.snp_sample_size.
|
15
|
-
@tree.node_min_size.
|
13
|
+
expect(@tree.snp_total_count).to eq 200
|
14
|
+
expect(@tree.snp_sample_size).to eq 60
|
15
|
+
expect(@tree.node_min_size).to eq 5
|
16
16
|
end
|
17
17
|
|
18
18
|
it "creates a tree structure when seeded with training data" do
|
19
19
|
@config.load_training_data
|
20
|
-
@tree.structure.
|
20
|
+
expect(@tree.structure).to be_nil
|
21
21
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
22
|
-
@tree.structure.
|
23
|
-
@tree.structure.
|
22
|
+
expect(@tree.structure).to_not be_nil
|
23
|
+
expect(@tree.structure).to be_kind_of Hash
|
24
24
|
|
25
|
-
@tree.structure.keys.first.
|
26
|
-
@tree.used_snps.
|
25
|
+
expect(@tree.structure.keys.first).to eq @tree.used_snps.last
|
26
|
+
expect(@tree.used_snps).to_not be_empty
|
27
27
|
end
|
28
28
|
|
29
|
-
it "split node
|
29
|
+
it "split node when building a node and finds a suitable split" do
|
30
30
|
@config.load_training_data
|
31
|
-
|
31
|
+
allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return((141..200).to_a) #189 is best split
|
32
32
|
|
33
33
|
@tree.individuals = @config.training_set.individuals
|
34
34
|
@tree.id_to_fenotype = @config.training_set.ids_fenotypes
|
@@ -36,29 +36,29 @@ describe Nimbus::RegressionTree do
|
|
36
36
|
@tree.predictions = {}
|
37
37
|
|
38
38
|
branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
39
|
-
branch.keys.size.
|
40
|
-
branch.keys.first.
|
41
|
-
branch[189].size.
|
42
|
-
branch[189][0].
|
43
|
-
branch[189][1]
|
44
|
-
branch[189][2].
|
39
|
+
expect(branch.keys.size).to eq 1
|
40
|
+
expect(branch.keys.first).to eq 189
|
41
|
+
expect(branch[189].size).to eq 3
|
42
|
+
expect(branch[189][0]).to be_kind_of Hash
|
43
|
+
expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[189][1])
|
44
|
+
expect(branch[189][2]).to be_kind_of Hash
|
45
45
|
end
|
46
46
|
|
47
47
|
it "keeps track of all SNPs used for the tree" do
|
48
48
|
@config.load_training_data
|
49
49
|
snps = (131..190).to_a
|
50
|
-
|
51
|
-
@tree.used_snps.
|
50
|
+
allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return(snps)
|
51
|
+
expect(@tree.used_snps).to be_nil
|
52
52
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
53
|
-
@tree.used_snps.size.
|
53
|
+
expect(@tree.used_snps.size).to be > 4
|
54
54
|
@tree.used_snps.each{|snp|
|
55
|
-
snps.include?(snp).
|
55
|
+
expect(snps.include?(snp)).to be true
|
56
56
|
}
|
57
57
|
end
|
58
58
|
|
59
59
|
it "labels node when building a node and there is not a suitable split" do
|
60
60
|
@config.load_training_data
|
61
|
-
|
61
|
+
allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return([91])
|
62
62
|
|
63
63
|
@tree.individuals = @config.training_set.individuals
|
64
64
|
@tree.id_to_fenotype = @config.training_set.ids_fenotypes
|
@@ -66,9 +66,9 @@ describe Nimbus::RegressionTree do
|
|
66
66
|
@tree.predictions = {}
|
67
67
|
|
68
68
|
branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
69
|
-
branch[
|
70
|
-
branch[
|
71
|
-
branch[
|
69
|
+
expect(branch[91][0]).to be_kind_of Numeric
|
70
|
+
expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[91][1])
|
71
|
+
expect(branch[91][2]).to be_kind_of Numeric
|
72
72
|
end
|
73
73
|
|
74
74
|
it "labels node when building a node with less individuals than the minimum node size" do
|
@@ -80,50 +80,50 @@ describe Nimbus::RegressionTree do
|
|
80
80
|
@tree.predictions = {}
|
81
81
|
|
82
82
|
label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
83
|
-
label.
|
83
|
+
expect(label).to be_kind_of Numeric
|
84
84
|
|
85
85
|
label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
86
|
-
label.
|
86
|
+
expect(label).to be_kind_of Numeric
|
87
87
|
|
88
88
|
label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
89
|
-
label.
|
89
|
+
expect(label).to be_kind_of Numeric
|
90
90
|
|
91
91
|
label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
92
|
-
label.
|
92
|
+
expect(label).to be_kind_of Numeric
|
93
93
|
end
|
94
94
|
|
95
95
|
it 'computes generalization error for the tree' do
|
96
96
|
@config.load_training_data
|
97
97
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
98
|
-
@tree.generalization_error.
|
98
|
+
expect(@tree.generalization_error).to be_nil
|
99
99
|
@tree.generalization_error_from_oob((2..200).to_a)
|
100
|
-
@tree.generalization_error.
|
101
|
-
@tree.generalization_error.
|
102
|
-
@tree.generalization_error.
|
100
|
+
expect(@tree.generalization_error).to be_kind_of Numeric
|
101
|
+
expect(@tree.generalization_error).to be > 0.0
|
102
|
+
expect(@tree.generalization_error).to be < 1.0
|
103
103
|
end
|
104
104
|
|
105
105
|
it 'estimates importance for all SNPs' do
|
106
106
|
@config.load_training_data
|
107
107
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
108
|
-
@tree.importances.
|
108
|
+
expect(@tree.importances).to be_nil
|
109
109
|
@tree.estimate_importances((300..533).to_a)
|
110
|
-
@tree.importances.
|
111
|
-
@tree.importances.keys.
|
112
|
-
(@tree.importances.keys - (1..200).to_a).
|
110
|
+
expect(@tree.importances).to be_kind_of Hash
|
111
|
+
expect(@tree.importances.keys).to_not be_empty
|
112
|
+
expect((@tree.importances.keys - (1..200).to_a)).to be_empty #all keys are snp indexes (200 snps in training file)
|
113
113
|
end
|
114
114
|
|
115
115
|
it 'get prediction for an individual pushing it down a tree structure' do
|
116
|
-
tree_structure = Psych.load(File.open fixture_file('
|
116
|
+
tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml')).first
|
117
117
|
individual_data = [0]*200
|
118
118
|
prediction = Nimbus::Tree.traverse tree_structure, individual_data
|
119
|
-
prediction.
|
119
|
+
expect(prediction).to eq(-0.90813) # parenthesized: a bare negative literal after eq triggers Ruby's ambiguous-first-argument warning
|
120
120
|
|
121
|
-
individual_data[
|
122
|
-
individual_data[
|
123
|
-
individual_data[
|
124
|
-
individual_data[
|
121
|
+
individual_data[44-1] = 2
|
122
|
+
individual_data[98-1] = 1
|
123
|
+
individual_data[22-1] = 1
|
124
|
+
individual_data[31-1] = 2
|
125
125
|
prediction = Nimbus::Tree.traverse tree_structure, individual_data
|
126
|
-
prediction.
|
126
|
+
expect(prediction).to eq(-0.95805) # parenthesized: a bare negative literal after eq triggers Ruby's ambiguous-first-argument warning
|
127
127
|
end
|
128
128
|
|
129
129
|
end
|