nimbus 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +7 -0
- data/CONTRIBUTING.md +46 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +131 -21
- data/bin/nimbus +2 -2
- data/lib/nimbus.rb +2 -6
- data/lib/nimbus/classification_tree.rb +9 -12
- data/lib/nimbus/configuration.rb +22 -22
- data/lib/nimbus/forest.rb +8 -8
- data/lib/nimbus/loss_functions.rb +11 -0
- data/lib/nimbus/regression_tree.rb +8 -10
- data/lib/nimbus/tree.rb +54 -12
- data/lib/nimbus/version.rb +1 -1
- data/spec/classification_tree_spec.rb +47 -47
- data/spec/configuration_spec.rb +55 -55
- data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
- data/spec/fixtures/classification/random_forest.yml +1174 -0
- data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
- data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
- data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
- data/spec/fixtures/regression/random_forest.yml +2737 -0
- data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
- data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
- data/spec/forest_spec.rb +39 -39
- data/spec/individual_spec.rb +3 -3
- data/spec/loss_functions_spec.rb +31 -13
- data/spec/nimbus_spec.rb +2 -2
- data/spec/regression_tree_spec.rb +44 -44
- data/spec/training_set_spec.rb +3 -3
- data/spec/tree_spec.rb +4 -4
- metadata +37 -34
- data/spec/fixtures/classification_random_forest.yml +0 -922
- data/spec/fixtures/regression_random_forest.yml +0 -1741
data/spec/forest_spec.rb CHANGED

@@ -5,47 +5,47 @@ describe Nimbus::Forest do
   describe "Regression" do
     before(:each) do
       @config = Nimbus::Configuration.new
-      @config.load fixture_file('
-      @config.load_training_data
+      @config.load fixture_file('regression/config.yml')
+      @config.load_training_data if @config.do_training
       @forest = ::Nimbus::Forest.new @config
     end

     it 'grows a regression forest of N trees' do
-      @forest.trees.
-      @config.forest_size.
-      @forest.
-      @forest.
+      expect(@forest.trees).to eq []
+      expect(@config.forest_size).to eq 3
+      expect(@forest).to_not be_classification
+      expect(@forest).to be_regression
       @forest.grow
-      @forest.trees.size.
-      @forest.trees.each{|t| t.
+      expect(@forest.trees.size).to eq @config.forest_size
+      @forest.trees.each{|t| expect(t).to be_kind_of Hash}
     end

     it 'creates averaged predictions for individuals in the training set' do
-      @forest.predictions.
+      expect(@forest.predictions).to eq({})
       @forest.grow
-      (@forest.predictions.keys - (1..800).to_a ).
-      @forest.predictions.values.each{|v| v.
+      expect((@forest.predictions.keys - (1..800).to_a )).to eq [] # 800 individuals in the training file
+      @forest.predictions.values.each{|v| expect(v).to be_kind_of Numeric}
     end

     it 'computes averaged SNP importances for every SNP' do
-      @forest.snp_importances.
+      expect(@forest.snp_importances).to eq({})
       @forest.grow
-      @forest.snp_importances.keys.sort.
-      @forest.snp_importances.values.each{|v| v.
+      expect(@forest.snp_importances.keys.sort).to eq (1..200).to_a # 200 snps in the training file
+      @forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
     end

     it 'does not compute SNP importances if config set to false' do
-      @forest.snp_importances.
+      expect(@forest.snp_importances).to eq({})
       @forest.options.do_importances = false
       @forest.grow
-      @forest.snp_importances.
+      expect(@forest.snp_importances).to eq({})
     end

     it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
       @forest = @config.load_forest
-      @forest.predictions.
+      expect(@forest.predictions).to eq({})

-      tree_structure = Psych.load(File.open fixture_file('
+      tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml'))
       expected_predictions = {}
       @config.read_testing_data{|individual|
         individual_prediction = 0.0

@@ -56,60 +56,60 @@ describe Nimbus::Forest do
       }

       @forest.traverse
-      @forest.predictions.
+      expect(@forest.predictions).to eq expected_predictions
     end

     it 'can output forest structure in YAML format' do
       @forest = @config.load_forest
-      Psych.load(File.open fixture_file('
+      Psych.load(File.open fixture_file('regression/random_forest.yml')) == Psych.load(@forest.to_yaml)
     end
   end

   describe "Classification" do
     before(:each) do
       @config = Nimbus::Configuration.new
-      @config.load fixture_file('
+      @config.load fixture_file('classification/config.yml')
       @config.load_training_data
       @forest = ::Nimbus::Forest.new @config
     end

     it 'grows a classification forest of N trees' do
-      @forest.trees.
-      @config.forest_size.
-      @forest.
-      @forest.
+      expect(@forest.trees).to eq []
+      expect(@config.forest_size).to eq 3
+      expect(@forest).to be_classification
+      expect(@forest).to_not be_regression
       @forest.grow
-      @forest.trees.size.
-      @forest.trees.each{|t| t.
+      expect(@forest.trees.size).to eq @config.forest_size
+      @forest.trees.each{|t| expect(t).to be_kind_of Hash}
     end

     it 'creates predictions for individuals in the training set' do
-      @forest.predictions.
+      expect(@forest.predictions).to eq({})
       @forest.grow
-      (@forest.predictions.keys - (1..1000).to_a ).
-      @forest.predictions.values.each{|v| v.
+      expect((@forest.predictions.keys - (1..1000).to_a )).to eq [] # 1000 individuals in the training file
+      @forest.predictions.values.each{|v| expect(v).to be_kind_of String}
     end

     it 'computes averaged SNP importances for every SNP' do
-      @forest.snp_importances.
+      expect(@forest.snp_importances).to eq({})
       @forest.options.do_importances = true
       @forest.grow
-      @forest.snp_importances.keys.sort.
-      @forest.snp_importances.values.each{|v| v.
+      expect(@forest.snp_importances.keys.sort).to eq (1..100).to_a # 100 snps in the training file
+      @forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
     end

     it 'does not compute SNP importances if config set to false' do
-      @forest.snp_importances.
+      expect(@forest.snp_importances).to eq({})
       @forest.options.do_importances = false
       @forest.grow
-      @forest.snp_importances.
+      expect(@forest.snp_importances).to eq({})
     end

     it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
       @forest = @config.load_forest
-      @forest.predictions.
+      expect(@forest.predictions).to eq({})

-      tree_structure = Psych.load(File.open fixture_file('
+      tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml'))
       expected_predictions = {}
       @config.read_testing_data{|individual|
         individual_prediction = []

@@ -121,12 +121,12 @@ describe Nimbus::Forest do
       }

       @forest.traverse
-      @forest.predictions.
+      expect(@forest.predictions).to eq expected_predictions
     end

     it 'can output forest structure in YAML format' do
       @forest = @config.load_forest
-      Psych.load(File.open fixture_file('
+      Psych.load(File.open fixture_file('classification/random_forest.yml')) == Psych.load(@forest.to_yaml)
     end
   end
 end
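Taken together, the forest expectations above trace the workflow these specs exercise: build a Nimbus::Configuration, load a config file and training data, grow a Nimbus::Forest, then read its predictions, SNP importances and YAML dump. The sketch below is reconstructed only from the calls visible in this spec; the config path is the renamed spec fixture and the output filename is illustrative, so treat it as a rough sketch rather than documented usage.

require 'nimbus'

config = Nimbus::Configuration.new
config.load 'regression/config.yml'              # fixture layout renamed in 2.3.0
config.load_training_data if config.do_training  # guard added in this release

forest = Nimbus::Forest.new(config)
forest.grow                                      # builds config.forest_size trees (Hashes)
forest.predictions                               # {individual_id => prediction}
forest.snp_importances                           # {} unless importances are enabled
File.write('random_forest.yml', forest.to_yaml)  # same YAML structure the specs load back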
data/spec/individual_spec.rb CHANGED

@@ -5,9 +5,9 @@ describe Nimbus::Individual do

   it "stores id, fenotype and SNPs information for an individual" do
     @individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
-    @individual.id.
-    @individual.fenotype.
-    @individual.snp_list.
+    expect(@individual.id).to eq 11
+    expect(@individual.fenotype).to eq 33.275
+    expect(@individual.snp_list).to eq [1,0,2,1]
   end

 end
data/spec/loss_functions_spec.rb CHANGED

@@ -7,39 +7,57 @@ describe Nimbus::LossFunctions do
     ids = [1,3,5,7]
     values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}

-    Nimbus::LossFunctions.average(ids, values).
+    expect(Nimbus::LossFunctions.average(ids, values)).to eq 18.25 # (10 + 21 + 31 + 11 = 73)/4
   end

   it "method for mean squared error" do
     ids = [3,7,85]
     values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}

-    Nimbus::LossFunctions.mean_squared_error(ids, values).
+    expect(Nimbus::LossFunctions.mean_squared_error(ids, values)).to eq 74.0 # (avg(21 + 11 + 22) = 18: sum (x-18)^2
   end

   it "method for quadratic_loss" do
     ids = [1,4]
     values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}

-    Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).
+    expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq 1
   end

   it "quadratic loss is mean squared error averaged" do
     ids = [1,2,3,4,5,7,85]
     values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
-    Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).
+    expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
+  end
+
+  it "method for pseudo Huber error" do
+    ids = [3,7,85]
+    values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
+    expect(Nimbus::LossFunctions.pseudo_huber_error(ids, values).round(5)).to eq 11.92337 # (avg(21 + 11 + 22) = 18: log(cosh(x-18))
+  end
+
+  it "method for pseudo Huber loss function" do
+    ids = [1,4]
+    values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
+    expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq 0.43378
+  end
+
+  it "pseudo Huber loss is pseudo Huber error averaged" do
+    ids = [1,2,3,4,5,7,85]
+    values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
+    expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.pseudo_huber_error(ids, values)/7 ).round(5)
   end

   it "method for squared difference" do
-    Nimbus::LossFunctions.squared_difference(50, 40).
-    Nimbus::LossFunctions.squared_difference(22, 10).
+    expect(Nimbus::LossFunctions.squared_difference(50, 40)).to eq 100.0
+    expect(Nimbus::LossFunctions.squared_difference(22, 10)).to eq 144.0
   end

   it "method for majority class" do
     ids = [1,2,3,4,5,7,85]
     values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #3C, 2A, 2B
     classes = ['A', 'B', 'C']
-    Nimbus::LossFunctions.majority_class(ids, values, classes).
+    expect(Nimbus::LossFunctions.majority_class(ids, values, classes)).to eq 'C'
   end

   it "majority class method selects randomly if more than one majority class" do

@@ -50,27 +68,27 @@ describe Nimbus::LossFunctions do
     20.times do
       results << Nimbus::LossFunctions.majority_class(ids, values, classes)
     end
-    results.
-    results.
+    expect(results).to include('A')
+    expect(results).to include('C')
   end

   it "method for majority class in list" do
     list = %w(A A A B B B C A B C A B A)
     classes = ['A', 'B', 'C']
-    Nimbus::LossFunctions.majority_class_in_list(list, classes).
+    expect(Nimbus::LossFunctions.majority_class_in_list(list, classes)).to eq 'A'
   end

   it "method for class sizes" do
     ids = [1,2,3,4,5,7,85]
     values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #2A, 2B, 3C
     classes = ['A', 'B', 'C']
-    Nimbus::LossFunctions.class_sizes(ids, values, classes).
+    expect(Nimbus::LossFunctions.class_sizes(ids, values, classes)).to eq [2, 2, 3]
   end

   it "method for class sizes in list" do
     list = %w(A A A B B B C A B C A B A) # 6A, 5B, 2C
     classes = ['A', 'B', 'C']
-    Nimbus::LossFunctions.class_sizes_in_list(list, classes).
+    expect(Nimbus::LossFunctions.class_sizes_in_list(list, classes)).to eq [6, 5, 2]
   end

   it "Gini index" do

@@ -78,7 +96,7 @@ describe Nimbus::LossFunctions do
     values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'C'} #3C, 2A, 1B
     classes = ['A', 'B', 'C']
     # Gini = 1 - ( (3/6)^2 + (2/6)^2 + (1/6)^2 ) = 0.61111
-    Nimbus::LossFunctions.gini_index(ids, values, classes).
+    expect(Nimbus::LossFunctions.gini_index(ids, values, classes)).to eq 0.61111
   end

 end
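The new pseudo-Huber examples pin down the arithmetic: for ids [3, 7, 85] the values are 21, 11 and 22, their average is 18, and log(cosh(21-18)) + log(cosh(11-18)) + log(cosh(22-18)) ≈ 2.30933 + 6.30685 + 3.30719 ≈ 11.92337, while the loss variant is that error divided by the number of ids (for ids [1, 4]: 2·log(cosh(1))/2 ≈ 0.43378). The implementation added to data/lib/nimbus/loss_functions.rb is not part of this diff, so the following is only a minimal sketch consistent with these expectations; the method names come from the spec, everything else is an assumption.

# Hypothetical sketch, not the gem's actual code: loss functions that would
# satisfy the pseudo-Huber expectations in the spec above.
module PseudoHuberSketch
  # Sum of log(cosh(value - mean)) over the given ids.
  def self.pseudo_huber_error(ids, value_table)
    mean = ids.sum { |i| value_table[i] } / ids.size.to_f
    ids.sum { |i| Math.log(Math.cosh(value_table[i] - mean)) }
  end

  # Error averaged over the number of ids, mirroring quadratic_loss vs mean_squared_error.
  def self.pseudo_huber_loss(ids, value_table)
    pseudo_huber_error(ids, value_table) / ids.size
  end
end

values = {3 => 21, 7 => 11, 85 => 22}
PseudoHuberSketch.pseudo_huber_error([3, 7, 85], values).round(5)  # => 11.92337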
data/spec/nimbus_spec.rb CHANGED

@@ -6,13 +6,13 @@ describe 'Nimbus module' do

   it "manages a Nimbus::Application object" do
     app = Nimbus.application
-    app.
+    expect(app).to be_kind_of Nimbus::Application
   end

   it "accepts setting an external Nimbus::Application" do
     app = Nimbus::Application.new
     Nimbus.application = app
-    Nimbus.application.
+    expect(Nimbus.application).to eq app
   end

 end
data/spec/regression_tree_spec.rb CHANGED

@@ -4,31 +4,31 @@ describe Nimbus::RegressionTree do

   before(:each) do
     @config = Nimbus::Configuration.new
-    @config.load fixture_file('
+    @config.load fixture_file('regression/config.yml')

     @tree = Nimbus::RegressionTree.new @config.tree
   end

   it "is initialized with tree config info" do
-    @tree.snp_total_count.
-    @tree.snp_sample_size.
-    @tree.node_min_size.
+    expect(@tree.snp_total_count).to eq 200
+    expect(@tree.snp_sample_size).to eq 60
+    expect(@tree.node_min_size).to eq 5
   end

   it "creates a tree structure when seeded with training data" do
     @config.load_training_data
-    @tree.structure.
+    expect(@tree.structure).to be_nil
     @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.structure.
-    @tree.structure.
+    expect(@tree.structure).to_not be_nil
+    expect(@tree.structure).to be_kind_of Hash

-    @tree.structure.keys.first.
-    @tree.used_snps.
+    expect(@tree.structure.keys.first).to eq @tree.used_snps.last
+    expect(@tree.used_snps).to_not be_empty
   end

-  it "split node
+  it "split node when building a node and finds a suitable split" do
     @config.load_training_data
-
+    allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return((141..200).to_a) #189 is best split

     @tree.individuals = @config.training_set.individuals
     @tree.id_to_fenotype = @config.training_set.ids_fenotypes

@@ -36,29 +36,29 @@ describe Nimbus::RegressionTree do
     @tree.predictions = {}

     branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    branch.keys.size.
-    branch.keys.first.
-    branch[189].size.
-    branch[189][0].
-    branch[189][1]
-    branch[189][2].
+    expect(branch.keys.size).to eq 1
+    expect(branch.keys.first).to eq 189
+    expect(branch[189].size).to eq 3
+    expect(branch[189][0]).to be_kind_of Hash
+    expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[189][1])
+    expect(branch[189][2]).to be_kind_of Hash
   end

   it "keeps track of all SNPs used for the tree" do
     @config.load_training_data
     snps = (131..190).to_a
-
-    @tree.used_snps.
+    allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return(snps)
+    expect(@tree.used_snps).to be_nil
     @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.used_snps.size.
+    expect(@tree.used_snps.size).to be > 4
     @tree.used_snps.each{|snp|
-      snps.include?(snp).
+      expect(snps.include?(snp)).to be true
     }
   end

   it "labels node when building a node and there is not a suitable split" do
     @config.load_training_data
-
+    allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return([91])

     @tree.individuals = @config.training_set.individuals
     @tree.id_to_fenotype = @config.training_set.ids_fenotypes

@@ -66,9 +66,9 @@ describe Nimbus::RegressionTree do
     @tree.predictions = {}

     branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    branch[
-    branch[
-    branch[
+    expect(branch[91][0]).to be_kind_of Numeric
+    expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[91][1])
+    expect(branch[91][2]).to be_kind_of Numeric
   end

   it "labels node when building a node with less individuals than the minimum node size" do

@@ -80,50 +80,50 @@ describe Nimbus::RegressionTree do
     @tree.predictions = {}

     label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    label.
+    expect(label).to be_kind_of Numeric

     label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    label.
+    expect(label).to be_kind_of Numeric

     label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    label.
+    expect(label).to be_kind_of Numeric

     label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    label.
+    expect(label).to be_kind_of Numeric
   end

   it 'computes generalization error for the tree' do
     @config.load_training_data
     @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.generalization_error.
+    expect(@tree.generalization_error).to be_nil
     @tree.generalization_error_from_oob((2..200).to_a)
-    @tree.generalization_error.
-    @tree.generalization_error.
-    @tree.generalization_error.
+    expect(@tree.generalization_error).to be_kind_of Numeric
+    expect(@tree.generalization_error).to be > 0.0
+    expect(@tree.generalization_error).to be < 1.0
   end

   it 'estimates importance for all SNPs' do
     @config.load_training_data
     @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.importances.
+    expect(@tree.importances).to be_nil
     @tree.estimate_importances((300..533).to_a)
-    @tree.importances.
-    @tree.importances.keys.
-    (@tree.importances.keys - (1..200).to_a).
+    expect(@tree.importances).to be_kind_of Hash
+    expect(@tree.importances.keys).to_not be_empty
+    expect((@tree.importances.keys - (1..200).to_a)).to be_empty #all keys are snp indexes (200 snps in training file)
   end

   it 'get prediction for an individual pushing it down a tree structure' do
-    tree_structure = Psych.load(File.open fixture_file('
+    tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml')).first
     individual_data = [0]*200
     prediction = Nimbus::Tree.traverse tree_structure, individual_data
-    prediction.
+    expect(prediction).to eq -0.90813

-    individual_data[
-    individual_data[
-    individual_data[
-    individual_data[
+    individual_data[44-1] = 2
+    individual_data[98-1] = 1
+    individual_data[22-1] = 1
+    individual_data[31-1] = 2
     prediction = Nimbus::Tree.traverse tree_structure, individual_data
-    prediction.
+    expect(prediction).to eq -0.95805
   end

 end
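The reworked tree specs also document the node layout introduced by the data/lib/nimbus/tree.rb changes: build_node now returns a single-key Hash of the form {snp_index => [branch, split_type, branch]}, where each branch is either a nested node (Hash) or a leaf label (Numeric), split_type is one of Nimbus::Tree::NODE_SPLIT_01_2 or Nimbus::Tree::NODE_SPLIT_0_12, and Nimbus::Tree.traverse walks an individual's 0/1/2 genotypes down that structure. The tree.rb code itself is not shown in this diff, so the traversal below is only a hypothetical sketch: the constant values and the genotype partition implied by the names ({0,1} vs {2}, and {0} vs {1,2}) are assumptions.

# Hypothetical sketch of traversing the node shape implied by the specs above;
# constant values and split semantics are assumptions, not the gem's real code.
NODE_SPLIT_01_2 = '01_2'.freeze  # assumed: genotypes 0 and 1 follow the left branch, 2 the right
NODE_SPLIT_0_12 = '0_12'.freeze  # assumed: genotype 0 follows the left branch, 1 and 2 the right

def traverse_sketch(node, snp_list)
  return node unless node.is_a?(Hash)      # Numeric leaf: the prediction label
  snp = node.keys.first
  left, split_type, right = node[snp]
  genotype = snp_list[snp - 1]              # specs index SNPs from 1, arrays from 0
  follow_left = (split_type == NODE_SPLIT_01_2 ? genotype <= 1 : genotype == 0)
  traverse_sketch(follow_left ? left : right, snp_list)
end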
|