nimbus 2.2.1 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +7 -0
- data/CONTRIBUTING.md +46 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +141 -22
- data/bin/nimbus +2 -2
- data/lib/nimbus/classification_tree.rb +9 -12
- data/lib/nimbus/configuration.rb +27 -27
- data/lib/nimbus/forest.rb +8 -8
- data/lib/nimbus/loss_functions.rb +11 -0
- data/lib/nimbus/regression_tree.rb +8 -10
- data/lib/nimbus/tree.rb +54 -12
- data/lib/nimbus/version.rb +1 -1
- data/lib/nimbus.rb +2 -6
- data/spec/classification_tree_spec.rb +47 -47
- data/spec/configuration_spec.rb +55 -55
- data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
- data/spec/fixtures/classification/random_forest.yml +1174 -0
- data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
- data/spec/fixtures/regression/random_forest.yml +2737 -0
- data/spec/forest_spec.rb +39 -39
- data/spec/individual_spec.rb +3 -3
- data/spec/loss_functions_spec.rb +31 -13
- data/spec/nimbus_spec.rb +2 -2
- data/spec/regression_tree_spec.rb +44 -44
- data/spec/training_set_spec.rb +3 -3
- data/spec/tree_spec.rb +4 -4
- metadata +42 -39
- data/spec/fixtures/classification_random_forest.yml +0 -922
- data/spec/fixtures/regression_random_forest.yml +0 -1741
- /data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
- /data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
- /data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
- /data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
data/spec/forest_spec.rb
CHANGED
@@ -5,47 +5,47 @@ describe Nimbus::Forest do
|
|
5
5
|
describe "Regression" do
|
6
6
|
before(:each) do
|
7
7
|
@config = Nimbus::Configuration.new
|
8
|
-
@config.load fixture_file('
|
9
|
-
@config.load_training_data
|
8
|
+
@config.load fixture_file('regression/config.yml')
|
9
|
+
@config.load_training_data if @config.do_training
|
10
10
|
@forest = ::Nimbus::Forest.new @config
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'grows a regression forest of N trees' do
|
14
|
-
@forest.trees.
|
15
|
-
@config.forest_size.
|
16
|
-
@forest.
|
17
|
-
@forest.
|
14
|
+
expect(@forest.trees).to eq []
|
15
|
+
expect(@config.forest_size).to eq 3
|
16
|
+
expect(@forest).to_not be_classification
|
17
|
+
expect(@forest).to be_regression
|
18
18
|
@forest.grow
|
19
|
-
@forest.trees.size.
|
20
|
-
@forest.trees.each{|t| t.
|
19
|
+
expect(@forest.trees.size).to eq @config.forest_size
|
20
|
+
@forest.trees.each{|t| expect(t).to be_kind_of Hash}
|
21
21
|
end
|
22
22
|
|
23
23
|
it 'creates averaged predictions for individuals in the training set' do
|
24
|
-
@forest.predictions.
|
24
|
+
expect(@forest.predictions).to eq({})
|
25
25
|
@forest.grow
|
26
|
-
(@forest.predictions.keys - (1..800).to_a ).
|
27
|
-
@forest.predictions.values.each{|v| v.
|
26
|
+
expect((@forest.predictions.keys - (1..800).to_a )).to eq [] # 800 individuals in the training file
|
27
|
+
@forest.predictions.values.each{|v| expect(v).to be_kind_of Numeric}
|
28
28
|
end
|
29
29
|
|
30
30
|
it 'computes averaged SNP importances for every SNP' do
|
31
|
-
@forest.snp_importances.
|
31
|
+
expect(@forest.snp_importances).to eq({})
|
32
32
|
@forest.grow
|
33
|
-
@forest.snp_importances.keys.sort.
|
34
|
-
@forest.snp_importances.values.each{|v| v.
|
33
|
+
expect(@forest.snp_importances.keys.sort).to eq (1..200).to_a # 200 snps in the training file
|
34
|
+
@forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
|
35
35
|
end
|
36
36
|
|
37
37
|
it 'does not compute SNP importances if config set to false' do
|
38
|
-
@forest.snp_importances.
|
38
|
+
expect(@forest.snp_importances).to eq({})
|
39
39
|
@forest.options.do_importances = false
|
40
40
|
@forest.grow
|
41
|
-
@forest.snp_importances.
|
41
|
+
expect(@forest.snp_importances).to eq({})
|
42
42
|
end
|
43
43
|
|
44
44
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
45
45
|
@forest = @config.load_forest
|
46
|
-
@forest.predictions.
|
46
|
+
expect(@forest.predictions).to eq({})
|
47
47
|
|
48
|
-
tree_structure = Psych.load(File.open fixture_file('
|
48
|
+
tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml'))
|
49
49
|
expected_predictions = {}
|
50
50
|
@config.read_testing_data{|individual|
|
51
51
|
individual_prediction = 0.0
|
@@ -56,60 +56,60 @@ describe Nimbus::Forest do
|
|
56
56
|
}
|
57
57
|
|
58
58
|
@forest.traverse
|
59
|
-
@forest.predictions.
|
59
|
+
expect(@forest.predictions).to eq expected_predictions
|
60
60
|
end
|
61
61
|
|
62
62
|
it 'can output forest structure in YAML format' do
|
63
63
|
@forest = @config.load_forest
|
64
|
-
Psych.load(File.open fixture_file('
|
64
|
+
Psych.load(File.open fixture_file('regression/random_forest.yml')) == Psych.load(@forest.to_yaml)
|
65
65
|
end
|
66
66
|
end
|
67
67
|
|
68
68
|
describe "Classification" do
|
69
69
|
before(:each) do
|
70
70
|
@config = Nimbus::Configuration.new
|
71
|
-
@config.load fixture_file('
|
71
|
+
@config.load fixture_file('classification/config.yml')
|
72
72
|
@config.load_training_data
|
73
73
|
@forest = ::Nimbus::Forest.new @config
|
74
74
|
end
|
75
75
|
|
76
76
|
it 'grows a classification forest of N trees' do
|
77
|
-
@forest.trees.
|
78
|
-
@config.forest_size.
|
79
|
-
@forest.
|
80
|
-
@forest.
|
77
|
+
expect(@forest.trees).to eq []
|
78
|
+
expect(@config.forest_size).to eq 3
|
79
|
+
expect(@forest).to be_classification
|
80
|
+
expect(@forest).to_not be_regression
|
81
81
|
@forest.grow
|
82
|
-
@forest.trees.size.
|
83
|
-
@forest.trees.each{|t| t.
|
82
|
+
expect(@forest.trees.size).to eq @config.forest_size
|
83
|
+
@forest.trees.each{|t| expect(t).to be_kind_of Hash}
|
84
84
|
end
|
85
85
|
|
86
86
|
it 'creates predictions for individuals in the training set' do
|
87
|
-
@forest.predictions.
|
87
|
+
expect(@forest.predictions).to eq({})
|
88
88
|
@forest.grow
|
89
|
-
(@forest.predictions.keys - (1..1000).to_a ).
|
90
|
-
@forest.predictions.values.each{|v| v.
|
89
|
+
expect((@forest.predictions.keys - (1..1000).to_a )).to eq [] # 1000 individuals in the training file
|
90
|
+
@forest.predictions.values.each{|v| expect(v).to be_kind_of String}
|
91
91
|
end
|
92
92
|
|
93
93
|
it 'computes averaged SNP importances for every SNP' do
|
94
|
-
@forest.snp_importances.
|
94
|
+
expect(@forest.snp_importances).to eq({})
|
95
95
|
@forest.options.do_importances = true
|
96
96
|
@forest.grow
|
97
|
-
@forest.snp_importances.keys.sort.
|
98
|
-
@forest.snp_importances.values.each{|v| v.
|
97
|
+
expect(@forest.snp_importances.keys.sort).to eq (1..100).to_a # 100 snps in the training file
|
98
|
+
@forest.snp_importances.values.each{|v| expect(v).to be_kind_of Numeric}
|
99
99
|
end
|
100
100
|
|
101
101
|
it 'does not compute SNP importances if config set to false' do
|
102
|
-
@forest.snp_importances.
|
102
|
+
expect(@forest.snp_importances).to eq({})
|
103
103
|
@forest.options.do_importances = false
|
104
104
|
@forest.grow
|
105
|
-
@forest.snp_importances.
|
105
|
+
expect(@forest.snp_importances).to eq({})
|
106
106
|
end
|
107
107
|
|
108
108
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
109
109
|
@forest = @config.load_forest
|
110
|
-
@forest.predictions.
|
110
|
+
expect(@forest.predictions).to eq({})
|
111
111
|
|
112
|
-
tree_structure = Psych.load(File.open fixture_file('
|
112
|
+
tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml'))
|
113
113
|
expected_predictions = {}
|
114
114
|
@config.read_testing_data{|individual|
|
115
115
|
individual_prediction = []
|
@@ -121,12 +121,12 @@ describe Nimbus::Forest do
|
|
121
121
|
}
|
122
122
|
|
123
123
|
@forest.traverse
|
124
|
-
@forest.predictions.
|
124
|
+
expect(@forest.predictions).to eq expected_predictions
|
125
125
|
end
|
126
126
|
|
127
127
|
it 'can output forest structure in YAML format' do
|
128
128
|
@forest = @config.load_forest
|
129
|
-
Psych.load(File.open fixture_file('
|
129
|
+
Psych.load(File.open fixture_file('classification/random_forest.yml')) == Psych.load(@forest.to_yaml)
|
130
130
|
end
|
131
131
|
end
|
132
132
|
end
|
data/spec/individual_spec.rb
CHANGED
@@ -5,9 +5,9 @@ describe Nimbus::Individual do
|
|
5
5
|
|
6
6
|
it "stores id, fenotype and SNPs information for an individual" do
|
7
7
|
@individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
|
8
|
-
@individual.id.
|
9
|
-
@individual.fenotype.
|
10
|
-
@individual.snp_list.
|
8
|
+
expect(@individual.id).to eq 11
|
9
|
+
expect(@individual.fenotype).to eq 33.275
|
10
|
+
expect(@individual.snp_list).to eq [1,0,2,1]
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/spec/loss_functions_spec.rb
CHANGED
@@ -7,39 +7,57 @@ describe Nimbus::LossFunctions do
|
|
7
7
|
ids = [1,3,5,7]
|
8
8
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
9
9
|
|
10
|
-
Nimbus::LossFunctions.average(ids, values).
|
10
|
+
expect(Nimbus::LossFunctions.average(ids, values)).to eq 18.25 # (10 + 21 + 31 + 11 = 73)/4
|
11
11
|
end
|
12
12
|
|
13
13
|
it "method for mean squared error" do
|
14
14
|
ids = [3,7,85]
|
15
15
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
16
16
|
|
17
|
-
Nimbus::LossFunctions.mean_squared_error(ids, values).
|
17
|
+
expect(Nimbus::LossFunctions.mean_squared_error(ids, values)).to eq 74.0 # (avg(21 + 11 + 22) = 18: sum (x-18)^2
|
18
18
|
end
|
19
19
|
|
20
20
|
it "method for quadratic_loss" do
|
21
21
|
ids = [1,4]
|
22
22
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
23
23
|
|
24
|
-
Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).
|
24
|
+
expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq 1
|
25
25
|
end
|
26
26
|
|
27
27
|
it "quadratic loss is mean squared error averaged" do
|
28
28
|
ids = [1,2,3,4,5,7,85]
|
29
29
|
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
30
|
-
Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).
|
30
|
+
expect(Nimbus::LossFunctions.quadratic_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "method for pseudo Huber error" do
|
34
|
+
ids = [3,7,85]
|
35
|
+
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
36
|
+
expect(Nimbus::LossFunctions.pseudo_huber_error(ids, values).round(5)).to eq 11.92337 # (avg(21 + 11 + 22) = 18: log(cosh(x-18))
|
37
|
+
end
|
38
|
+
|
39
|
+
it "method for pseudo Huber loss function" do
|
40
|
+
ids = [1,4]
|
41
|
+
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
42
|
+
expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq 0.43378
|
43
|
+
end
|
44
|
+
|
45
|
+
it "pseudo Huber loss is pseudo Huber error averaged" do
|
46
|
+
ids = [1,2,3,4,5,7,85]
|
47
|
+
values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
|
48
|
+
expect(Nimbus::LossFunctions.pseudo_huber_loss(ids, values).round(5)).to eq (Nimbus::LossFunctions.pseudo_huber_error(ids, values)/7 ).round(5)
|
31
49
|
end
|
32
50
|
|
33
51
|
it "method for squared difference" do
|
34
|
-
Nimbus::LossFunctions.squared_difference(50, 40).
|
35
|
-
Nimbus::LossFunctions.squared_difference(22, 10).
|
52
|
+
expect(Nimbus::LossFunctions.squared_difference(50, 40)).to eq 100.0
|
53
|
+
expect(Nimbus::LossFunctions.squared_difference(22, 10)).to eq 144.0
|
36
54
|
end
|
37
55
|
|
38
56
|
it "method for majority class" do
|
39
57
|
ids = [1,2,3,4,5,7,85]
|
40
58
|
values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #3C, 2A, 2B
|
41
59
|
classes = ['A', 'B', 'C']
|
42
|
-
Nimbus::LossFunctions.majority_class(ids, values, classes).
|
60
|
+
expect(Nimbus::LossFunctions.majority_class(ids, values, classes)).to eq 'C'
|
43
61
|
end
|
44
62
|
|
45
63
|
it "majority class method selects randomly if more than one majority class" do
|
@@ -50,27 +68,27 @@ describe Nimbus::LossFunctions do
|
|
50
68
|
20.times do
|
51
69
|
results << Nimbus::LossFunctions.majority_class(ids, values, classes)
|
52
70
|
end
|
53
|
-
results.
|
54
|
-
results.
|
71
|
+
expect(results).to include('A')
|
72
|
+
expect(results).to include('C')
|
55
73
|
end
|
56
74
|
|
57
75
|
it "method for majority class in list" do
|
58
76
|
list = %w(A A A B B B C A B C A B A)
|
59
77
|
classes = ['A', 'B', 'C']
|
60
|
-
Nimbus::LossFunctions.majority_class_in_list(list, classes).
|
78
|
+
expect(Nimbus::LossFunctions.majority_class_in_list(list, classes)).to eq 'A'
|
61
79
|
end
|
62
80
|
|
63
81
|
it "method for class sizes" do
|
64
82
|
ids = [1,2,3,4,5,7,85]
|
65
83
|
values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #2A, 2B, 3C
|
66
84
|
classes = ['A', 'B', 'C']
|
67
|
-
Nimbus::LossFunctions.class_sizes(ids, values, classes).
|
85
|
+
expect(Nimbus::LossFunctions.class_sizes(ids, values, classes)).to eq [2, 2, 3]
|
68
86
|
end
|
69
87
|
|
70
88
|
it "method for class sizes in list" do
|
71
89
|
list = %w(A A A B B B C A B C A B A) # 6A, 5B, 2C
|
72
90
|
classes = ['A', 'B', 'C']
|
73
|
-
Nimbus::LossFunctions.class_sizes_in_list(list, classes).
|
91
|
+
expect(Nimbus::LossFunctions.class_sizes_in_list(list, classes)).to eq [6, 5, 2]
|
74
92
|
end
|
75
93
|
|
76
94
|
it "Gini index" do
|
@@ -78,7 +96,7 @@ describe Nimbus::LossFunctions do
|
|
78
96
|
values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'C'} #3C, 2A, 1B
|
79
97
|
classes = ['A', 'B', 'C']
|
80
98
|
# Gini = 1 - ( (3/6)^2 + (2/6)^2 + (1/6)^2 ) = 0.61111
|
81
|
-
Nimbus::LossFunctions.gini_index(ids, values, classes).
|
99
|
+
expect(Nimbus::LossFunctions.gini_index(ids, values, classes)).to eq 0.61111
|
82
100
|
end
|
83
101
|
|
84
102
|
end
|
data/spec/nimbus_spec.rb
CHANGED
@@ -6,13 +6,13 @@ describe 'Nimbus module' do
|
|
6
6
|
|
7
7
|
it "manages a Nimbus::Application object" do
|
8
8
|
app = Nimbus.application
|
9
|
-
app.
|
9
|
+
expect(app).to be_kind_of Nimbus::Application
|
10
10
|
end
|
11
11
|
|
12
12
|
it "accepts setting an external Nimbus::Application" do
|
13
13
|
app = Nimbus::Application.new
|
14
14
|
Nimbus.application = app
|
15
|
-
Nimbus.application.
|
15
|
+
expect(Nimbus.application).to eq app
|
16
16
|
end
|
17
17
|
|
18
18
|
end
|
@@ -4,31 +4,31 @@ describe Nimbus::RegressionTree do
|
|
4
4
|
|
5
5
|
before(:each) do
|
6
6
|
@config = Nimbus::Configuration.new
|
7
|
-
@config.load fixture_file('
|
7
|
+
@config.load fixture_file('regression/config.yml')
|
8
8
|
|
9
9
|
@tree = Nimbus::RegressionTree.new @config.tree
|
10
10
|
end
|
11
11
|
|
12
12
|
it "is initialized with tree config info" do
|
13
|
-
@tree.snp_total_count.
|
14
|
-
@tree.snp_sample_size.
|
15
|
-
@tree.node_min_size.
|
13
|
+
expect(@tree.snp_total_count).to eq 200
|
14
|
+
expect(@tree.snp_sample_size).to eq 60
|
15
|
+
expect(@tree.node_min_size).to eq 5
|
16
16
|
end
|
17
17
|
|
18
18
|
it "creates a tree structure when seeded with training data" do
|
19
19
|
@config.load_training_data
|
20
|
-
@tree.structure.
|
20
|
+
expect(@tree.structure).to be_nil
|
21
21
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
22
|
-
@tree.structure.
|
23
|
-
@tree.structure.
|
22
|
+
expect(@tree.structure).to_not be_nil
|
23
|
+
expect(@tree.structure).to be_kind_of Hash
|
24
24
|
|
25
|
-
@tree.structure.keys.first.
|
26
|
-
@tree.used_snps.
|
25
|
+
expect(@tree.structure.keys.first).to eq @tree.used_snps.last
|
26
|
+
expect(@tree.used_snps).to_not be_empty
|
27
27
|
end
|
28
28
|
|
29
|
-
it "split node
|
29
|
+
it "split node when building a node and finds a suitable split" do
|
30
30
|
@config.load_training_data
|
31
|
-
|
31
|
+
allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return((141..200).to_a) #189 is best split
|
32
32
|
|
33
33
|
@tree.individuals = @config.training_set.individuals
|
34
34
|
@tree.id_to_fenotype = @config.training_set.ids_fenotypes
|
@@ -36,29 +36,29 @@ describe Nimbus::RegressionTree do
|
|
36
36
|
@tree.predictions = {}
|
37
37
|
|
38
38
|
branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
39
|
-
branch.keys.size.
|
40
|
-
branch.keys.first.
|
41
|
-
branch[189].size.
|
42
|
-
branch[189][0].
|
43
|
-
branch[189][1]
|
44
|
-
branch[189][2].
|
39
|
+
expect(branch.keys.size).to eq 1
|
40
|
+
expect(branch.keys.first).to eq 189
|
41
|
+
expect(branch[189].size).to eq 3
|
42
|
+
expect(branch[189][0]).to be_kind_of Hash
|
43
|
+
expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[189][1])
|
44
|
+
expect(branch[189][2]).to be_kind_of Hash
|
45
45
|
end
|
46
46
|
|
47
47
|
it "keeps track of all SNPs used for the tree" do
|
48
48
|
@config.load_training_data
|
49
49
|
snps = (131..190).to_a
|
50
|
-
|
51
|
-
@tree.used_snps.
|
50
|
+
allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return(snps)
|
51
|
+
expect(@tree.used_snps).to be_nil
|
52
52
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
53
|
-
@tree.used_snps.size.
|
53
|
+
expect(@tree.used_snps.size).to be > 4
|
54
54
|
@tree.used_snps.each{|snp|
|
55
|
-
snps.include?(snp).
|
55
|
+
expect(snps.include?(snp)).to be true
|
56
56
|
}
|
57
57
|
end
|
58
58
|
|
59
59
|
it "labels node when building a node and there is not a suitable split" do
|
60
60
|
@config.load_training_data
|
61
|
-
|
61
|
+
allow_any_instance_of(Nimbus::RegressionTree).to receive(:snps_random_sample).and_return([91])
|
62
62
|
|
63
63
|
@tree.individuals = @config.training_set.individuals
|
64
64
|
@tree.id_to_fenotype = @config.training_set.ids_fenotypes
|
@@ -66,9 +66,9 @@ describe Nimbus::RegressionTree do
|
|
66
66
|
@tree.predictions = {}
|
67
67
|
|
68
68
|
branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
69
|
-
branch[
|
70
|
-
branch[
|
71
|
-
branch[
|
69
|
+
expect(branch[91][0]).to be_kind_of Numeric
|
70
|
+
expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[91][1])
|
71
|
+
expect(branch[91][2]).to be_kind_of Numeric
|
72
72
|
end
|
73
73
|
|
74
74
|
it "labels node when building a node with less individuals than the minimum node size" do
|
@@ -80,50 +80,50 @@ describe Nimbus::RegressionTree do
|
|
80
80
|
@tree.predictions = {}
|
81
81
|
|
82
82
|
label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
83
|
-
label.
|
83
|
+
expect(label).to be_kind_of Numeric
|
84
84
|
|
85
85
|
label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
86
|
-
label.
|
86
|
+
expect(label).to be_kind_of Numeric
|
87
87
|
|
88
88
|
label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
89
|
-
label.
|
89
|
+
expect(label).to be_kind_of Numeric
|
90
90
|
|
91
91
|
label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
92
|
-
label.
|
92
|
+
expect(label).to be_kind_of Numeric
|
93
93
|
end
|
94
94
|
|
95
95
|
it 'computes generalization error for the tree' do
|
96
96
|
@config.load_training_data
|
97
97
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
98
|
-
@tree.generalization_error.
|
98
|
+
expect(@tree.generalization_error).to be_nil
|
99
99
|
@tree.generalization_error_from_oob((2..200).to_a)
|
100
|
-
@tree.generalization_error.
|
101
|
-
@tree.generalization_error.
|
102
|
-
@tree.generalization_error.
|
100
|
+
expect(@tree.generalization_error).to be_kind_of Numeric
|
101
|
+
expect(@tree.generalization_error).to be > 0.0
|
102
|
+
expect(@tree.generalization_error).to be < 1.0
|
103
103
|
end
|
104
104
|
|
105
105
|
it 'estimates importance for all SNPs' do
|
106
106
|
@config.load_training_data
|
107
107
|
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
108
|
-
@tree.importances.
|
108
|
+
expect(@tree.importances).to be_nil
|
109
109
|
@tree.estimate_importances((300..533).to_a)
|
110
|
-
@tree.importances.
|
111
|
-
@tree.importances.keys.
|
112
|
-
(@tree.importances.keys - (1..200).to_a).
|
110
|
+
expect(@tree.importances).to be_kind_of Hash
|
111
|
+
expect(@tree.importances.keys).to_not be_empty
|
112
|
+
expect((@tree.importances.keys - (1..200).to_a)).to be_empty #all keys are snp indexes (200 snps in training file)
|
113
113
|
end
|
114
114
|
|
115
115
|
it 'get prediction for an individual pushing it down a tree structure' do
|
116
|
-
tree_structure = Psych.load(File.open fixture_file('
|
116
|
+
tree_structure = Psych.load(File.open fixture_file('regression/random_forest.yml')).first
|
117
117
|
individual_data = [0]*200
|
118
118
|
prediction = Nimbus::Tree.traverse tree_structure, individual_data
|
119
|
-
prediction.
|
119
|
+
expect(prediction).to eq -0.90813
|
120
120
|
|
121
|
-
individual_data[
|
122
|
-
individual_data[
|
123
|
-
individual_data[
|
124
|
-
individual_data[
|
121
|
+
individual_data[44-1] = 2
|
122
|
+
individual_data[98-1] = 1
|
123
|
+
individual_data[22-1] = 1
|
124
|
+
individual_data[31-1] = 2
|
125
125
|
prediction = Nimbus::Tree.traverse tree_structure, individual_data
|
126
|
-
prediction.
|
126
|
+
expect(prediction).to eq -0.95805
|
127
127
|
end
|
128
128
|
|
129
129
|
end
|
data/spec/training_set_spec.rb
CHANGED
@@ -9,8 +9,8 @@ describe Nimbus::TrainingSet do
|
|
9
9
|
i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
|
10
10
|
@training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
|
11
11
|
|
12
|
-
@training_set.individuals.
|
13
|
-
@training_set.ids_fenotypes.
|
12
|
+
expect(@training_set.individuals).to eq [i1, i3]
|
13
|
+
expect(@training_set.ids_fenotypes).to eq ({i1.id => 11.0, i3.id => 33.0})
|
14
14
|
end
|
15
15
|
|
16
16
|
it "keeps track of ids of all individuals in the training set" do
|
@@ -19,7 +19,7 @@ describe Nimbus::TrainingSet do
|
|
19
19
|
i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
|
20
20
|
@training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
|
21
21
|
|
22
|
-
@training_set.all_ids.
|
22
|
+
expect(@training_set.all_ids).to eq [1,3]
|
23
23
|
end
|
24
24
|
|
25
25
|
end
|
data/spec/tree_spec.rb
CHANGED
@@ -5,15 +5,15 @@ describe Nimbus::Tree do
|
|
5
5
|
|
6
6
|
before(:each) do
|
7
7
|
@config = Nimbus::Configuration.new
|
8
|
-
@config.load fixture_file('
|
8
|
+
@config.load fixture_file('regression/config.yml')
|
9
9
|
|
10
10
|
@tree = Nimbus::Tree.new @config.tree
|
11
11
|
end
|
12
12
|
|
13
13
|
it "is initialized with tree config info" do
|
14
|
-
@tree.snp_total_count.
|
15
|
-
@tree.snp_sample_size.
|
16
|
-
@tree.node_min_size.
|
14
|
+
expect(@tree.snp_total_count).to eq 200
|
15
|
+
expect(@tree.snp_sample_size).to eq 60
|
16
|
+
expect(@tree.node_min_size).to eq 5
|
17
17
|
end
|
18
18
|
|
19
19
|
end
|
metadata
CHANGED
@@ -1,28 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nimbus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
5
|
-
prerelease:
|
4
|
+
version: 2.4.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Juanjo Bazán
|
9
8
|
- Oscar González Recio
|
10
|
-
autorequire:
|
9
|
+
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2023-03-09 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: rspec
|
17
|
-
requirement:
|
18
|
-
none: false
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
19
17
|
requirements:
|
20
|
-
- -
|
18
|
+
- - "~>"
|
21
19
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
20
|
+
version: '3.12'
|
23
21
|
type: :development
|
24
22
|
prerelease: false
|
25
|
-
version_requirements:
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '3.12'
|
26
28
|
description: Nimbus is a Ruby gem to implement Random Forest in a genomic selection
|
27
29
|
context.
|
28
30
|
email:
|
@@ -32,8 +34,12 @@ executables:
|
|
32
34
|
extensions: []
|
33
35
|
extra_rdoc_files: []
|
34
36
|
files:
|
37
|
+
- CODE_OF_CONDUCT.md
|
38
|
+
- CONTRIBUTING.md
|
35
39
|
- MIT-LICENSE.txt
|
36
40
|
- README.md
|
41
|
+
- bin/nimbus
|
42
|
+
- lib/nimbus.rb
|
37
43
|
- lib/nimbus/application.rb
|
38
44
|
- lib/nimbus/classification_tree.rb
|
39
45
|
- lib/nimbus/configuration.rb
|
@@ -45,17 +51,16 @@ files:
|
|
45
51
|
- lib/nimbus/training_set.rb
|
46
52
|
- lib/nimbus/tree.rb
|
47
53
|
- lib/nimbus/version.rb
|
48
|
-
- lib/nimbus.rb
|
49
54
|
- spec/classification_tree_spec.rb
|
50
55
|
- spec/configuration_spec.rb
|
51
|
-
- spec/fixtures/
|
52
|
-
- spec/fixtures/
|
53
|
-
- spec/fixtures/
|
54
|
-
- spec/fixtures/
|
55
|
-
- spec/fixtures/
|
56
|
-
- spec/fixtures/
|
57
|
-
- spec/fixtures/
|
58
|
-
- spec/fixtures/
|
56
|
+
- spec/fixtures/classification/config.yml
|
57
|
+
- spec/fixtures/classification/random_forest.yml
|
58
|
+
- spec/fixtures/classification/testing.data
|
59
|
+
- spec/fixtures/classification/training.data
|
60
|
+
- spec/fixtures/regression/config.yml
|
61
|
+
- spec/fixtures/regression/random_forest.yml
|
62
|
+
- spec/fixtures/regression/testing.data
|
63
|
+
- spec/fixtures/regression/training.data
|
59
64
|
- spec/forest_spec.rb
|
60
65
|
- spec/individual_spec.rb
|
61
66
|
- spec/loss_functions_spec.rb
|
@@ -64,45 +69,43 @@ files:
|
|
64
69
|
- spec/spec_helper.rb
|
65
70
|
- spec/training_set_spec.rb
|
66
71
|
- spec/tree_spec.rb
|
67
|
-
- bin/nimbus
|
68
72
|
homepage: http://nimbusgem.org
|
69
|
-
licenses:
|
70
|
-
|
73
|
+
licenses:
|
74
|
+
- MIT
|
75
|
+
metadata: {}
|
76
|
+
post_install_message:
|
71
77
|
rdoc_options:
|
72
|
-
- --main
|
78
|
+
- "--main"
|
73
79
|
- README.rdoc
|
74
|
-
- --charset=UTF-8
|
80
|
+
- "--charset=UTF-8"
|
75
81
|
require_paths:
|
76
82
|
- lib
|
77
83
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
-
none: false
|
79
84
|
requirements:
|
80
|
-
- -
|
85
|
+
- - ">="
|
81
86
|
- !ruby/object:Gem::Version
|
82
87
|
version: '0'
|
83
88
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
-
none: false
|
85
89
|
requirements:
|
86
|
-
- -
|
90
|
+
- - ">="
|
87
91
|
- !ruby/object:Gem::Version
|
88
92
|
version: '0'
|
89
93
|
requirements: []
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
specification_version: 3
|
94
|
+
rubygems_version: 3.4.6
|
95
|
+
signing_key:
|
96
|
+
specification_version: 4
|
94
97
|
summary: Random Forest algorithm for Genomics
|
95
98
|
test_files:
|
96
99
|
- spec/classification_tree_spec.rb
|
97
100
|
- spec/configuration_spec.rb
|
98
|
-
- spec/fixtures/
|
99
|
-
- spec/fixtures/
|
100
|
-
- spec/fixtures/
|
101
|
-
- spec/fixtures/
|
102
|
-
- spec/fixtures/
|
103
|
-
- spec/fixtures/
|
104
|
-
- spec/fixtures/
|
105
|
-
- spec/fixtures/
|
101
|
+
- spec/fixtures/classification/config.yml
|
102
|
+
- spec/fixtures/classification/random_forest.yml
|
103
|
+
- spec/fixtures/classification/testing.data
|
104
|
+
- spec/fixtures/classification/training.data
|
105
|
+
- spec/fixtures/regression/config.yml
|
106
|
+
- spec/fixtures/regression/random_forest.yml
|
107
|
+
- spec/fixtures/regression/testing.data
|
108
|
+
- spec/fixtures/regression/training.data
|
106
109
|
- spec/forest_spec.rb
|
107
110
|
- spec/individual_spec.rb
|
108
111
|
- spec/loss_functions_spec.rb
|