nimbus 1.0.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/spec/forest_spec.rb CHANGED
@@ -2,56 +2,115 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::Forest do
5
- before(:each) do
6
- @config = Nimbus::Configuration.new
7
- @config.load fixture_file('regression_config.yml')
8
- @config.load_training_data
9
- @forest = ::Nimbus::Forest.new @config
10
- end
11
-
12
- it 'grows a forest of N trees' do
13
- @forest.trees.should == []
14
- @config.forest_size.should == 3
15
- @forest.grow
16
- @forest.trees.size.should == @config.forest_size
17
- @forest.trees.each{|t| t.should be_kind_of Hash}
18
- end
19
-
20
- it 'creates averaged predictions for individuals in the training set' do
21
- @forest.predictions.should == {}
22
- @forest.grow
23
- (@forest.predictions.keys - (1..800).to_a ).should == []
24
- @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
25
- end
26
-
27
- it 'computes averaged SNP importances for every SNP' do
28
- @forest.snp_importances.should == {}
29
- @forest.grow
30
- @forest.snp_importances.keys.sort.should == (1..200).to_a
31
- @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
32
- end
33
-
34
- it 'traverses a set of testing individuals through every tree in the forest and return predictions' do
35
- @forest = @config.load_forest
36
- @forest.predictions.should == {}
37
-
38
- tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml'))
39
- expected_predictions = {}
40
- @config.read_testing_data{|individual|
41
- individual_prediction = 0.0
42
- tree_structure.each do |t|
43
- individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
44
- end
45
- expected_predictions[individual.id] = (individual_prediction / 3).round(5)
46
- }
47
-
48
- @forest.traverse
49
- @forest.predictions.should == expected_predictions
5
+ describe "Regression" do
6
+ before(:each) do
7
+ @config = Nimbus::Configuration.new
8
+ @config.load fixture_file('regression_config.yml')
9
+ @config.load_training_data
10
+ @forest = ::Nimbus::Forest.new @config
11
+ end
12
+
13
+ it 'grows a regression forest of N trees' do
14
+ @forest.trees.should == []
15
+ @config.forest_size.should == 3
16
+ @forest.should_not be_classification
17
+ @forest.should be_regression
18
+ @forest.grow
19
+ @forest.trees.size.should == @config.forest_size
20
+ @forest.trees.each{|t| t.should be_kind_of Hash}
21
+ end
22
+
23
+ it 'creates averaged predictions for individuals in the training set' do
24
+ @forest.predictions.should == {}
25
+ @forest.grow
26
+ (@forest.predictions.keys - (1..800).to_a ).should == [] # 800 individuals in the training file
27
+ @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
28
+ end
29
+
30
+ it 'computes averaged SNP importances for every SNP' do
31
+ @forest.snp_importances.should == {}
32
+ @forest.grow
33
+ @forest.snp_importances.keys.sort.should == (1..200).to_a # 200 snps in the training file
34
+ @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
35
+ end
36
+
37
+ it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
38
+ @forest = @config.load_forest
39
+ @forest.predictions.should == {}
40
+
41
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml'))
42
+ expected_predictions = {}
43
+ @config.read_testing_data{|individual|
44
+ individual_prediction = 0.0
45
+ tree_structure.each do |t|
46
+ individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
47
+ end
48
+ expected_predictions[individual.id] = (individual_prediction / 3).round(5)
49
+ }
50
+
51
+ @forest.traverse
52
+ @forest.predictions.should == expected_predictions
53
+ end
54
+
55
+ it 'can output forest structure in YAML format' do
56
+ @forest = @config.load_forest
57
+ YAML.load(File.open fixture_file('regression_random_forest.yml')) == YAML.load(@forest.to_yaml)
58
+ end
50
59
  end
51
-
52
- it 'can output forest structure in YAML format' do
53
- @forest = @config.load_forest
54
- YAML.load(File.open fixture_file('regression_random_forest.yml')) == YAML.load(@forest.to_yaml)
60
+
61
+ describe "Classification" do
62
+ before(:each) do
63
+ @config = Nimbus::Configuration.new
64
+ @config.load fixture_file('classification_config.yml')
65
+ @config.load_training_data
66
+ @forest = ::Nimbus::Forest.new @config
67
+ end
68
+
69
+ it 'grows a classification forest of N trees' do
70
+ @forest.trees.should == []
71
+ @config.forest_size.should == 3
72
+ @forest.should be_classification
73
+ @forest.should_not be_regression
74
+ @forest.grow
75
+ @forest.trees.size.should == @config.forest_size
76
+ @forest.trees.each{|t| t.should be_kind_of Hash}
77
+ end
78
+
79
+ it 'creates predictions for individuals in the training set' do
80
+ @forest.predictions.should == {}
81
+ @forest.grow
82
+ (@forest.predictions.keys - (1..1000).to_a ).should == [] # 1000 individuals in the training file
83
+ @forest.predictions.values.each{|v| v.should be_kind_of String}
84
+ end
85
+
86
+ it 'computes averaged SNP importances for every SNP' do
87
+ @forest.snp_importances.should == {}
88
+ @forest.grow
89
+ @forest.snp_importances.keys.sort.should == (1..100).to_a # 100 snps in the training file
90
+ @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
91
+ end
92
+
93
+ it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
94
+ @forest = @config.load_forest
95
+ @forest.predictions.should == {}
96
+
97
+ tree_structure = YAML.load(File.open fixture_file('classification_random_forest.yml'))
98
+ expected_predictions = {}
99
+ @config.read_testing_data{|individual|
100
+ individual_prediction = []
101
+ tree_structure.each do |t|
102
+ individual_prediction << Nimbus::Tree.traverse(t, individual.snp_list)
103
+ end
104
+ expected_predictions[individual.id] = Nimbus::LossFunctions.majority_class_in_list(individual_prediction, @config.tree[:classes])
105
+ }
106
+
107
+ @forest.traverse
108
+ @forest.predictions.should == expected_predictions
109
+ end
110
+
111
+ it 'can output forest structure in YAML format' do
112
+ @forest = @config.load_forest
113
+ YAML.load(File.open fixture_file('classification_random_forest.yml')) == YAML.load(@forest.to_yaml)
114
+ end
55
115
  end
56
-
57
116
  end
@@ -2,12 +2,12 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::Individual do
5
-
5
+
6
6
  it "stores id, fenotype and SNPs information for an individual" do
7
7
  @individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
8
8
  @individual.id.should == 11
9
9
  @individual.fenotype.should == 33.275
10
10
  @individual.snp_list.should == [1,0,2,1]
11
11
  end
12
-
12
+
13
13
  end
@@ -0,0 +1,71 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::LossFunctions do
5
+
6
+ it "method for average" do
7
+ ids = [1,3,5,7]
8
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
9
+
10
+ Nimbus::LossFunctions.average(ids, values).should == 18.25 # (10 + 21 + 31 + 11 = 73)/4
11
+ end
12
+
13
+ it "method for mean squared error" do
14
+ ids = [3,7,85]
15
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
16
+
17
+ Nimbus::LossFunctions.mean_squared_error(ids, values).should == 74.0 # avg(21, 11, 22) = 18; sum of (x - 18)^2 = 9 + 49 + 16 = 74
18
+ end
19
+
20
+ it "method for quadratic_loss" do
21
+ ids = [1,4]
22
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
23
+
24
+ Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == 1
25
+ end
26
+
27
+ it "quadratic loss is mean squared error averaged" do
28
+ ids = [1,2,3,4,5,7,85]
29
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
30
+ Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
31
+ end
32
+
33
+ it "method for squared difference" do
34
+ Nimbus::LossFunctions.squared_difference(50, 40).should == 100.0
35
+ Nimbus::LossFunctions.squared_difference(22, 10).should == 144.0
36
+ end
37
+
38
+ it "method for majority class" do
39
+ ids = [1,2,3,4,5,7,85]
40
+ values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #3C, 2A, 2B
41
+ classes = ['A', 'B', 'C']
42
+ Nimbus::LossFunctions.majority_class(ids, values, classes).should == 'C'
43
+ end
44
+
45
+ it "majority class method selects randomly if more than one majority class" do
46
+ ids = [1,2,3,4,5,7,85,99]
47
+ values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C', 99 => 'A'} #3C, 3A, 2B
48
+ classes = ['A', 'B', 'C']
49
+ results = []
50
+ 20.times do
51
+ results << Nimbus::LossFunctions.majority_class(ids, values, classes)
52
+ end
53
+ results.should include('A')
54
+ results.should include('C')
55
+ end
56
+
57
+ it "method for majority class in list" do
58
+ list = %w(A A A B B B C A B C A B A)
59
+ classes = ['A', 'B', 'C']
60
+ Nimbus::LossFunctions.majority_class_in_list(list, classes).should == 'A'
61
+ end
62
+
63
+ it "Gini index" do
64
+ ids = [1,2,3,4,5,7]
65
+ values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'C'} #3C, 2A, 1B
66
+ classes = ['A', 'B', 'C']
67
+ # Gini = 1 - ( (3/6)^2 + (2/6)^2 + (1/6)^2 ) = 0.61111
68
+ Nimbus::LossFunctions.gini_index(ids, values, classes).should == 0.61111
69
+ end
70
+
71
+ end
data/spec/nimbus_spec.rb CHANGED
@@ -3,16 +3,16 @@ require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
 
5
5
  describe 'Nimbus module' do
6
-
6
+
7
7
  it "manages a Nimbus::Application object" do
8
8
  app = Nimbus.application
9
9
  app.should be_kind_of Nimbus::Application
10
10
  end
11
-
11
+
12
12
  it "accepts setting an external Nimbus::Application" do
13
- app = Nimbus::Application.new
13
+ app = Nimbus::Application.new
14
14
  Nimbus.application = app
15
15
  Nimbus.application.should == app
16
16
  end
17
-
17
+
18
18
  end
@@ -0,0 +1,129 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe Nimbus::RegressionTree do
4
+
5
+ before(:each) do
6
+ @config = Nimbus::Configuration.new
7
+ @config.load fixture_file('regression_config.yml')
8
+
9
+ @tree = Nimbus::RegressionTree.new @config.tree
10
+ end
11
+
12
+ it "is initialized with tree config info" do
13
+ @tree.snp_total_count.should == 200
14
+ @tree.snp_sample_size.should == 60
15
+ @tree.node_min_size.should == 5
16
+ end
17
+
18
+ it "creates a tree structure when seeded with training data" do
19
+ @config.load_training_data
20
+ @tree.structure.should be_nil
21
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
22
+ @tree.structure.should_not be_nil
23
+ @tree.structure.should be_kind_of Hash
24
+
25
+ @tree.structure.keys.first.should == @tree.used_snps.last
26
+ @tree.used_snps.should_not be_empty
27
+ end
28
+
29
+ it "split node in three when building a node and finds a suitable split" do
30
+ @config.load_training_data
31
+ @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
32
+
33
+ @tree.individuals = @config.training_set.individuals
34
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
35
+ @tree.used_snps = []
36
+ @tree.predictions = {}
37
+
38
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
39
+ branch.keys.size.should == 1
40
+ branch.keys.first.should == 189
41
+ branch[189].size.should == 3
42
+ branch[189][0].should be_kind_of Hash
43
+ branch[189][1].should be_kind_of Hash
44
+ branch[189][2].should be_kind_of Hash
45
+ end
46
+
47
+ it "keeps track of all SNPs used for the tree" do
48
+ @config.load_training_data
49
+ snps = (131..190).to_a
50
+ @tree.stub!(:snps_random_sample).and_return(snps)
51
+ @tree.used_snps.should be_nil
52
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
53
+ @tree.used_snps.size.should > 4
54
+ @tree.used_snps.each{|snp|
55
+ snps.include?(snp).should be_true
56
+ }
57
+ end
58
+
59
+ it "labels node when building a node and there is not a suitable split" do
60
+ @config.load_training_data
61
+ @tree.stub!(:snps_random_sample).and_return([33])
62
+
63
+ @tree.individuals = @config.training_set.individuals
64
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
65
+ @tree.used_snps = []
66
+ @tree.predictions = {}
67
+
68
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
69
+ branch[33][0].should be_kind_of Numeric
70
+ branch[33][1].should be_kind_of Numeric
71
+ branch[33][2].should be_kind_of Numeric
72
+ end
73
+
74
+ it "labels node when building a node with less individuals than the minimum node size" do
75
+ @config.load_training_data
76
+
77
+ @tree.individuals = @config.training_set.individuals
78
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
79
+ @tree.used_snps = []
80
+ @tree.predictions = {}
81
+
82
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
83
+ label.should be_kind_of Numeric
84
+
85
+ label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
86
+ label.should be_kind_of Numeric
87
+
88
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
89
+ label.should be_kind_of Numeric
90
+
91
+ label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
92
+ label.should be_kind_of Numeric
93
+ end
94
+
95
+ it 'computes generalization error for the tree' do
96
+ @config.load_training_data
97
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
98
+ @tree.generalization_error.should be_nil
99
+ @tree.generalization_error_from_oob((2..200).to_a)
100
+ @tree.generalization_error.should be_kind_of Numeric
101
+ @tree.generalization_error.should > 0.0
102
+ @tree.generalization_error.should < 1.0
103
+ end
104
+
105
+ it 'estimates importance for all SNPs' do
106
+ @config.load_training_data
107
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
108
+ @tree.importances.should be_nil
109
+ @tree.estimate_importances((300..533).to_a)
110
+ @tree.importances.should be_kind_of Hash
111
+ @tree.importances.keys.should_not be_empty
112
+ (@tree.importances.keys - (1..200).to_a).should be_empty #all keys are snp indexes (200 snps in training file)
113
+ end
114
+
115
+ it 'get prediction for an individual pushing it down a tree structure' do
116
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml')).first
117
+ individual_data = [0]*200
118
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
119
+ prediction.should == 0.25043
120
+
121
+ individual_data[189-1] = 1
122
+ individual_data[4-1] = 1
123
+ individual_data[62-1] = 2
124
+ individual_data[146-1] = 2
125
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
126
+ prediction.should == -0.9854
127
+ end
128
+
129
+ end
@@ -2,24 +2,24 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::TrainingSet do
5
-
5
+
6
6
  it "stores individuals list and fenotype data for them" do
7
7
  i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
8
8
  i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
9
9
  i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
10
10
  @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
11
-
11
+
12
12
  @training_set.individuals.should == [i1, i3]
13
13
  @training_set.ids_fenotypes.should == {i1.id => 11.0, i3.id => 33.0}
14
14
  end
15
-
15
+
16
16
  it "keeps track of ids of all individuals in the training set" do
17
17
  i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
18
18
  i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
19
19
  i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
20
20
  @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
21
-
21
+
22
22
  @training_set.all_ids.should == [1,3]
23
23
  end
24
-
24
+
25
25
  end
data/spec/tree_spec.rb CHANGED
@@ -2,129 +2,18 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::Tree do
5
-
5
+
6
6
  before(:each) do
7
7
  @config = Nimbus::Configuration.new
8
8
  @config.load fixture_file('regression_config.yml')
9
-
9
+
10
10
  @tree = Nimbus::Tree.new @config.tree
11
11
  end
12
-
12
+
13
13
  it "is initialized with tree config info" do
14
14
  @tree.snp_total_count.should == 200
15
15
  @tree.snp_sample_size.should == 60
16
16
  @tree.node_min_size.should == 5
17
17
  end
18
-
19
- it "creates a tree structure when seeded with training data" do
20
- @config.load_training_data
21
- @tree.structure.should be_nil
22
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
23
- @tree.structure.should_not be_nil
24
- @tree.structure.should be_kind_of Hash
25
-
26
- @tree.structure.keys.first.should == @tree.used_snps.last
27
- @tree.used_snps.should_not be_empty
28
- end
29
-
30
- it "split node in three when building a node and finds a suitable split" do
31
- @config.load_training_data
32
- @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
33
-
34
- @tree.individuals = @config.training_set.individuals
35
- @tree.id_to_fenotype = @config.training_set.ids_fenotypes
36
- @tree.used_snps = []
37
- @tree.predictions = {}
38
-
39
- branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
40
- branch.keys.size.should == 1
41
- branch.keys.first.should == 189
42
- branch[189].size.should == 3
43
- branch[189][0].should be_kind_of Hash
44
- branch[189][1].should be_kind_of Hash
45
- branch[189][2].should be_kind_of Hash
46
- end
47
-
48
- it "keeps track of all SNPs used for the tree" do
49
- @config.load_training_data
50
- snps = (131..190).to_a
51
- @tree.stub!(:snps_random_sample).and_return(snps)
52
- @tree.used_snps.should be_nil
53
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
54
- @tree.used_snps.size.should > 4
55
- @tree.used_snps.each{|snp|
56
- snps.include?(snp).should be_true
57
- }
58
- end
59
-
60
- it "labels node when building a node and there is not a suitable split" do
61
- @config.load_training_data
62
- @tree.stub!(:snps_random_sample).and_return([33])
63
-
64
- @tree.individuals = @config.training_set.individuals
65
- @tree.id_to_fenotype = @config.training_set.ids_fenotypes
66
- @tree.used_snps = []
67
- @tree.predictions = {}
68
-
69
- branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
70
- branch[33][0].should be_kind_of Numeric
71
- branch[33][1].should be_kind_of Numeric
72
- branch[33][2].should be_kind_of Numeric
73
- end
74
-
75
- it "labels node when building a node with less individuals than the minimum node size" do
76
- @config.load_training_data
77
-
78
- @tree.individuals = @config.training_set.individuals
79
- @tree.id_to_fenotype = @config.training_set.ids_fenotypes
80
- @tree.used_snps = []
81
- @tree.predictions = {}
82
-
83
- label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
84
- label.should be_kind_of Numeric
85
-
86
- label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
87
- label.should be_kind_of Numeric
88
-
89
- label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
90
- label.should be_kind_of Numeric
91
-
92
- label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
93
- label.should be_kind_of Numeric
94
- end
95
-
96
- it 'computes generalization error for the tree' do
97
- @config.load_training_data
98
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
99
- @tree.generalization_error.should be_nil
100
- @tree.generalization_error_from_oob((2..200).to_a)
101
- @tree.generalization_error.should be_kind_of Numeric
102
- @tree.generalization_error.should > 0.0
103
- @tree.generalization_error.should < 100.0
104
- end
105
-
106
- it 'estimates importance for all SNPs' do
107
- @config.load_training_data
108
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
109
- @tree.importances.should be_nil
110
- @tree.estimate_importances((300..533).to_a)
111
- @tree.importances.should be_kind_of Hash
112
- @tree.importances.keys.should_not be_empty
113
- (@tree.importances.keys - (1..200).to_a).should be_empty
114
- end
115
-
116
- it 'get prediction for an individual pushing it down a tree structure' do
117
- tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml')).first
118
- individual_data = [0]*200
119
- prediction = Nimbus::Tree.traverse tree_structure, individual_data
120
- prediction.should == 0.25043
121
-
122
- individual_data[189-1] = 1
123
- individual_data[4-1] = 1
124
- individual_data[62-1] = 2
125
- individual_data[146-1] = 2
126
- prediction = Nimbus::Tree.traverse tree_structure, individual_data
127
- prediction.should == -0.9854
128
- end
129
-
18
+
130
19
  end