nimbus 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/spec/forest_spec.rb CHANGED
@@ -2,56 +2,115 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::Forest do
5
- before(:each) do
6
- @config = Nimbus::Configuration.new
7
- @config.load fixture_file('regression_config.yml')
8
- @config.load_training_data
9
- @forest = ::Nimbus::Forest.new @config
10
- end
11
-
12
- it 'grows a forest of N trees' do
13
- @forest.trees.should == []
14
- @config.forest_size.should == 3
15
- @forest.grow
16
- @forest.trees.size.should == @config.forest_size
17
- @forest.trees.each{|t| t.should be_kind_of Hash}
18
- end
19
-
20
- it 'creates averaged predictions for individuals in the training set' do
21
- @forest.predictions.should == {}
22
- @forest.grow
23
- (@forest.predictions.keys - (1..800).to_a ).should == []
24
- @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
25
- end
26
-
27
- it 'computes averaged SNP importances for every SNP' do
28
- @forest.snp_importances.should == {}
29
- @forest.grow
30
- @forest.snp_importances.keys.sort.should == (1..200).to_a
31
- @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
32
- end
33
-
34
- it 'traverses a set of testing individuals through every tree in the forest and return predictions' do
35
- @forest = @config.load_forest
36
- @forest.predictions.should == {}
37
-
38
- tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml'))
39
- expected_predictions = {}
40
- @config.read_testing_data{|individual|
41
- individual_prediction = 0.0
42
- tree_structure.each do |t|
43
- individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
44
- end
45
- expected_predictions[individual.id] = (individual_prediction / 3).round(5)
46
- }
47
-
48
- @forest.traverse
49
- @forest.predictions.should == expected_predictions
5
+ describe "Regression" do
6
+ before(:each) do
7
+ @config = Nimbus::Configuration.new
8
+ @config.load fixture_file('regression_config.yml')
9
+ @config.load_training_data
10
+ @forest = ::Nimbus::Forest.new @config
11
+ end
12
+
13
+ it 'grows a regression forest of N trees' do
14
+ @forest.trees.should == []
15
+ @config.forest_size.should == 3
16
+ @forest.should_not be_classification
17
+ @forest.should be_regression
18
+ @forest.grow
19
+ @forest.trees.size.should == @config.forest_size
20
+ @forest.trees.each{|t| t.should be_kind_of Hash}
21
+ end
22
+
23
+ it 'creates averaged predictions for individuals in the training set' do
24
+ @forest.predictions.should == {}
25
+ @forest.grow
26
+ (@forest.predictions.keys - (1..800).to_a ).should == [] # 800 individuals in the training file
27
+ @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
28
+ end
29
+
30
+ it 'computes averaged SNP importances for every SNP' do
31
+ @forest.snp_importances.should == {}
32
+ @forest.grow
33
+ @forest.snp_importances.keys.sort.should == (1..200).to_a # 200 snps in the training file
34
+ @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
35
+ end
36
+
37
+ it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
38
+ @forest = @config.load_forest
39
+ @forest.predictions.should == {}
40
+
41
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml'))
42
+ expected_predictions = {}
43
+ @config.read_testing_data{|individual|
44
+ individual_prediction = 0.0
45
+ tree_structure.each do |t|
46
+ individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
47
+ end
48
+ expected_predictions[individual.id] = (individual_prediction / 3).round(5)
49
+ }
50
+
51
+ @forest.traverse
52
+ @forest.predictions.should == expected_predictions
53
+ end
54
+
55
+ it 'can output forest structure in YAML format' do
56
+ @forest = @config.load_forest
57
+ YAML.load(File.open fixture_file('regression_random_forest.yml')) == YAML.load(@forest.to_yaml)
58
+ end
50
59
  end
51
-
52
- it 'can output forest structure in YAML format' do
53
- @forest = @config.load_forest
54
- YAML.load(File.open fixture_file('regression_random_forest.yml')) == YAML.load(@forest.to_yaml)
60
+
61
+ describe "Classification" do
62
+ before(:each) do
63
+ @config = Nimbus::Configuration.new
64
+ @config.load fixture_file('classification_config.yml')
65
+ @config.load_training_data
66
+ @forest = ::Nimbus::Forest.new @config
67
+ end
68
+
69
+ it 'grows a classification forest of N trees' do
70
+ @forest.trees.should == []
71
+ @config.forest_size.should == 3
72
+ @forest.should be_classification
73
+ @forest.should_not be_regression
74
+ @forest.grow
75
+ @forest.trees.size.should == @config.forest_size
76
+ @forest.trees.each{|t| t.should be_kind_of Hash}
77
+ end
78
+
79
+ it 'creates predictions for individuals in the training set' do
80
+ @forest.predictions.should == {}
81
+ @forest.grow
82
+ (@forest.predictions.keys - (1..1000).to_a ).should == [] # 1000 individuals in the training file
83
+ @forest.predictions.values.each{|v| v.should be_kind_of String}
84
+ end
85
+
86
+ it 'computes averaged SNP importances for every SNP' do
87
+ @forest.snp_importances.should == {}
88
+ @forest.grow
89
+ @forest.snp_importances.keys.sort.should == (1..100).to_a # 100 snps in the training file
90
+ @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
91
+ end
92
+
93
+ it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
94
+ @forest = @config.load_forest
95
+ @forest.predictions.should == {}
96
+
97
+ tree_structure = YAML.load(File.open fixture_file('classification_random_forest.yml'))
98
+ expected_predictions = {}
99
+ @config.read_testing_data{|individual|
100
+ individual_prediction = []
101
+ tree_structure.each do |t|
102
+ individual_prediction << Nimbus::Tree.traverse(t, individual.snp_list)
103
+ end
104
+ expected_predictions[individual.id] = Nimbus::LossFunctions.majority_class_in_list(individual_prediction, @config.tree[:classes])
105
+ }
106
+
107
+ @forest.traverse
108
+ @forest.predictions.should == expected_predictions
109
+ end
110
+
111
+ it 'can output forest structure in YAML format' do
112
+ @forest = @config.load_forest
113
+ YAML.load(File.open fixture_file('classification_random_forest.yml')) == YAML.load(@forest.to_yaml)
114
+ end
55
115
  end
56
-
57
116
  end
@@ -2,12 +2,12 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::Individual do
5
-
5
+
6
6
  it "stores id, fenotype and SNPs information for an individual" do
7
7
  @individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
8
8
  @individual.id.should == 11
9
9
  @individual.fenotype.should == 33.275
10
10
  @individual.snp_list.should == [1,0,2,1]
11
11
  end
12
-
12
+
13
13
  end
@@ -0,0 +1,71 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::LossFunctions do
5
+
6
+ it "method for average" do
7
+ ids = [1,3,5,7]
8
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
9
+
10
+ Nimbus::LossFunctions.average(ids, values).should == 18.25 # (10 + 21 + 31 + 11) / 4 = 73 / 4
11
+ end
12
+
13
+ it "method for mean squared error" do
14
+ ids = [3,7,85]
15
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
16
+
17
+ Nimbus::LossFunctions.mean_squared_error(ids, values).should == 74.0 # avg(21, 11, 22) = 18; sum of (x - 18)^2 = 9 + 49 + 16 = 74
18
+ end
19
+
20
+ it "method for quadratic_loss" do
21
+ ids = [1,4]
22
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
23
+
24
+ Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == 1
25
+ end
26
+
27
+ it "quadratic loss is mean squared error averaged" do
28
+ ids = [1,2,3,4,5,7,85]
29
+ values = {1 => 10, 2 => 5, 3 => 21, 4 => 8, 5 => 31, 7 => 11, 85 => 22}
30
+ Nimbus::LossFunctions.quadratic_loss(ids, values).round(5).should == (Nimbus::LossFunctions.mean_squared_error(ids, values)/7 ).round(5)
31
+ end
32
+
33
+ it "method for squared difference" do
34
+ Nimbus::LossFunctions.squared_difference(50, 40).should == 100.0
35
+ Nimbus::LossFunctions.squared_difference(22, 10).should == 144.0
36
+ end
37
+
38
+ it "method for majority class" do
39
+ ids = [1,2,3,4,5,7,85]
40
+ values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C'} #3C, 2A, 2B
41
+ classes = ['A', 'B', 'C']
42
+ Nimbus::LossFunctions.majority_class(ids, values, classes).should == 'C'
43
+ end
44
+
45
+ it "majority class method selects randomly if more than one majority class" do
46
+ ids = [1,2,3,4,5,7,85,99]
47
+ values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'B', 85 => 'C', 99 => 'A'} #3C, 3A, 2B
48
+ classes = ['A', 'B', 'C']
49
+ results = []
50
+ 20.times do
51
+ results << Nimbus::LossFunctions.majority_class(ids, values, classes)
52
+ end
53
+ results.should include('A')
54
+ results.should include('C')
55
+ end
56
+
57
+ it "method for majority class in list" do
58
+ list = %w(A A A B B B C A B C A B A)
59
+ classes = ['A', 'B', 'C']
60
+ Nimbus::LossFunctions.majority_class_in_list(list, classes).should == 'A'
61
+ end
62
+
63
+ it "Gini index" do
64
+ ids = [1,2,3,4,5,7]
65
+ values = {1 => 'B', 2 => 'C', 3 => 'A', 4 => 'A', 5 => 'C', 7 => 'C'} #3C, 2A, 1B
66
+ classes = ['A', 'B', 'C']
67
+ # Gini = 1 - ( (3/6)^2 + (2/6)^2 + (1/6)^2 ) = 0.61111
68
+ Nimbus::LossFunctions.gini_index(ids, values, classes).should == 0.61111
69
+ end
70
+
71
+ end
data/spec/nimbus_spec.rb CHANGED
@@ -3,16 +3,16 @@ require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
 
5
5
  describe 'Nimbus module' do
6
-
6
+
7
7
  it "manages a Nimbus::Application object" do
8
8
  app = Nimbus.application
9
9
  app.should be_kind_of Nimbus::Application
10
10
  end
11
-
11
+
12
12
  it "accepts setting an external Nimbus::Application" do
13
- app = Nimbus::Application.new
13
+ app = Nimbus::Application.new
14
14
  Nimbus.application = app
15
15
  Nimbus.application.should == app
16
16
  end
17
-
17
+
18
18
  end
@@ -0,0 +1,129 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe Nimbus::RegressionTree do
4
+
5
+ before(:each) do
6
+ @config = Nimbus::Configuration.new
7
+ @config.load fixture_file('regression_config.yml')
8
+
9
+ @tree = Nimbus::RegressionTree.new @config.tree
10
+ end
11
+
12
+ it "is initialized with tree config info" do
13
+ @tree.snp_total_count.should == 200
14
+ @tree.snp_sample_size.should == 60
15
+ @tree.node_min_size.should == 5
16
+ end
17
+
18
+ it "creates a tree structure when seeded with training data" do
19
+ @config.load_training_data
20
+ @tree.structure.should be_nil
21
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
22
+ @tree.structure.should_not be_nil
23
+ @tree.structure.should be_kind_of Hash
24
+
25
+ @tree.structure.keys.first.should == @tree.used_snps.last
26
+ @tree.used_snps.should_not be_empty
27
+ end
28
+
29
+ it "split node in three when building a node and finds a suitable split" do
30
+ @config.load_training_data
31
+ @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
32
+
33
+ @tree.individuals = @config.training_set.individuals
34
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
35
+ @tree.used_snps = []
36
+ @tree.predictions = {}
37
+
38
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
39
+ branch.keys.size.should == 1
40
+ branch.keys.first.should == 189
41
+ branch[189].size.should == 3
42
+ branch[189][0].should be_kind_of Hash
43
+ branch[189][1].should be_kind_of Hash
44
+ branch[189][2].should be_kind_of Hash
45
+ end
46
+
47
+ it "keeps track of all SNPs used for the tree" do
48
+ @config.load_training_data
49
+ snps = (131..190).to_a
50
+ @tree.stub!(:snps_random_sample).and_return(snps)
51
+ @tree.used_snps.should be_nil
52
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
53
+ @tree.used_snps.size.should > 4
54
+ @tree.used_snps.each{|snp|
55
+ snps.include?(snp).should be_true
56
+ }
57
+ end
58
+
59
+ it "labels node when building a node and there is not a suitable split" do
60
+ @config.load_training_data
61
+ @tree.stub!(:snps_random_sample).and_return([33])
62
+
63
+ @tree.individuals = @config.training_set.individuals
64
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
65
+ @tree.used_snps = []
66
+ @tree.predictions = {}
67
+
68
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
69
+ branch[33][0].should be_kind_of Numeric
70
+ branch[33][1].should be_kind_of Numeric
71
+ branch[33][2].should be_kind_of Numeric
72
+ end
73
+
74
+ it "labels node when building a node with less individuals than the minimum node size" do
75
+ @config.load_training_data
76
+
77
+ @tree.individuals = @config.training_set.individuals
78
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
79
+ @tree.used_snps = []
80
+ @tree.predictions = {}
81
+
82
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
83
+ label.should be_kind_of Numeric
84
+
85
+ label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
86
+ label.should be_kind_of Numeric
87
+
88
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
89
+ label.should be_kind_of Numeric
90
+
91
+ label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
92
+ label.should be_kind_of Numeric
93
+ end
94
+
95
+ it 'computes generalization error for the tree' do
96
+ @config.load_training_data
97
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
98
+ @tree.generalization_error.should be_nil
99
+ @tree.generalization_error_from_oob((2..200).to_a)
100
+ @tree.generalization_error.should be_kind_of Numeric
101
+ @tree.generalization_error.should > 0.0
102
+ @tree.generalization_error.should < 1.0
103
+ end
104
+
105
+ it 'estimates importance for all SNPs' do
106
+ @config.load_training_data
107
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
108
+ @tree.importances.should be_nil
109
+ @tree.estimate_importances((300..533).to_a)
110
+ @tree.importances.should be_kind_of Hash
111
+ @tree.importances.keys.should_not be_empty
112
+ (@tree.importances.keys - (1..200).to_a).should be_empty #all keys are snp indexes (200 snps in training file)
113
+ end
114
+
115
+ it 'get prediction for an individual pushing it down a tree structure' do
116
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml')).first
117
+ individual_data = [0]*200
118
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
119
+ prediction.should == 0.25043
120
+
121
+ individual_data[189-1] = 1
122
+ individual_data[4-1] = 1
123
+ individual_data[62-1] = 2
124
+ individual_data[146-1] = 2
125
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
126
+ prediction.should == -0.9854
127
+ end
128
+
129
+ end
@@ -2,24 +2,24 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::TrainingSet do
5
-
5
+
6
6
  it "stores individuals list and fenotype data for them" do
7
7
  i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
8
8
  i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
9
9
  i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
10
10
  @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
11
-
11
+
12
12
  @training_set.individuals.should == [i1, i3]
13
13
  @training_set.ids_fenotypes.should == {i1.id => 11.0, i3.id => 33.0}
14
14
  end
15
-
15
+
16
16
  it "keeps track of ids of all individuals in the training set" do
17
17
  i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
18
18
  i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
19
19
  i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
20
20
  @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
21
-
21
+
22
22
  @training_set.all_ids.should == [1,3]
23
23
  end
24
-
24
+
25
25
  end
data/spec/tree_spec.rb CHANGED
@@ -2,129 +2,18 @@
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Nimbus::Tree do
5
-
5
+
6
6
  before(:each) do
7
7
  @config = Nimbus::Configuration.new
8
8
  @config.load fixture_file('regression_config.yml')
9
-
9
+
10
10
  @tree = Nimbus::Tree.new @config.tree
11
11
  end
12
-
12
+
13
13
  it "is initialized with tree config info" do
14
14
  @tree.snp_total_count.should == 200
15
15
  @tree.snp_sample_size.should == 60
16
16
  @tree.node_min_size.should == 5
17
17
  end
18
-
19
- it "creates a tree structure when seeded with training data" do
20
- @config.load_training_data
21
- @tree.structure.should be_nil
22
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
23
- @tree.structure.should_not be_nil
24
- @tree.structure.should be_kind_of Hash
25
-
26
- @tree.structure.keys.first.should == @tree.used_snps.last
27
- @tree.used_snps.should_not be_empty
28
- end
29
-
30
- it "split node in three when building a node and finds a suitable split" do
31
- @config.load_training_data
32
- @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
33
-
34
- @tree.individuals = @config.training_set.individuals
35
- @tree.id_to_fenotype = @config.training_set.ids_fenotypes
36
- @tree.used_snps = []
37
- @tree.predictions = {}
38
-
39
- branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
40
- branch.keys.size.should == 1
41
- branch.keys.first.should == 189
42
- branch[189].size.should == 3
43
- branch[189][0].should be_kind_of Hash
44
- branch[189][1].should be_kind_of Hash
45
- branch[189][2].should be_kind_of Hash
46
- end
47
-
48
- it "keeps track of all SNPs used for the tree" do
49
- @config.load_training_data
50
- snps = (131..190).to_a
51
- @tree.stub!(:snps_random_sample).and_return(snps)
52
- @tree.used_snps.should be_nil
53
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
54
- @tree.used_snps.size.should > 4
55
- @tree.used_snps.each{|snp|
56
- snps.include?(snp).should be_true
57
- }
58
- end
59
-
60
- it "labels node when building a node and there is not a suitable split" do
61
- @config.load_training_data
62
- @tree.stub!(:snps_random_sample).and_return([33])
63
-
64
- @tree.individuals = @config.training_set.individuals
65
- @tree.id_to_fenotype = @config.training_set.ids_fenotypes
66
- @tree.used_snps = []
67
- @tree.predictions = {}
68
-
69
- branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
70
- branch[33][0].should be_kind_of Numeric
71
- branch[33][1].should be_kind_of Numeric
72
- branch[33][2].should be_kind_of Numeric
73
- end
74
-
75
- it "labels node when building a node with less individuals than the minimum node size" do
76
- @config.load_training_data
77
-
78
- @tree.individuals = @config.training_set.individuals
79
- @tree.id_to_fenotype = @config.training_set.ids_fenotypes
80
- @tree.used_snps = []
81
- @tree.predictions = {}
82
-
83
- label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
84
- label.should be_kind_of Numeric
85
-
86
- label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
87
- label.should be_kind_of Numeric
88
-
89
- label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
90
- label.should be_kind_of Numeric
91
-
92
- label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
93
- label.should be_kind_of Numeric
94
- end
95
-
96
- it 'computes generalization error for the tree' do
97
- @config.load_training_data
98
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
99
- @tree.generalization_error.should be_nil
100
- @tree.generalization_error_from_oob((2..200).to_a)
101
- @tree.generalization_error.should be_kind_of Numeric
102
- @tree.generalization_error.should > 0.0
103
- @tree.generalization_error.should < 100.0
104
- end
105
-
106
- it 'estimates importance for all SNPs' do
107
- @config.load_training_data
108
- @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
109
- @tree.importances.should be_nil
110
- @tree.estimate_importances((300..533).to_a)
111
- @tree.importances.should be_kind_of Hash
112
- @tree.importances.keys.should_not be_empty
113
- (@tree.importances.keys - (1..200).to_a).should be_empty
114
- end
115
-
116
- it 'get prediction for an individual pushing it down a tree structure' do
117
- tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml')).first
118
- individual_data = [0]*200
119
- prediction = Nimbus::Tree.traverse tree_structure, individual_data
120
- prediction.should == 0.25043
121
-
122
- individual_data[189-1] = 1
123
- individual_data[4-1] = 1
124
- individual_data[62-1] = 2
125
- individual_data[146-1] = 2
126
- prediction = Nimbus::Tree.traverse tree_structure, individual_data
127
- prediction.should == -0.9854
128
- end
129
-
18
+
130
19
  end