nimbus 2.2.1 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +7 -0
  3. data/CONTRIBUTING.md +46 -0
  4. data/MIT-LICENSE.txt +1 -1
  5. data/README.md +131 -21
  6. data/bin/nimbus +2 -2
  7. data/lib/nimbus.rb +2 -6
  8. data/lib/nimbus/classification_tree.rb +9 -12
  9. data/lib/nimbus/configuration.rb +22 -22
  10. data/lib/nimbus/forest.rb +8 -8
  11. data/lib/nimbus/loss_functions.rb +11 -0
  12. data/lib/nimbus/regression_tree.rb +8 -10
  13. data/lib/nimbus/tree.rb +54 -12
  14. data/lib/nimbus/version.rb +1 -1
  15. data/spec/classification_tree_spec.rb +47 -47
  16. data/spec/configuration_spec.rb +55 -55
  17. data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
  18. data/spec/fixtures/classification/random_forest.yml +1174 -0
  19. data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
  20. data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
  21. data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
  22. data/spec/fixtures/regression/random_forest.yml +2737 -0
  23. data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
  24. data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
  25. data/spec/forest_spec.rb +39 -39
  26. data/spec/individual_spec.rb +3 -3
  27. data/spec/loss_functions_spec.rb +31 -13
  28. data/spec/nimbus_spec.rb +2 -2
  29. data/spec/regression_tree_spec.rb +44 -44
  30. data/spec/training_set_spec.rb +3 -3
  31. data/spec/tree_spec.rb +4 -4
  32. metadata +37 -34
  33. data/spec/fixtures/classification_random_forest.yml +0 -922
  34. data/spec/fixtures/regression_random_forest.yml +0 -1741
@@ -88,6 +88,14 @@ module Nimbus
88
88
  @trees.to_yaml
89
89
  end
90
90
 
91
+ def classification?
92
+ @options.tree[:classes]
93
+ end
94
+
95
+ def regression?
96
+ @options.tree[:classes].nil?
97
+ end
98
+
91
99
  private
92
100
 
93
101
  def individuals_random_sample
@@ -140,14 +148,6 @@ module Nimbus
140
148
  }
141
149
  end
142
150
 
143
- def classification?
144
- @options.tree[:classes]
145
- end
146
-
147
- def regression?
148
- @options.tree[:classes].nil?
149
- end
150
-
151
151
  end
152
152
 
153
153
  end
@@ -35,6 +35,17 @@ module Nimbus
35
35
  def squared_difference(x,y)
36
36
  0.0 + (x-y)**2
37
37
  end
38
+
39
+ # Simplified Huber function: sum of log(cosh(value - mean)) over the given ids
40
+ def pseudo_huber_error(ids, value_table, mean = nil)
41
+ mean ||= self.average ids, value_table
42
+ ids.inject(0.0){|sum, i| sum + (Math.log(Math.cosh(value_table[i] - mean))) }
43
+ end
44
+
45
+ # Simplified Huber loss function: PHE / n
46
+ def pseudo_huber_loss(ids, value_table, mean = nil)
47
+ self.pseudo_huber_error(ids, value_table, mean) / ids.size
48
+ end
38
49
 
39
50
  ## CLASSIFICATION
40
51
 
@@ -8,7 +8,7 @@ module Nimbus
8
8
  # * 1: Calculate loss function for the individuals in the node (first node contains all the individuals).
9
9
  # * 2: Take a random sample of the SNPs (size m << total count of SNPs)
10
10
  # * 3: Compute the loss function (quadratic loss) for the split of the sample based on value of every SNP.
11
- # * 4: If the SNP with minimum loss function also minimizes the general loss of the node, split the individuals sample in three nodes, based on value for that SNP [0, 1, or 2]
11
+ # * 4: If the SNP with minimum loss function also minimizes the general loss of the node, split the individuals sample into two nodes, based on the average value for that SNP: [0,1][2] or [0][1,2]
12
12
  # * 5: Repeat from 1 for every node until:
13
13
  # - a) The individuals count in that node is < minimum size OR
14
14
  # - b) None of the SNP splits has a loss function smaller than the node loss function
@@ -27,8 +27,8 @@ module Nimbus
27
27
 
28
28
  # Creates a node by taking a random sample of the SNPs and computing the loss function for every split by SNP of that sample.
29
29
  #
30
- # * If SNP_min is the SNP with smaller loss function and it is < the loss function of the node, it splits the individuals sample in three:
31
- # (those with value 0 for the SNP_min, those with value 1 for the SNP_min, and those with value 2 for the SNP_min) then it builds these 3 new nodes.
30
+ # * If SNP_min is the SNP with the smallest loss function and that loss is < the loss function of the node, it splits the individuals sample in two:
31
+ # (the average of the 0,1,2 values for the SNP_min in the individuals is computed, and they are split into [<=avg] and [>avg]); then it builds these 2 new nodes.
32
32
  # * Otherwise every individual in the node gets labeled with the average of the fenotype values of all of them.
33
33
  def build_node(individuals_ids, y_hat)
34
34
  # General loss function value for the node
@@ -38,22 +38,20 @@ module Nimbus
38
38
 
39
39
  # Finding the SNP that minimizes loss function
40
40
  snps = snps_random_sample
41
- min_loss, min_SNP, split, means = node_loss_function, nil, nil, nil
41
+ min_loss, min_SNP, split, split_type, means = node_loss_function, nil, nil, nil, nil
42
42
 
43
43
  snps.each do |snp|
44
- individuals_split_by_snp_value = split_by_snp_value individuals_ids, snp
44
+ individuals_split_by_snp_value, node_split_type = split_by_snp_avegare_value individuals_ids, snp
45
45
  mean_0 = Nimbus::LossFunctions.average individuals_split_by_snp_value[0], @id_to_fenotype
46
46
  mean_1 = Nimbus::LossFunctions.average individuals_split_by_snp_value[1], @id_to_fenotype
47
- mean_2 = Nimbus::LossFunctions.average individuals_split_by_snp_value[2], @id_to_fenotype
48
47
  loss_0 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[0], @id_to_fenotype, mean_0
49
48
  loss_1 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[1], @id_to_fenotype, mean_1
50
- loss_2 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[2], @id_to_fenotype, mean_2
51
- loss_snp = (loss_0 + loss_1 + loss_2) / individuals_count
49
+ loss_snp = (loss_0 + loss_1) / individuals_count
52
50
 
53
- min_loss, min_SNP, split, means = loss_snp, snp, individuals_split_by_snp_value, [mean_0, mean_1, mean_2] if loss_snp < min_loss
51
+ min_loss, min_SNP, split, split_type, means = loss_snp, snp, individuals_split_by_snp_value, node_split_type, [mean_0, mean_1] if loss_snp < min_loss
54
52
  end
55
53
 
56
- return build_branch(min_SNP, split, means, y_hat) if min_loss < node_loss_function
54
+ return build_branch(min_SNP, split, split_type, means, y_hat) if min_loss < node_loss_function
57
55
  return label_node(y_hat, individuals_ids)
58
56
  end
59
57
 
@@ -18,6 +18,9 @@ module Nimbus
18
18
  attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :used_snps, :structure, :generalization_error, :predictions, :importances
19
19
  attr_accessor :individuals, :id_to_fenotype
20
20
 
21
+ NODE_SPLIT_01_2 = "zero"
22
+ NODE_SPLIT_0_12 = "two"
23
+
21
24
  # Initialize Tree object with the configuration (as in Nimbus::Configuration.tree) options received.
22
25
  def initialize(options)
23
26
  @snp_total_count = options[:snp_total_count]
@@ -53,8 +56,14 @@ module Nimbus
53
56
  # Returns the prediction for that individual (the label of the final node reached by the individual).
54
57
  def self.traverse(tree_structure, data)
55
58
  return tree_structure if tree_structure.is_a?(Numeric) || tree_structure.is_a?(String)
59
+
56
60
  raise Nimbus::TreeError, "Forest data has invalid structure. Please check your forest data (file)." if !(tree_structure.is_a?(Hash) && tree_structure.keys.size == 1)
57
- return self.traverse( tree_structure.values.first[ data[tree_structure.keys.first - 1].to_i], data)
61
+
62
+ branch = tree_structure.values.first
63
+ split_type = branch[1].to_s
64
+ datum = data_traversing_value(data[tree_structure.keys.first - 1], split_type)
65
+
66
+ return self.traverse(branch[datum], data)
58
67
  end
59
68
 
60
69
  protected
@@ -63,13 +72,12 @@ module Nimbus
63
72
  (1..@snp_total_count).to_a.sample(@snp_sample_size).sort
64
73
  end
65
74
 
66
- def build_branch(snp, split, y_hats, parent_y_hat)
67
- node_0 = split[0].size == 0 ? label_node(parent_y_hat, []) : build_node(split[0], y_hats[0])
68
- node_1 = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
69
- node_2 = split[2].size == 0 ? label_node(parent_y_hat, []) : build_node(split[2], y_hats[2])
75
+ def build_branch(snp, split, split_type, y_hats, parent_y_hat)
76
+ node_a = split[0].size == 0 ? label_node(parent_y_hat, []) : build_node(split[0], y_hats[0])
77
+ node_b = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
70
78
 
71
79
  split_by_snp(snp)
72
- return { snp => [node_0, node_1, node_2] }
80
+ return { snp => [node_a, split_type, node_b] }
73
81
  end
74
82
 
75
83
  def label_node(value, ids)
@@ -78,24 +86,58 @@ module Nimbus
78
86
  label
79
87
  end
80
88
 
81
- def split_by_snp_value(ids, snp)
82
- split = [[], [], []]
89
+ def split_by_snp_avegare_value(ids, snp)
90
+ split_012 = [[], [], []]
83
91
  ids.each do |i|
84
- split[ @individuals[i].snp_list[snp-1] ] << @individuals[i].id
92
+ split_012[ @individuals[i].snp_list[snp-1] ] << @individuals[i].id
85
93
  end
86
- split
94
+ # we split by the average number of 0,1,2 values.
95
+ # So if the number of 0s is less than or equal to the number of 2s, the split is [0,1][2]
96
+ # and if there are more 0s than 2s the average will be <1 so the split is [0][1,2]
97
+ split_type = (split_012[0].size <= split_012[2].size ? NODE_SPLIT_01_2 : NODE_SPLIT_0_12)
98
+ split_type == NODE_SPLIT_01_2 ? split_012[0] += split_012[1] : split_012[2] += split_012[1]
99
+ split = [split_012[0], split_012[2]]
100
+ [split, split_type]
87
101
  rescue => ex
88
102
  raise Nimbus::TreeError, "Values for SNPs columns must be in [0, 1, 2]"
89
103
  end
90
104
 
105
+ def split_by_value(ids, snp, value)
106
+ split = [[], []]
107
+ ids.each do |i|
108
+ @individuals[i].snp_list[snp-1] > value ? (split[1] << @individuals[i].id) : (split[0] << @individuals[i].id)
109
+ end
110
+ split
111
+ rescue => ex
112
+ raise Nimbus::TreeError, "Values for SNPs columns must be numeric"
113
+ end
114
+
91
115
  def split_by_snp(x)
92
116
  @used_snps << x
93
117
  end
94
118
 
95
119
  def traverse_with_permutation(tree_structure, data, snp_to_permute, individual_to_permute)
96
120
  return tree_structure if tree_structure.is_a?(Numeric) || tree_structure.is_a?(String)
97
- individual_data = (tree_structure.keys.first == snp_to_permute ? individual_to_permute : data)
98
- return traverse_with_permutation( tree_structure.values.first[ individual_data[tree_structure.keys.first - 1].to_i], data, snp_to_permute, individual_to_permute)
121
+
122
+ key = tree_structure.keys.first
123
+ branch = tree_structure.values.first
124
+ individual_data = (key == snp_to_permute ? individual_to_permute : data)
125
+ split_type = branch[1]
126
+ datum = data_traversing_value(individual_data[key - 1].to_i, split_type)
127
+
128
+ return traverse_with_permutation branch[datum], data, snp_to_permute, individual_to_permute
129
+ end
130
+
131
+ def data_traversing_value(datum, split_type)
132
+ Nimbus::Tree.data_traversing_value(datum, split_type)
133
+ end
134
+
135
+ def self.data_traversing_value(datum, split_type)
136
+ if datum == 1
137
+ return 0 if split_type == NODE_SPLIT_01_2
138
+ return 2 if split_type == NODE_SPLIT_0_12
139
+ end
140
+ datum
99
141
  end
100
142
 
101
143
  end
@@ -1,3 +1,3 @@
1
1
  module Nimbus
2
- VERSION = "2.2.1"
2
+ VERSION = "2.3.0"
3
3
  end
@@ -4,34 +4,34 @@ describe Nimbus::ClassificationTree do
4
4
 
5
5
  before(:each) do
6
6
  @config = Nimbus::Configuration.new
7
- @config.load fixture_file('classification_config.yml')
7
+ @config.load fixture_file('classification/config.yml')
8
8
 
9
9
  @tree = Nimbus::ClassificationTree.new @config.tree
10
10
  end
11
11
 
12
12
  it "is initialized with tree config info" do
13
- @tree.snp_total_count.should == 100
14
- @tree.snp_sample_size.should == 33
15
- @tree.node_min_size.should == 5
16
- @tree.classes.size.should == 2
17
- @tree.classes[0].should == '0'
18
- @tree.classes[1].should == '1'
13
+ expect(@tree.snp_total_count).to eq 100
14
+ expect(@tree.snp_sample_size).to eq 33
15
+ expect(@tree.node_min_size).to eq 5
16
+ expect(@tree.classes.size).to eq 2
17
+ expect(@tree.classes[0]).to eq '0'
18
+ expect(@tree.classes[1]).to eq '1'
19
19
  end
20
20
 
21
21
  it "creates a tree structure when seeded with training data" do
22
22
  @config.load_training_data
23
- @tree.structure.should be_nil
23
+ expect(@tree.structure).to be_nil
24
24
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
25
- @tree.structure.should_not be_nil
26
- @tree.structure.should be_kind_of Hash
25
+ expect(@tree.structure).to_not be_nil
26
+ expect(@tree.structure).to be_kind_of Hash
27
27
 
28
- @tree.structure.keys.first.should == @tree.used_snps.last
29
- @tree.used_snps.should_not be_empty
28
+ expect(@tree.structure.keys.first).to eq @tree.used_snps.last
29
+ expect(@tree.used_snps).to_not be_empty
30
30
  end
31
31
 
32
- it "splits node in three when building a node and finds a suitable split" do
32
+ it "splits node when building a node and finds a suitable split" do
33
33
  @config.load_training_data
34
- @tree.stub!(:snps_random_sample).and_return((68..100).to_a) #97 is best split
34
+ allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return((68..100).to_a) #97 is best split
35
35
 
36
36
  @tree.individuals = @config.training_set.individuals
37
37
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -39,29 +39,29 @@ describe Nimbus::ClassificationTree do
39
39
  @tree.predictions = {}
40
40
 
41
41
  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
42
- branch.keys.size.should == 1
43
- branch.keys.first.should == 97
44
- branch[97].size.should == 3
45
- branch[97][0].should be_kind_of Hash
46
- branch[97][1].should be_kind_of Hash
47
- branch[97][2].should be_kind_of Hash
42
+ expect(branch.keys.size).to eq 1
43
+ expect(branch.keys.first).to eq 97
44
+ expect(branch[97].size).to eq 3
45
+ expect(branch[97][0]).to be_kind_of Hash
46
+ expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[97][1])
47
+ expect(branch[97][2]).to be_kind_of Hash
48
48
  end
49
49
 
50
50
  it "keeps track of all SNPs used for the tree" do
51
51
  @config.load_training_data
52
52
  snps = (33..65).to_a
53
- @tree.stub!(:snps_random_sample).and_return(snps)
54
- @tree.used_snps.should be_nil
53
+ allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return(snps)
54
+ expect(@tree.used_snps).to be_nil
55
55
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
56
- @tree.used_snps.size.should > 4
56
+ expect(@tree.used_snps.size).to be > 4
57
57
  @tree.used_snps.each{|snp|
58
- snps.include?(snp).should be_true
58
+ expect(snps.include?(snp)).to be true
59
59
  }
60
60
  end
61
61
 
62
62
  it "labels node when building a node and there is not a suitable split" do
63
63
  @config.load_training_data
64
- @tree.stub!(:snps_random_sample).and_return([33])
64
+ allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return([11])
65
65
 
66
66
  @tree.individuals = @config.training_set.individuals
67
67
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -69,9 +69,9 @@ describe Nimbus::ClassificationTree do
69
69
  @tree.predictions = {}
70
70
 
71
71
  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
72
- branch[33][0].should be_kind_of String
73
- branch[33][1].should be_kind_of String
74
- branch[33][2].should be_kind_of String
72
+ expect(branch[11][0]).to be_kind_of String
73
+ expect(branch[11][1]).to be_kind_of String
74
+ expect(branch[11][2]).to be_kind_of String
75
75
  end
76
76
 
77
77
  it "labels node when building a node with less individuals than the minimum node size" do
@@ -83,50 +83,50 @@ describe Nimbus::ClassificationTree do
83
83
  @tree.predictions = {}
84
84
 
85
85
  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
86
- label.should be_kind_of String
86
+ expect(label).to be_kind_of String
87
87
 
88
88
  label = @tree.build_node [2, 10], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
89
- label.should be_kind_of String
89
+ expect(label).to be_kind_of String
90
90
 
91
91
  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
92
- label.should be_kind_of String
92
+ expect(label).to be_kind_of String
93
93
 
94
94
  label = @tree.build_node [99, 22, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
95
- label.should be_kind_of String
95
+ expect(label).to be_kind_of String
96
96
  end
97
97
 
98
98
  it 'computes generalization error for the tree' do
99
99
  @config.load_training_data
100
100
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
101
- @tree.generalization_error.should be_nil
101
+ expect(@tree.generalization_error).to be_nil
102
102
  @tree.generalization_error_from_oob((3..300).to_a)
103
- @tree.generalization_error.should be_kind_of Numeric
104
- @tree.generalization_error.should > 0.0
105
- @tree.generalization_error.should < 1.0
103
+ expect(@tree.generalization_error).to be_kind_of Numeric
104
+ expect(@tree.generalization_error).to be > 0.0
105
+ expect(@tree.generalization_error).to be < 1.0
106
106
  end
107
107
 
108
108
  it 'estimates importance for all SNPs' do
109
109
  @config.load_training_data
110
110
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
111
- @tree.importances.should be_nil
111
+ expect(@tree.importances).to be_nil
112
112
  @tree.estimate_importances((200..533).to_a)
113
- @tree.importances.should be_kind_of Hash
114
- @tree.importances.keys.should_not be_empty
115
- (@tree.importances.keys - (1..100).to_a).should be_empty #all keys are snp indexes (100 snps in training file)
113
+ expect(@tree.importances).to be_kind_of Hash
114
+ expect(@tree.importances.keys).to_not be_empty
115
+ expect((@tree.importances.keys - (1..100).to_a)).to be_empty #all keys are snp indexes (100 snps in training file)
116
116
  end
117
117
 
118
118
  it 'get prediction for an individual pushing it down a tree structure' do
119
- tree_structure = Psych.load(File.open fixture_file('classification_random_forest.yml')).first
119
+ tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml')).first
120
120
  individual_data = [0]*100
121
121
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
122
- prediction.should == '1'
122
+ expect(prediction).to eq '0'
123
123
 
124
- individual_data[26-1] = 1
125
- individual_data[57-1] = 2
126
- individual_data[98-1] = 2
127
- individual_data[8-1] = 1
124
+ individual_data[8-1] = 2
125
+ individual_data[29-1] = 0
126
+ individual_data[1-1] = 1
127
+ individual_data[7-1] = 1
128
128
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
129
- prediction.should == '0'
129
+ expect(prediction).to eq '1'
130
130
  end
131
131
 
132
132
  end
@@ -5,65 +5,65 @@ describe Nimbus::Configuration do
5
5
 
6
6
  it "loads configuration options from config file" do
7
7
  config = Nimbus::Configuration.new
8
- config.load fixture_file('regression_config.yml')
8
+ config.load fixture_file('regression/config.yml')
9
9
 
10
- config.training_file.should == fixture_file('regression_training.data')
11
- config.testing_file.should == fixture_file('regression_testing.data')
12
- config.forest_file.should == fixture_file('regression_random_forest.yml')
13
- config.classes.should be_nil
14
- config.do_importances.should be
10
+ expect(config.training_file).to eq fixture_file('regression/training.data')
11
+ expect(config.testing_file).to eq fixture_file('regression/testing.data')
12
+ expect(config.forest_file).to eq fixture_file('regression/random_forest.yml')
13
+ expect(config.classes).to be_nil
14
+ expect(config.do_importances).to be
15
15
 
16
- config.forest_size.should == 3
17
- config.tree_SNP_sample_size.should == 60
18
- config.tree_SNP_total_count.should == 200
19
- config.tree_node_min_size.should == 5
16
+ expect(config.forest_size).to eq 3
17
+ expect(config.tree_SNP_sample_size).to eq 60
18
+ expect(config.tree_SNP_total_count).to eq 200
19
+ expect(config.tree_node_min_size).to eq 5
20
20
 
21
21
  config = Nimbus::Configuration.new
22
- config.load fixture_file('classification_config.yml')
23
-
24
- config.training_file.should == fixture_file('classification_training.data')
25
- config.testing_file.should == fixture_file('classification_testing.data')
26
- config.forest_file.should == fixture_file('classification_random_forest.yml')
27
- config.classes.should == ['0','1']
28
- config.do_importances.should_not be
29
-
30
- config.forest_size.should == 3
31
- config.tree_SNP_sample_size.should == 33
32
- config.tree_SNP_total_count.should == 100
33
- config.tree_node_min_size.should == 5
22
+ config.load fixture_file('classification/config.yml')
23
+
24
+ expect(config.training_file).to eq fixture_file('classification/training.data')
25
+ expect(config.testing_file).to eq fixture_file('classification/testing.data')
26
+ expect(config.forest_file).to eq fixture_file('classification/random_forest.yml')
27
+ expect(config.classes).to eq ['0','1']
28
+ expect(config.do_importances).to_not be
29
+
30
+ expect(config.forest_size).to eq 3
31
+ expect(config.tree_SNP_sample_size).to eq 33
32
+ expect(config.tree_SNP_total_count).to eq 100
33
+ expect(config.tree_node_min_size).to eq 5
34
34
  end
35
35
 
36
36
  it 'tree method return tree-related subset of options for regression trees' do
37
37
  config = Nimbus::Configuration.new
38
- config.load fixture_file('regression_config.yml')
38
+ config.load fixture_file('regression/config.yml')
39
39
  tree_options = config.tree
40
40
 
41
- tree_options[:snp_sample_size].should_not be_nil
42
- tree_options[:snp_total_count].should_not be_nil
43
- tree_options[:tree_node_min_size].should_not be_nil
44
- tree_options[:classes].should be_nil
41
+ expect(tree_options[:snp_sample_size]).to_not be_nil
42
+ expect(tree_options[:snp_total_count]).to_not be_nil
43
+ expect(tree_options[:tree_node_min_size]).to_not be_nil
44
+ expect(tree_options[:classes]).to be_nil
45
45
  end
46
46
 
47
47
  it 'tree method return tree-related subset of options for classification trees' do
48
48
  config = Nimbus::Configuration.new
49
- config.load fixture_file('classification_config.yml')
49
+ config.load fixture_file('classification/config.yml')
50
50
  tree_options = config.tree
51
51
 
52
- tree_options[:snp_sample_size].should_not be_nil
53
- tree_options[:snp_total_count].should_not be_nil
54
- tree_options[:tree_node_min_size].should_not be_nil
55
- tree_options[:classes].should_not be_nil
52
+ expect(tree_options[:snp_sample_size]).to_not be_nil
53
+ expect(tree_options[:snp_total_count]).to_not be_nil
54
+ expect(tree_options[:tree_node_min_size]).to_not be_nil
55
+ expect(tree_options[:classes]).to_not be_nil
56
56
  end
57
57
 
58
58
  it "creates a training set object from training data file" do
59
59
  config = Nimbus::Configuration.new
60
- config.load fixture_file('regression_config.yml')
61
- config.training_set.should be_nil
60
+ config.load fixture_file('regression/config.yml')
61
+ expect(config.training_set).to be_nil
62
62
  config.load_training_data
63
- config.training_set.should be_kind_of Nimbus::TrainingSet
64
- config.training_set.all_ids.sort.should == (1..800).to_a
63
+ expect(config.training_set).to be_kind_of Nimbus::TrainingSet
64
+ expect(config.training_set.all_ids.sort).to eq (1..800).to_a
65
65
 
66
- File.open(fixture_file('regression_training.data')) {|file|
66
+ File.open(fixture_file('regression/training.data')) {|file|
67
67
  feno1, id1, *snp_list_1 = file.readline.split
68
68
  feno2, id2, *snp_list_2 = file.readline.split
69
69
  feno3, id3, *snp_list_3 = file.readline.split
@@ -72,9 +72,9 @@ describe Nimbus::Configuration do
72
72
  i2 = Nimbus::Individual.new(id2.to_i, feno2.to_f, snp_list_2.map{|snp| snp.to_i})
73
73
  i3 = Nimbus::Individual.new(id3.to_i, feno3.to_f, snp_list_3.map{|snp| snp.to_i})
74
74
 
75
- config.training_set.individuals[id1.to_i].id.should == i1.id
76
- config.training_set.individuals[id2.to_i].fenotype.should == i2.fenotype
77
- config.training_set.individuals[id3.to_i].snp_list.should == i3.snp_list
75
+ expect(config.training_set.individuals[id1.to_i].id).to eq i1.id
76
+ expect(config.training_set.individuals[id2.to_i].fenotype).to eq i2.fenotype
77
+ expect(config.training_set.individuals[id3.to_i].snp_list).to eq i3.snp_list
78
78
 
79
79
  config.training_set.ids_fenotypes[id1.to_i] = feno1.to_f
80
80
  config.training_set.ids_fenotypes[id2.to_i] = feno2.to_f
@@ -84,38 +84,38 @@ describe Nimbus::Configuration do
84
84
 
85
85
  it "reads testing data and yields one individual at a time" do
86
86
  config = Nimbus::Configuration.new
87
- config.load fixture_file('regression_config.yml')
87
+ config.load fixture_file('regression/config.yml')
88
88
 
89
89
  test_individuals = []
90
- File.open(fixture_file('regression_testing.data')) {|file|
90
+ File.open(fixture_file('regression/testing.data')) {|file|
91
91
  file.each do |line|
92
92
  data_id, *snp_list = line.strip.split
93
93
  test_individuals << Nimbus::Individual.new(data_id.to_i, nil, snp_list.map{|snp| snp.to_i})
94
94
  end
95
95
  }
96
- test_individuals.size.should == 200
96
+ expect(test_individuals.size).to eq 200
97
97
  config.read_testing_data{|individual|
98
98
  test_individual = test_individuals.shift
99
- individual.id.should_not be_nil
100
- individual.id.should == test_individual.id
101
- individual.snp_list.should_not be_empty
102
- individual.snp_list.should == test_individual.snp_list
99
+ expect(individual.id).to_not be_nil
100
+ expect(individual.id).to eq test_individual.id
101
+ expect(individual.snp_list).to_not be_empty
102
+ expect(individual.snp_list).to eq test_individual.snp_list
103
103
  }
104
104
  end
105
105
 
106
106
  it "creates a forest object loading data from a yaml file" do
107
107
  config = Nimbus::Configuration.new
108
- config.load fixture_file('regression_config.yml')
108
+ config.load fixture_file('regression/config.yml')
109
109
 
110
- trees = Psych.load(File.open fixture_file('regression_random_forest.yml'))
111
- trees.first.keys.first.should == 189
112
- trees.size.should == 3
110
+ trees = Psych.load(File.open fixture_file('regression/random_forest.yml'))
111
+ expect(trees.first.keys.first).to eq 176
112
+ expect(trees.size).to eq 3
113
113
 
114
114
  forest = config.load_forest
115
- forest.should be_kind_of Nimbus::Forest
116
- forest.trees[0].should == trees.first
117
- forest.trees[1].should == trees[1]
118
- forest.trees.last.should == trees[2]
115
+ expect(forest).to be_kind_of Nimbus::Forest
116
+ expect(forest.trees[0]).to eq trees.first
117
+ expect(forest.trees[1]).to eq trees[1]
118
+ expect(forest.trees.last).to eq trees[2]
119
119
  end
120
120
 
121
121
  end